diyfp.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. // Tencent is pleased to support the open source community by making RapidJSON available.
  2. //
  3. // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  4. //
  5. // Licensed under the MIT License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // http://opensource.org/licenses/MIT
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
  15. // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
  16. // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
  17. #ifndef RAPIDJSON_DIYFP_H_
  18. #define RAPIDJSON_DIYFP_H_
  19. #include "../rapidjson.h"
  20. #include "clzll.h"
  21. #include <limits>
  22. #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
  23. #include <intrin.h>
  24. #pragma intrinsic(_umul128)
  25. #endif
  26. RAPIDJSON_NAMESPACE_BEGIN
  27. namespace internal {
  28. #ifdef __GNUC__
  29. RAPIDJSON_DIAG_PUSH
  30. RAPIDJSON_DIAG_OFF(effc++)
  31. #endif
  32. #ifdef __clang__
  33. RAPIDJSON_DIAG_PUSH
  34. RAPIDJSON_DIAG_OFF(padded)
  35. #endif
  36. struct DiyFp {
  37. DiyFp() : f(), e() {}
  38. DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
  39. explicit DiyFp(double d) {
  40. union {
  41. double d;
  42. uint64_t u64;
  43. } u = { d };
  44. int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
  45. uint64_t significand = (u.u64 & kDpSignificandMask);
  46. if (biased_e != 0) {
  47. f = significand + kDpHiddenBit;
  48. e = biased_e - kDpExponentBias;
  49. }
  50. else {
  51. f = significand;
  52. e = kDpMinExponent + 1;
  53. }
  54. }
  55. DiyFp operator-(const DiyFp& rhs) const {
  56. return DiyFp(f - rhs.f, e);
  57. }
  58. DiyFp operator*(const DiyFp& rhs) const {
  59. #if defined(_MSC_VER) && defined(_M_AMD64)
  60. uint64_t h;
  61. uint64_t l = _umul128(f, rhs.f, &h);
  62. if (l & (uint64_t(1) << 63)) // rounding
  63. h++;
  64. return DiyFp(h, e + rhs.e + 64);
  65. #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
  66. __extension__ typedef unsigned __int128 uint128;
  67. uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
  68. uint64_t h = static_cast<uint64_t>(p >> 64);
  69. uint64_t l = static_cast<uint64_t>(p);
  70. if (l & (uint64_t(1) << 63)) // rounding
  71. h++;
  72. return DiyFp(h, e + rhs.e + 64);
  73. #else
  74. const uint64_t M32 = 0xFFFFFFFF;
  75. const uint64_t a = f >> 32;
  76. const uint64_t b = f & M32;
  77. const uint64_t c = rhs.f >> 32;
  78. const uint64_t d = rhs.f & M32;
  79. const uint64_t ac = a * c;
  80. const uint64_t bc = b * c;
  81. const uint64_t ad = a * d;
  82. const uint64_t bd = b * d;
  83. uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
  84. tmp += 1U << 31; /// mult_round
  85. return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
  86. #endif
  87. }
  88. DiyFp Normalize() const {
  89. int s = static_cast<int>(RAPIDJSON_CLZLL(f));
  90. return DiyFp(f << s, e - s);
  91. }
  92. DiyFp NormalizeBoundary() const {
  93. DiyFp res = *this;
  94. while (!(res.f & (kDpHiddenBit << 1))) {
  95. res.f <<= 1;
  96. res.e--;
  97. }
  98. res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
  99. res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
  100. return res;
  101. }
  102. void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
  103. DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
  104. DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
  105. mi.f <<= mi.e - pl.e;
  106. mi.e = pl.e;
  107. *plus = pl;
  108. *minus = mi;
  109. }
  110. double ToDouble() const {
  111. union {
  112. double d;
  113. uint64_t u64;
  114. }u;
  115. RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask);
  116. if (e < kDpDenormalExponent) {
  117. // Underflow.
  118. return 0.0;
  119. }
  120. if (e >= kDpMaxExponent) {
  121. // Overflow.
  122. return std::numeric_limits<double>::infinity();
  123. }
  124. const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
  125. static_cast<uint64_t>(e + kDpExponentBias);
  126. u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
  127. return u.d;
  128. }
  129. static const int kDiySignificandSize = 64;
  130. static const int kDpSignificandSize = 52;
  131. static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
  132. static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
  133. static const int kDpMinExponent = -kDpExponentBias;
  134. static const int kDpDenormalExponent = -kDpExponentBias + 1;
  135. static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
  136. static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
  137. static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
  138. uint64_t f;
  139. int e;
  140. };
  141. inline DiyFp GetCachedPowerByIndex(size_t index) {
  142. // 10^-348, 10^-340, ..., 10^340
  143. static const uint64_t kCachedPowers_F[] = {
  144. RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
  145. RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
  146. RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
  147. RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
  148. RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
  149. RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
  150. RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
  151. RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
  152. RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
  153. RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
  154. RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
  155. RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
  156. RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
  157. RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
  158. RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
  159. RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
  160. RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
  161. RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
  162. RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
  163. RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
  164. RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
  165. RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
  166. RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
  167. RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
  168. RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
  169. RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
  170. RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
  171. RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
  172. RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
  173. RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
  174. RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
  175. RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
  176. RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
  177. RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
  178. RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
  179. RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
  180. RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
  181. RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
  182. RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
  183. RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
  184. RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
  185. RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
  186. RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
  187. RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
  188. };
  189. static const int16_t kCachedPowers_E[] = {
  190. -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
  191. -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
  192. -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
  193. -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
  194. -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
  195. 109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
  196. 375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
  197. 641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
  198. 907, 933, 960, 986, 1013, 1039, 1066
  199. };
  200. RAPIDJSON_ASSERT(index < 87);
  201. return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
  202. }
  203. inline DiyFp GetCachedPower(int e, int* K) {
  204. //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
  205. double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
  206. int k = static_cast<int>(dk);
  207. if (dk - k > 0.0)
  208. k++;
  209. unsigned index = static_cast<unsigned>((k >> 3) + 1);
  210. *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
  211. return GetCachedPowerByIndex(index);
  212. }
  213. inline DiyFp GetCachedPower10(int exp, int *outExp) {
  214. RAPIDJSON_ASSERT(exp >= -348);
  215. unsigned index = static_cast<unsigned>(exp + 348) / 8u;
  216. *outExp = -348 + static_cast<int>(index) * 8;
  217. return GetCachedPowerByIndex(index);
  218. }
  219. #ifdef __GNUC__
  220. RAPIDJSON_DIAG_POP
  221. #endif
  222. #ifdef __clang__
  223. RAPIDJSON_DIAG_POP
  224. RAPIDJSON_DIAG_OFF(padded)
  225. #endif
  226. } // namespace internal
  227. RAPIDJSON_NAMESPACE_END
  228. #endif // RAPIDJSON_DIYFP_H_