diff --git a/include/fast_float/bigint.h b/include/fast_float/bigint.h
index 073ddf8..5a72caa 100644
--- a/include/fast_float/bigint.h
+++ b/include/fast_float/bigint.h
@@ -50,7 +50,7 @@ struct stackvec {
   stackvec &operator=(stackvec &&other) = delete;
 
   // create stack vector from existing limb span.
-  stackvec(limb_span s) {
+  FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) {
     FASTFLOAT_ASSERT(try_extend(s));
   }
 
@@ -97,13 +97,13 @@ struct stackvec {
     }
   }
   // add items to the vector, from a span, without bounds checking
-  void extend_unchecked(limb_span s) noexcept {
+  FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept {
     limb* ptr = data + length;
-    ::memcpy((void*)ptr, (const void*)s.ptr, sizeof(limb) * s.len());
+    std::copy_n(s.ptr, s.len(), ptr);
     set_len(len() + s.len());
   }
   // try to add items to the vector, returning if items were added
-  bool try_extend(limb_span s) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept {
     if (len() + s.len() <= capacity()) {
       extend_unchecked(s);
       return true;
@@ -114,6 +114,7 @@ struct stackvec {
   // resize the vector, without bounds checking
   // if the new size is longer than the vector, assign value to each
   // appended item.
+  FASTFLOAT_CONSTEXPR20
   void resize_unchecked(size_t new_len, limb value) noexcept {
     if (new_len > len()) {
       size_t count = new_len - len();
@@ -126,7 +127,7 @@ struct stackvec {
     }
   }
   // try to resize the vector, returning if the vector was resized.
-  bool try_resize(size_t new_len, limb value) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept {
     if (new_len > capacity()) {
       return false;
     } else {
@@ -160,14 +161,14 @@ uint64_t empty_hi64(bool& truncated) noexcept {
   return 0;
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint64_t uint64_hi64(uint64_t r0, bool& truncated) noexcept {
   truncated = false;
   int shl = leading_zeroes(r0);
   return r0 << shl;
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept {
   int shl = leading_zeroes(r0);
   if (shl == 0) {
@@ -180,19 +181,19 @@ uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept {
   }
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint64_t uint32_hi64(uint32_t r0, bool& truncated) noexcept {
   return uint64_hi64(r0, truncated);
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint64_t uint32_hi64(uint32_t r0, uint32_t r1, bool& truncated) noexcept {
   uint64_t x0 = r0;
   uint64_t x1 = r1;
   return uint64_hi64((x0 << 32) | x1, truncated);
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noexcept {
   uint64_t x0 = r0;
   uint64_t x1 = r1;
@@ -204,15 +205,16 @@ uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noe
 // we want an efficient operation. for msvc, where
 // we don't have built-in intrinsics, this is still
 // pretty fast.
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 limb scalar_add(limb x, limb y, bool& overflow) noexcept {
   limb z;
-  // gcc and clang
 #if defined(__has_builtin)
   #if __has_builtin(__builtin_add_overflow)
-    overflow = __builtin_add_overflow(x, y, &z);
-    return z;
+    if (!cpp20_and_in_constexpr()) {
+      overflow = __builtin_add_overflow(x, y, &z);
+      return z;
+    }
   #endif
 #endif
@@ -223,7 +225,7 @@ limb scalar_add(limb x, limb y, bool& overflow) noexcept {
 }
 
 // multiply two small integers, getting both the high and low bits.
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 limb scalar_mul(limb x, limb y, limb& carry) noexcept {
 #ifdef FASTFLOAT_64BIT_LIMB
   #if defined(__SIZEOF_INT128__)
@@ -251,7 +253,8 @@ limb scalar_mul(limb x, limb y, limb& carry) noexcept {
 // add scalar value to bigint starting from offset.
 // used in grade school multiplication
 template <uint16_t size>
-inline bool small_add_from(stackvec<size>& vec, limb y, size_t start) noexcept {
+inline FASTFLOAT_CONSTEXPR20
+bool small_add_from(stackvec<size>& vec, limb y, size_t start) noexcept {
   size_t index = start;
   limb carry = y;
   bool overflow;
@@ -268,13 +271,15 @@ inline bool small_add_from(stackvec<size>& vec, limb y, size_t start) noexcept {
 
 // add scalar value to bigint.
 template <uint16_t size>
-fastfloat_really_inline bool small_add(stackvec<size>& vec, limb y) noexcept {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool small_add(stackvec<size>& vec, limb y) noexcept {
   return small_add_from(vec, y, 0);
 }
 
 // multiply bigint by scalar value.
 template <uint16_t size>
-inline bool small_mul(stackvec<size>& vec, limb y) noexcept {
+inline FASTFLOAT_CONSTEXPR20
+bool small_mul(stackvec<size>& vec, limb y) noexcept {
   limb carry = 0;
   for (size_t index = 0; index < vec.len(); index++) {
     vec[index] = scalar_mul(vec[index], y, carry);
@@ -288,6 +293,7 @@ inline bool small_mul(stackvec<size>& vec, limb y) noexcept {
 // add bigint to bigint starting from index.
 // used in grade school multiplication
 template <uint16_t size>
+FASTFLOAT_CONSTEXPR20
 bool large_add_from(stackvec<size>& x, limb_span y, size_t start) noexcept {
   // the effective x buffer is from `xstart..x.len()`, so exit early
   // if we can't get that current range.
@@ -318,12 +324,14 @@ bool large_add_from(stackvec<size>& x, limb_span y, size_t start) noexcept {
 
 // add bigint to bigint.
 template <uint16_t size>
-fastfloat_really_inline bool large_add_from(stackvec<size>& x, limb_span y) noexcept {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool large_add_from(stackvec<size>& x, limb_span y) noexcept {
   return large_add_from(x, y, 0);
 }
 
 // grade-school multiplication algorithm
 template <uint16_t size>
+FASTFLOAT_CONSTEXPR20
 bool long_mul(stackvec<size>& x, limb_span y) noexcept {
   limb_span xs = limb_span(x.data, x.len());
   stackvec<size> z(xs);
@@ -352,6 +360,7 @@ bool long_mul(stackvec<size>& x, limb_span y) noexcept {
 
 // grade-school multiplication algorithm
 template <uint16_t size>
+FASTFLOAT_CONSTEXPR20
 bool large_mul(stackvec<size>& x, limb_span y) noexcept {
   if (y.len() == 1) {
     FASTFLOAT_TRY(small_mul(x, y[0]));
@@ -361,21 +370,52 @@ bool large_mul(stackvec<size>& x, limb_span y) noexcept {
   return true;
 }
 
+template <typename = void>
+struct pow5_tables {
+  static constexpr uint32_t large_step = 135;
+  static constexpr uint64_t small_power_of_5[] = {
+      1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL,
+      1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL,
+      6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL,
+      3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL,
+      2384185791015625UL, 11920928955078125UL, 59604644775390625UL,
+      298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL,
+  };
+#ifdef FASTFLOAT_64BIT_LIMB
+  constexpr static limb large_power_of_5[] = {
+      1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL,
+      10482974169319127550UL, 198276706040285095UL};
+#else
+  constexpr static limb large_power_of_5[] = {
+      4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U,
+      1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U};
+#endif
+};
+
+template <typename T>
+constexpr uint32_t pow5_tables<T>::large_step;
+
+template <typename T>
+constexpr uint64_t pow5_tables<T>::small_power_of_5[];
+
+template <typename T>
+constexpr limb pow5_tables<T>::large_power_of_5[];
+
 // big integer type. implements a small subset of big integer
 // arithmetic, using simple algorithms since asymptotically
 // faster algorithms are slower for a small number of limbs.
 // all operations assume the big-integer is normalized.
-struct bigint {
+struct bigint : pow5_tables<> {
   // storage of the limbs, in little-endian order.
   stackvec<bigint_limbs> vec;
 
-  bigint(): vec() {}
+  FASTFLOAT_CONSTEXPR20 bigint(): vec() {}
   bigint(const bigint &) = delete;
   bigint &operator=(const bigint &) = delete;
   bigint(bigint &&) = delete;
   bigint &operator=(bigint &&other) = delete;
 
-  bigint(uint64_t value): vec() {
+  FASTFLOAT_CONSTEXPR20 bigint(uint64_t value): vec() {
 #ifdef FASTFLOAT_64BIT_LIMB
     vec.push_unchecked(value);
 #else
@@ -387,7 +427,7 @@ struct bigint {
 
   // get the high 64 bits from the vector, and if bits were truncated.
   // this is to get the significant digits for the float.
-  uint64_t hi64(bool& truncated) const noexcept {
+  FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool& truncated) const noexcept {
 #ifdef FASTFLOAT_64BIT_LIMB
     if (vec.len() == 0) {
       return empty_hi64(truncated);
@@ -419,7 +459,7 @@ struct bigint {
   // positive, this is larger, otherwise they are equal.
   // the limbs are stored in little-endian order, so we
   // must compare the limbs in reverse order.
-  int compare(const bigint& other) const noexcept {
+  FASTFLOAT_CONSTEXPR20 int compare(const bigint& other) const noexcept {
     if (vec.len() > other.vec.len()) {
       return 1;
     } else if (vec.len() < other.vec.len()) {
@@ -440,7 +480,7 @@ struct bigint {
 
   // shift left each limb n bits, carrying over to the new limb
   // returns true if we were able to shift all the digits.
-  bool shl_bits(size_t n) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept {
     // Internally, for each item, we shift left by n, and add the previous
     // right shifted limb-bits.
     // For example, we transform (for u8) shifted left 2, to:
@@ -466,7 +506,7 @@ struct bigint {
   }
 
   // move the limbs left by `n` limbs.
-  bool shl_limbs(size_t n) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept {
     FASTFLOAT_DEBUG_ASSERT(n != 0);
     if (n + vec.len() > vec.capacity()) {
       return false;
@@ -487,7 +527,7 @@ struct bigint {
   }
 
   // move the limbs left by `n` bits.
-  bool shl(size_t n) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept {
     size_t rem = n % limb_bits;
     size_t div = n / limb_bits;
     if (rem != 0) {
@@ -500,7 +540,7 @@ struct bigint {
   }
 
   // get the number of leading zeros in the bigint.
-  int ctlz() const noexcept {
+  FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept {
     if (vec.is_empty()) {
       return 0;
     } else {
@@ -515,45 +555,27 @@ struct bigint {
   }
 
   // get the number of bits in the bigint.
-  int bit_length() const noexcept {
+  FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept {
     int lz = ctlz();
     return int(limb_bits * vec.len()) - lz;
   }
 
-  bool mul(limb y) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept {
     return small_mul(vec, y);
   }
 
-  bool add(limb y) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept {
     return small_add(vec, y);
   }
 
   // multiply as if by 2 raised to a power.
-  bool pow2(uint32_t exp) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept {
     return shl(exp);
   }
 
   // multiply as if by 5 raised to a power.
-  bool pow5(uint32_t exp) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept {
     // multiply by a power of 5
-    static constexpr uint32_t large_step = 135;
-    static constexpr uint64_t small_power_of_5[] = {
-      1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL,
-      1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL,
-      6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL,
-      3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL,
-      2384185791015625UL, 11920928955078125UL, 59604644775390625UL,
-      298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL,
-    };
-#ifdef FASTFLOAT_64BIT_LIMB
-    constexpr static limb large_power_of_5[] = {
-      1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL,
-      10482974169319127550UL, 198276706040285095UL};
-#else
-    constexpr static limb large_power_of_5[] = {
-      4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U,
-      1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U};
-#endif
     size_t large_length = sizeof(large_power_of_5) / sizeof(limb);
     limb_span large = limb_span(large_power_of_5, large_length);
     while (exp >= large_step) {
@@ -579,7 +601,7 @@ struct bigint {
   }
 
   // multiply as if by 10 raised to a power.
-  bool pow10(uint32_t exp) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept {
     FASTFLOAT_TRY(pow5(exp));
     return pow2(exp);
   }
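
Notes (not part of the patch):

The patch threads FASTFLOAT_CONSTEXPR20 through the bignum layer so the slow-path decimal arithmetic can run during constant evaluation on C++20 compilers, while compiling to plain runtime code everywhere else. Neither FASTFLOAT_CONSTEXPR20 nor cpp20_and_in_constexpr() is defined in bigint.h; judging from how they are used here, they presumably reduce to something along these lines (a sketch under that assumption, not the library's verbatim definitions):

    #include <type_traits>  // std::is_constant_evaluated (C++20)

    // Expands to `constexpr` only where std::is_constant_evaluated() exists;
    // otherwise the annotated functions remain ordinary runtime functions.
    #if defined(__cpp_lib_is_constant_evaluated)
      #define FASTFLOAT_CONSTEXPR20 constexpr
    #else
      #define FASTFLOAT_CONSTEXPR20
    #endif

    // True only while evaluating a constant expression under C++20.
    constexpr bool cpp20_and_in_constexpr() {
    #if defined(__cpp_lib_is_constant_evaluated)
      return std::is_constant_evaluated();
    #else
      return false;
    #endif
    }

Two substitutions follow directly from that goal. ::memcpy can never appear in a constant expression, so extend_unchecked switches to std::copy_n, which is constexpr as of C++20. In scalar_add, the __builtin_add_overflow intrinsic is kept for runtime calls but skipped under cpp20_and_in_constexpr(), presumably because the builtin cannot be relied on in constant evaluation across every supported compiler; the portable fallback (z = x + y; overflow = (z < x);) is constexpr-safe on its own.

The pow5_tables<> base class serves two purposes: a constexpr function may not declare `static` variables until C++23, so the power-of-5 tables had to move out of pow5(); and making the holder a class template lets this header-only library define the arrays out of line (one definition per instantiation, no ODR violation) without requiring C++17 inline variables.

A hypothetical compile-time smoke test, assuming a C++20 compiler where FASTFLOAT_CONSTEXPR20 expands to `constexpr` and that bigint remains reachable through the usual fast_float headers:

    #include "fast_float/fast_float.h"

    // Exercises the constexpr constructor, the power-of-5 tables, and hi64().
    constexpr uint64_t pow5_hi64() {
      fast_float::bigint b(1);
      b.pow5(30);                 // multi-limb result; exercises the tables
      bool truncated = false;
      return b.hi64(truncated);   // normalized top 64 bits, nonzero here
    }
    static_assert(pow5_hi64() != 0, "bigint arithmetic runs at compile time");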