From ba1344c0302165fc20df35bedf7b12922f8e95b8 Mon Sep 17 00:00:00 2001 From: IRainman Date: Sat, 12 Apr 2025 17:06:38 +0300 Subject: [PATCH] * carefully work with types in the library. * fix for some types errors. * fix small amount of not optimized code. * add more comments to the code. * unified of function binary_format::max_mantissa_fast_path() because it's do the same. --- include/fast_float/ascii_number.h | 30 ++--- include/fast_float/bigint.h | 157 ++++++++++++----------- include/fast_float/decimal_to_binary.h | 7 +- include/fast_float/digit_comparison.h | 86 +++++++------ include/fast_float/float_common.h | 169 ++++++++++++------------- 5 files changed, 225 insertions(+), 224 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 98e2223..a2c84dd 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -261,8 +261,8 @@ enum class parse_error { template struct parsed_number_string_t { // an unsigned int avoids signed overflows (which are bad) - uint64_t mantissa{0}; - int16_t exponent{0}; + am_mant_t mantissa{0}; + am_pow_t exponent{0}; UC const *lastmatch{nullptr}; #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN bool negative{false}; @@ -334,13 +334,13 @@ parse_number_string(UC const *p, UC const *pend, // multiplication answer.mantissa = 10 * answer.mantissa + - uint64_t(*p - + UC(*p - UC('0')); // might overflow, we will handle the overflow later ++p; } UC const *const end_of_integer_part = p; - uint16_t digit_count = - static_cast(end_of_integer_part - start_digits); + am_digits digit_count = + static_cast(end_of_integer_part - start_digits); answer.integer = span(start_digits, digit_count); #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { @@ -364,14 +364,14 @@ parse_number_string(UC const *p, UC const *pend, loop_parse_if_eight_digits(p, pend, answer.mantissa); while ((p != pend) && is_integer(*p)) { - uint8_t const digit = 
uint8_t(*p - UC('0')); + UC const digit = UC(*p - UC('0')); answer.mantissa = answer.mantissa * 10 + digit; // in rare cases, this will overflow, but that's ok ++p; } - answer.exponent = static_cast(before - p); - answer.fraction = span(before, static_cast(p - before)); + answer.exponent = static_cast(before - p); + answer.fraction = span(before, static_cast(p - before)); digit_count -= answer.exponent; #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { @@ -389,7 +389,7 @@ parse_number_string(UC const *p, UC const *pend, // We have now parsed the integer and the fraction part of the mantissa. // Now we can parse the explicit exponential part. - int16_t exp_number = 0; // explicit exponential part + am_pow_t exp_number = 0; // explicit exponential part if ((p != pend) && (chars_format_t(options.format & chars_format::scientific) && (UC('e') == *p) || (UC('E') == *p)) @@ -431,7 +431,7 @@ parse_number_string(UC const *p, UC const *pend, while ((p != pend) && is_integer(*p)) { if (exp_number < 0x1000) { // check for exponent overflow if we have too many digits. 
- uint8_t const digit = uint8_t(*p - UC('0')); + UC const digit = UC(*p - UC('0')); exp_number = 10 * exp_number + digit; } ++p; @@ -478,24 +478,24 @@ parse_number_string(UC const *p, UC const *pend, answer.mantissa = 0; p = answer.integer.ptr; UC const *int_end = p + answer.integer.len(); - uint64_t const minimal_nineteen_digit_integer{1000000000000000000}; + am_mant_t const minimal_nineteen_digit_integer{1000000000000000000}; while ((answer.mantissa < minimal_nineteen_digit_integer) && (p != int_end)) { - answer.mantissa = answer.mantissa * 10 + uint64_t(*p - UC('0')); + answer.mantissa = answer.mantissa * 10 + UC(*p - UC('0')); ++p; } if (answer.mantissa >= minimal_nineteen_digit_integer) { // We have a big integers - answer.exponent = int16_t(end_of_integer_part - p) + exp_number; + answer.exponent = am_pow_t(end_of_integer_part - p) + exp_number; } else { // We have a value with a fractional component. p = answer.fraction.ptr; UC const *frac_end = p + answer.fraction.len(); while ((answer.mantissa < minimal_nineteen_digit_integer) && (p != frac_end)) { - answer.mantissa = answer.mantissa * 10 + uint64_t(*p - UC('0')); + answer.mantissa = answer.mantissa * 10 + UC(*p - UC('0')); ++p; } - answer.exponent = int16_t(answer.fraction.ptr - p) + exp_number; + answer.exponent = am_pow_t(answer.fraction.ptr - p) + exp_number; } // We have now corrected both exponent and mantissa, to a truncated value } diff --git a/include/fast_float/bigint.h b/include/fast_float/bigint.h index 0dba623..489ad11 100644 --- a/include/fast_float/bigint.h +++ b/include/fast_float/bigint.h @@ -19,11 +19,11 @@ namespace fast_float { #if defined(FASTFLOAT_64BIT) && !defined(__sparc) #define FASTFLOAT_64BIT_LIMB 1 typedef uint64_t limb; -constexpr uint8_t limb_bits = 64; +constexpr limb_t limb_bits = 64; #else #define FASTFLOAT_32BIT_LIMB typedef uint32_t limb; -constexpr uint8_t limb_bits = 32; +constexpr limb_t limb_bits = 32; #endif typedef span limb_span; @@ -32,12 +32,13 @@ typedef span 
limb_span; // of bits required to store the largest bigint, which is // `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or // ~3600 bits, so we round to 4000. -constexpr uint16_t bigint_bits = 4000; -constexpr uint8_t bigint_limbs = bigint_bits / limb_bits; +typedef uint16_t bigint_bits_t; +constexpr bigint_bits_t bigint_bits = 4000; +constexpr limb_t bigint_limbs = bigint_bits / limb_bits; // vector-like type that is allocated on the stack. the entire // buffer is pre-allocated, and only the length changes. -template struct stackvec { +template struct stackvec { limb data[size]; // we never need more than 150 limbs uint8_t length{0}; @@ -53,31 +54,31 @@ template struct stackvec { FASTFLOAT_ASSERT(try_extend(s)); } - FASTFLOAT_CONSTEXPR14 limb &operator[](uint16_t index) noexcept { + FASTFLOAT_CONSTEXPR14 limb &operator[](limb_t index) noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); return data[index]; } - FASTFLOAT_CONSTEXPR14 const limb &operator[](uint16_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 const limb &operator[](limb_t index) const noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); return data[index]; } // index from the end of the container - FASTFLOAT_CONSTEXPR14 const limb &rindex(uint16_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 const limb &rindex(limb_t index) const noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); - uint16_t rindex = length - index - 1; + limb_t rindex = length - index - 1; return data[rindex]; } // set the length, without bounds checking. 
- FASTFLOAT_CONSTEXPR14 void set_len(uint8_t len) noexcept { length = len; } + FASTFLOAT_CONSTEXPR14 void set_len(limb_t len) noexcept { length = len; } - constexpr uint8_t len() const noexcept { return length; } + constexpr limb_t len() const noexcept { return length; } constexpr bool is_empty() const noexcept { return length == 0; } - constexpr uint8_t capacity() const noexcept { return size; } + constexpr limb_t capacity() const noexcept { return size; } // append item to vector, without bounds checking FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept { @@ -99,7 +100,7 @@ template struct stackvec { FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept { limb *ptr = data + length; std::copy_n(s.ptr, s.len(), ptr); - set_len(uint8_t(len() + s.len())); + set_len(limb_t(len() + s.len())); } // try to add items to the vector, returning if items were added @@ -116,32 +117,29 @@ template struct stackvec { // if the new size is longer than the vector, assign value to each // appended item. FASTFLOAT_CONSTEXPR20 - void resize_unchecked(uint8_t new_len, limb value) noexcept { + void resize_unchecked(limb_t new_len, limb value) noexcept { if (new_len > len()) { - uint8_t count = new_len - len(); + limb_t count = new_len - len(); limb *first = data + len(); limb *last = first + count; ::std::fill(first, last, value); - set_len(new_len); - } else { - set_len(new_len); } + set_len(new_len); } // try to resize the vector, returning if the vector was resized. - FASTFLOAT_CONSTEXPR20 bool try_resize(uint8_t new_len, limb value) noexcept { + FASTFLOAT_CONSTEXPR20 bool try_resize(limb_t new_len, limb value) noexcept { if (new_len > capacity()) { return false; - } else { - resize_unchecked(new_len, value); - return true; } + resize_unchecked(new_len, value); + return true; } // check if any limbs are non-zero after the given index. // this needs to be done in reverse order, since the index // is relative to the most significant limbs. 
- FASTFLOAT_CONSTEXPR14 bool nonzero(uint16_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 bool nonzero(limb_t index) const noexcept { while (index < len()) { if (rindex(index) != 0) { return true; @@ -256,16 +254,15 @@ scalar_mul(limb x, limb y, limb &carry) noexcept { // add scalar value to bigint starting from offset. // used in grade school multiplication -template +template inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec &vec, limb y, - uint32_t start) noexcept { - uint8_t index = (uint8_t)start; + limb_t start) noexcept { limb carry = y; bool overflow; - while (carry != 0 && index < vec.len()) { - vec[index] = scalar_add(vec[index], carry, overflow); + while (carry != 0 && start < vec.len()) { + vec[start] = scalar_add(vec[start], carry, overflow); carry = limb(overflow); - ++index; + ++start; } if (carry != 0) { FASTFLOAT_TRY(vec.try_push(carry)); @@ -274,18 +271,18 @@ inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec &vec, limb y, } // add scalar value to bigint. -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool small_add(stackvec &vec, limb y) noexcept { return small_add_from(vec, y, 0); } // multiply bigint by scalar value. -template +template inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec &vec, limb y) noexcept { limb carry = 0; - for (uint8_t index = 0; index != vec.len(); ++index) { + for (limb_t index = 0; index != vec.len(); ++index) { vec[index] = scalar_mul(vec[index], y, carry); } if (carry != 0) { @@ -296,17 +293,17 @@ inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec &vec, // add bigint to bigint starting from index. // used in grade school multiplication -template +template FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec &x, limb_span y, - uint8_t start) noexcept { + limb_t start) noexcept { // the effective x buffer is from `xstart..x.len()`, so exit early // if we can't get that current range. 
if (x.len() < start || y.len() > x.len() - start) { - FASTFLOAT_TRY(x.try_resize(uint8_t(y.len() + start), 0)); + FASTFLOAT_TRY(x.try_resize(limb_t(y.len() + start), 0)); } bool carry = false; - for (uint8_t index = 0; index < y.len(); ++index) { + for (limb_t index = 0; index < y.len(); ++index) { limb xi = x[index + start]; limb yi = y[index]; bool c1 = false; @@ -321,20 +318,20 @@ FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec &x, limb_span y, // handle overflow if (carry) { - FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start)); + FASTFLOAT_TRY(small_add_from(x, 1, limb_t(y.len() + start))); } return true; } // add bigint to bigint. -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec &x, limb_span y) noexcept { return large_add_from(x, y, 0); } // grade-school multiplication algorithm -template +template FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec &x, limb_span y) noexcept { limb_span xs = limb_span(x.data, x.len()); stackvec z(xs); @@ -343,7 +340,7 @@ FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec &x, limb_span y) noexcept { if (y.len() != 0) { limb y0 = y[0]; FASTFLOAT_TRY(small_mul(x, y0)); - for (uint8_t index = 1; index != y.len(); ++index) { + for (limb_t index = 1; index != y.len(); ++index) { limb yi = y[index]; stackvec zi; if (yi != 0) { @@ -362,7 +359,7 @@ FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec &x, limb_span y) noexcept { } // grade-school multiplication algorithm -template +template FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec &x, limb_span y) noexcept { if (y.len() == 1) { FASTFLOAT_TRY(small_mul(x, y[0])); @@ -491,7 +488,7 @@ struct bigint : pow5_tables<> { } else if (vec.len() < other.vec.len()) { return -1; } else { - for (uint8_t index = vec.len(); index > 0; --index) { + for (limb_t index = vec.len(); index != 0; --index) { limb xi = vec[index - 1]; limb yi = other.vec[index - 1]; if (xi > yi) { @@ -506,7 +503,7 @@ struct bigint : pow5_tables<> { // shift left each limb n bits, carrying over 
to the new limb // returns true if we were able to shift all the digits. - FASTFLOAT_CONSTEXPR20 bool shl_bits(uint16_t n) noexcept { + FASTFLOAT_CONSTEXPR20 bool shl_bits(bigint_bits_t n) noexcept { // Internally, for each item, we shift left by n, and add the previous // right shifted limb-bits. // For example, we transform (for u8) shifted left 2, to: @@ -515,10 +512,10 @@ struct bigint : pow5_tables<> { FASTFLOAT_DEBUG_ASSERT(n != 0); FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8); - uint16_t const shl = n; - uint16_t const shr = limb_bits - shl; + bigint_bits_t const shl = n; + bigint_bits_t const shr = limb_bits - shl; limb prev = 0; - for (uint8_t index = 0; index != vec.len(); ++index) { + for (limb_t index = 0; index != vec.len(); ++index) { limb xi = vec[index]; vec[index] = (xi << shl) | (prev >> shr); prev = xi; @@ -532,30 +529,32 @@ struct bigint : pow5_tables<> { } // move the limbs left by `n` limbs. - FASTFLOAT_CONSTEXPR20 bool shl_limbs(int16_t n) noexcept { + FASTFLOAT_CONSTEXPR20 bool shl_limbs(bigint_bits_t n) noexcept { FASTFLOAT_DEBUG_ASSERT(n != 0); if (n + vec.len() > vec.capacity()) { + // we can't shift more than the capacity of the vector. return false; - } else if (!vec.is_empty()) { - // move limbs - limb *dst = vec.data + n; - limb const *src = vec.data; - std::copy_backward(src, src + vec.len(), dst + vec.len()); - // fill in empty limbs - limb *first = vec.data; - limb *last = first + n; - ::std::fill(first, last, 0); - vec.set_len(uint8_t(n + vec.len())); - return true; - } else { + } + if (vec.is_empty()) { + // nothing to do return true; } + // move limbs + limb *dst = vec.data + n; + limb const *src = vec.data; + std::copy_backward(src, src + vec.len(), dst + vec.len()); + // fill in empty limbs + limb *first = vec.data; + limb *last = first + n; + ::std::fill(first, last, 0); + vec.set_len(limb_t(n + vec.len())); + return true; } // move the limbs left by `n` bits. 
- FASTFLOAT_CONSTEXPR20 bool shl(uint16_t n) noexcept { - uint16_t const rem = n % limb_bits; - uint16_t const div = n / limb_bits; + FASTFLOAT_CONSTEXPR20 bool shl(bigint_bits_t n) noexcept { + bigint_bits_t const rem = n % limb_bits; + bigint_bits_t const div = n / limb_bits; if (rem != 0) { FASTFLOAT_TRY(shl_bits(rem)); } @@ -566,23 +565,23 @@ struct bigint : pow5_tables<> { } // get the number of leading zeros in the bigint. - FASTFLOAT_CONSTEXPR20 uint8_t ctlz() const noexcept { + FASTFLOAT_CONSTEXPR20 limb_t ctlz() const noexcept { if (vec.is_empty()) { + // empty vector, no bits, no zeros. return 0; - } else { -#ifdef FASTFLOAT_64BIT_LIMB - return leading_zeroes(vec.rindex(0)); -#else - // no use defining a specialized leading_zeroes for a 32-bit type. - uint64_t r0 = vec.rindex(0); - return leading_zeroes(r0 << 32); -#endif } +#ifdef FASTFLOAT_64BIT_LIMB + return leading_zeroes(vec.rindex(0)); +#else + // no use defining a specialized leading_zeroes for a 32-bit type. + uint64_t r0 = vec.rindex(0); + return leading_zeroes(r0 << 32); +#endif } // get the number of bits in the bigint. - FASTFLOAT_CONSTEXPR20 uint16_t bit_length() const noexcept { - uint8_t lz = ctlz(); + FASTFLOAT_CONSTEXPR20 bigint_bits_t bit_length() const noexcept { + limb_t lz = ctlz(); return limb_bits * vec.len() - lz; } @@ -591,22 +590,22 @@ struct bigint : pow5_tables<> { FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); } // multiply as if by 2 raised to a power. - FASTFLOAT_CONSTEXPR20 bool pow2(int16_t exp) noexcept { return shl(exp); } + FASTFLOAT_CONSTEXPR20 bool pow2(am_pow_t exp) noexcept { return shl(exp); } // multiply as if by 5 raised to a power. 
- FASTFLOAT_CONSTEXPR20 bool pow5(int16_t exp) noexcept { + FASTFLOAT_CONSTEXPR20 bool pow5(am_pow_t exp) noexcept { // multiply by a power of 5 - uint8_t const large_length = sizeof(large_power_of_5) / sizeof(limb); + limb_t const large_length = sizeof(large_power_of_5) / sizeof(limb); limb_span const large = limb_span(large_power_of_5, large_length); while (exp >= large_step) { FASTFLOAT_TRY(large_mul(vec, large)); exp -= large_step; } #ifdef FASTFLOAT_64BIT_LIMB - uint8_t const small_step = 27; + limb_t const small_step = 27; limb const max_native = 7450580596923828125UL; #else - uint8_t const small_step = 13; + limb_t const small_step = 13; limb const max_native = 1220703125U; #endif while (exp >= small_step) { @@ -625,7 +624,7 @@ struct bigint : pow5_tables<> { } // multiply as if by 10 raised to a power. - FASTFLOAT_CONSTEXPR20 bool pow10(int16_t exp) noexcept { + FASTFLOAT_CONSTEXPR20 bool pow10(am_pow_t exp) noexcept { FASTFLOAT_TRY(pow5(exp)); return pow2(exp); } diff --git a/include/fast_float/decimal_to_binary.h b/include/fast_float/decimal_to_binary.h index a334e18..6a794fe 100644 --- a/include/fast_float/decimal_to_binary.h +++ b/include/fast_float/decimal_to_binary.h @@ -76,7 +76,7 @@ compute_error_scaled(int64_t q, uint64_t w, int32_t lz) noexcept { adjusted_mantissa answer; answer.mantissa = w << hilz; int32_t bias = binary::mantissa_explicit_bits() - binary::minimum_exponent(); - answer.power2 = int16_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + + answer.power2 = am_pow_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + invalid_am_bias); return answer; } @@ -143,9 +143,9 @@ compute_float(int64_t q, uint64_t w) noexcept { answer.mantissa = product.high >> shift; - answer.power2 = int16_t(detail::power(int32_t(q)) + upperbit - lz - + answer.power2 = am_pow_t(detail::power(int32_t(q)) + upperbit - lz - binary::minimum_exponent()); - if (answer.power2 <= 0) { // we have a subnormal? 
+ if (answer.power2 <= 0) { // we have a subnormal or very small value. // Here have that answer.power2 <= 0 so -answer.power2 >= 0 if (-answer.power2 + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you @@ -155,6 +155,7 @@ compute_float(int64_t q, uint64_t w) noexcept { // result should be zero return answer; } + // We have a subnormal number. We need to shift the mantissa to the right // next line is safe because -answer.power2 + 1 < 64 answer.mantissa >>= -answer.power2 + 1; // Thankfully, we can't have both "round-to-even" and subnormals because diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index d772ca1..73c2d42 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -41,8 +41,8 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL, template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int16_t scientific_exponent(parsed_number_string_t const &num) noexcept { - uint64_t mantissa = num.mantissa; - int16_t exponent = num.exponent; + am_mant_t mantissa = num.mantissa; + am_pow_t exponent = num.exponent; while (mantissa >= 10000) { mantissa /= 10000; exponent += 4; @@ -68,11 +68,15 @@ to_extended(T const &value) noexcept { constexpr equiv_uint hidden_bit_mask = binary_format::hidden_bit_mask(); adjusted_mantissa am; - int16_t bias = binary_format::mantissa_explicit_bits() - + am_pow_t bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); equiv_uint bits; #if FASTFLOAT_HAS_BIT_CAST - bits = std::bit_cast(value); + bits = +#if FASTFLOAT_HAS_BIT_CAST == 1 + std:: +#endif + bit_cast(value); #else ::memcpy(&bits, &value, sizeof(T)); #endif @@ -82,7 +86,7 @@ to_extended(T const &value) noexcept { am.mantissa = bits & mantissa_mask; } else { // normal - am.power2 = int16_t((bits & exponent_mask) >> + am.power2 = am_pow_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); am.power2 -= bias; am.mantissa = (bits & 
mantissa_mask) | hidden_bit_mask; @@ -108,14 +112,14 @@ to_extended_halfway(T const &value) noexcept { template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am, callback cb) noexcept { - int16_t mantissa_shift = 64 - binary_format::mantissa_explicit_bits() - 1; + am_pow_t mantissa_shift = 64 - binary_format::mantissa_explicit_bits() - 1; if (-am.power2 >= mantissa_shift) { // have a denormal float - int16_t shift = -am.power2 + 1; + am_pow_t shift = -am.power2 + 1; cb(am, std::min(shift, 64)); // check for round-up: if rounding-nearest carried us to the hidden bit. am.power2 = (am.mantissa < - (uint64_t(1) << binary_format::mantissa_explicit_bits())) + (am_mant_t(1) << binary_format::mantissa_explicit_bits())) ? 0 : 1; return; @@ -126,13 +130,13 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am, // check for carry if (am.mantissa >= - (uint64_t(2) << binary_format::mantissa_explicit_bits())) { - am.mantissa = (uint64_t(1) << binary_format::mantissa_explicit_bits()); + (am_mant_t(2) << binary_format::mantissa_explicit_bits())) { + am.mantissa = (am_mant_t(1) << binary_format::mantissa_explicit_bits()); ++am.power2; } // check for infinite: we could have carried to an infinite power - am.mantissa &= ~(uint64_t(1) << binary_format::mantissa_explicit_bits()); + am.mantissa &= ~(am_mant_t(1) << binary_format::mantissa_explicit_bits()); if (am.power2 >= binary_format::infinite_power()) { am.power2 = binary_format::infinite_power(); am.mantissa = 0; @@ -141,11 +145,11 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am, template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void -round_nearest_tie_even(adjusted_mantissa &am, int16_t shift, +round_nearest_tie_even(adjusted_mantissa &am, am_pow_t shift, callback cb) noexcept { - uint64_t const mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1; - uint64_t const halfway = (shift == 0) ? 
0 : uint64_t(1) << (shift - 1); - uint64_t truncated_bits = am.mantissa & mask; + am_mant_t const mask = (shift == 64) ? UINT64_MAX : (am_mant_t(1) << shift) - 1; + am_mant_t const halfway = (shift == 0) ? 0 : am_mant_t(1) << (shift - 1); + am_mant_t truncated_bits = am.mantissa & mask; bool is_above = truncated_bits > halfway; bool is_halfway = truncated_bits == halfway; @@ -158,11 +162,11 @@ round_nearest_tie_even(adjusted_mantissa &am, int16_t shift, am.power2 += shift; bool is_odd = (am.mantissa & 1) == 1; - am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above)); + am.mantissa += am_mant_t(cb(is_odd, is_halfway, is_above)); } fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void -round_down(adjusted_mantissa &am, int16_t shift) noexcept { +round_down(adjusted_mantissa &am, am_pow_t shift) noexcept { if (shift == 64) { am.mantissa = 0; } else { @@ -223,8 +227,8 @@ is_truncated(span s) noexcept { template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void -parse_eight_digits(UC const *&p, limb &value, uint16_t &counter, - uint16_t &count) noexcept { +parse_eight_digits(UC const *&p, limb &value, am_digits &counter, + am_digits &count) noexcept { value = value * 100000000 + parse_eight_digits_unrolled(p); p += 8; counter += 8; @@ -233,8 +237,8 @@ parse_eight_digits(UC const *&p, limb &value, uint16_t &counter, template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void -parse_one_digit(UC const *&p, limb &value, uint16_t &counter, - uint16_t &count) noexcept { +parse_one_digit(UC const *&p, limb &value, am_digits &counter, + am_digits &count) noexcept { value = value * 10 + limb(*p - UC('0')); ++p; ++counter; @@ -248,7 +252,7 @@ add_native(bigint &big, limb power, limb value) noexcept { } fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void -round_up_bigint(bigint &big, uint16_t &count) noexcept { +round_up_bigint(bigint &big, am_digits &count) noexcept { // need to round-up the digits, but need to avoid rounding // ....9999 to ...10000, which could cause a false 
halfway point. add_native(big, 10, 1); @@ -257,19 +261,19 @@ round_up_bigint(bigint &big, uint16_t &count) noexcept { // parse the significant digits into a big integer template -inline FASTFLOAT_CONSTEXPR20 uint16_t +inline FASTFLOAT_CONSTEXPR20 am_digits parse_mantissa(bigint &result, const parsed_number_string_t &num) noexcept { // try to minimize the number of big integer and scalar multiplication. // therefore, try to parse 8 digits at a time, and multiply by the largest // scalar value (9 or 19 digits) for each step. - uint16_t const max_digits = binary_format::max_digits(); - uint16_t counter = 0; - uint16_t digits = 0; + am_digits const max_digits = binary_format::max_digits(); + am_digits counter = 0; + am_digits digits = 0; limb value = 0; #ifdef FASTFLOAT_64BIT_LIMB - uint16_t const step = 19; + am_digits const step = 19; #else - uint16_t const step = 9; + am_digits const step = 9; #endif // process all integer digits. @@ -343,15 +347,15 @@ parse_mantissa(bigint &result, const parsed_number_string_t &num) noexcept { template inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa positive_digit_comp( - bigint &bigmant, adjusted_mantissa am, int16_t const exponent) noexcept { + bigint &bigmant, adjusted_mantissa am, am_pow_t const exponent) noexcept { FASTFLOAT_ASSERT(bigmant.pow10(exponent)); bool truncated; am.mantissa = bigmant.hi64(truncated); - int16_t bias = binary_format::mantissa_explicit_bits() - + am_pow_t bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); am.power2 = bigmant.bit_length() - 64 + bias; - round(am, [truncated](adjusted_mantissa &a, int16_t shift) { + round(am, [truncated](adjusted_mantissa &a, am_pow_t shift) { round_nearest_tie_even( a, shift, [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool { @@ -370,9 +374,9 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa positive_digit_comp( // are of the same magnitude. 
template inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( - bigint &bigmant, adjusted_mantissa am, int16_t const exponent) noexcept { + bigint &bigmant, adjusted_mantissa am, am_pow_t const exponent) noexcept { bigint &real_digits = bigmant; - int16_t const &real_exp = exponent; + am_pow_t const &real_exp = exponent; T b; { @@ -382,7 +386,7 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( // gcc7 bug: use a lambda to remove the noexcept qualifier bug with // -Wnoexcept-type. round(am_b, - [](adjusted_mantissa &a, int16_t shift) { round_down(a, shift); }); + [](adjusted_mantissa &a, am_pow_t shift) { round_down(a, shift); }); to_float( #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN false, @@ -391,11 +395,11 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( } adjusted_mantissa theor = to_extended_halfway(b); bigint theor_digits(theor.mantissa); - int16_t theor_exp = theor.power2; + am_pow_t theor_exp = theor.power2; // scale real digits and theor digits to be same power. 
- int16_t pow2_exp = theor_exp - real_exp; - uint16_t pow5_exp = -real_exp; + am_pow_t pow2_exp = theor_exp - real_exp; + am_pow_t pow5_exp = -real_exp; if (pow5_exp != 0) { FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp)); } @@ -407,7 +411,7 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( // compare digits, and use it to director rounding int ord = real_digits.compare(theor_digits); - round(am, [ord](adjusted_mantissa &a, int16_t shift) { + round(am, [ord](adjusted_mantissa &a, am_pow_t shift) { round_nearest_tie_even( a, shift, [ord](bool is_odd, bool _, bool __) -> bool { (void)_; // not needed, since we've done our comparison @@ -445,11 +449,11 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa digit_comp( am.power2 -= invalid_am_bias; bigint bigmant; - int16_t const sci_exp = scientific_exponent(num); + am_pow_t const sci_exp = scientific_exponent(num); - uint16_t const digits = parse_mantissa(bigmant, num); + am_digits const digits = parse_mantissa(bigmant, num); // can't underflow, since digits is at most max_digits. - int16_t const exponent = sci_exp + 1 - digits; + am_pow_t const exponent = sci_exp + 1 - digits; if (exponent >= 0) { return positive_digit_comp(bigmant, am, exponent); } else { diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 5ed55ad..b850a4f 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -33,6 +33,12 @@ namespace fast_float { +// The number of digits in the mantissa. +typedef uint16_t am_digits; + +// The number of bits in the limb. +typedef uint8_t limb_t; + typedef uint8_t chars_format_t; enum class chars_format : chars_format_t; @@ -280,12 +286,13 @@ struct is_supported_char_type > { }; +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN // Compares two ASCII strings in a case insensitive manner. 
template inline FASTFLOAT_CONSTEXPR14 bool fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase, - size_t length) noexcept { - for (size_t i = 0; i < length; ++i) { + uint8_t const length) noexcept { + for (uint8_t i = 0; i != length; ++i) { UC const actual = actual_mixedcase[i]; if ((actual < 256 ? actual | 32 : actual) != expected_lowercase[i]) { return false; @@ -293,6 +300,7 @@ fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase, } return true; } +#endif #ifndef FLT_EVAL_METHOD #error "FLT_EVAL_METHOD should be defined, please include cfloat." @@ -301,16 +309,16 @@ fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase, // a pointer and a length to a contiguous block of memory template struct span { T const *ptr; - uint16_t length; + am_digits length; - constexpr span(T const *_ptr, uint16_t _length) noexcept + constexpr span(T const *_ptr, am_digits _length) noexcept : ptr(_ptr), length(_length) {} constexpr span() noexcept : ptr(nullptr), length(0) {} - constexpr uint16_t len() const noexcept { return length; } + constexpr am_digits len() const noexcept { return length; } - FASTFLOAT_CONSTEXPR14 const T &operator[](uint16_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 const T &operator[](am_digits index) const noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); return ptr[index]; } @@ -327,7 +335,7 @@ struct value128 { }; /* Helper C++14 constexpr generic implementation of leading_zeroes */ -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint8_t +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 limb_t leading_zeroes_generic(uint64_t input_num, uint64_t last_bit = 0) noexcept { if (input_num & uint64_t(0xffffffff00000000)) { input_num >>= 32; @@ -352,11 +360,11 @@ leading_zeroes_generic(uint64_t input_num, uint64_t last_bit = 0) noexcept { if (input_num & uint64_t(0x2)) { /* input_num >>= 1; */ last_bit |= 1; } - return 63 - (uint8_t)last_bit; + return 63 - (limb_t)last_bit; } /* result 
might be undefined when input_num is zero */ -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint8_t +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb_t leading_zeroes(uint64_t input_num) noexcept { assert(input_num > 0); FASTFLOAT_ASSUME(input_num > 0); @@ -369,12 +377,12 @@ leading_zeroes(uint64_t input_num) noexcept { // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). _BitScanReverse64(&leading_zero, input_num); - return (uint8_t)(63 - leading_zero); + return (limb_t)(63 - leading_zero); #else - return (uint8_t)leading_zeroes_generic(input_num); + return (limb_t)leading_zeroes_generic(input_num); #endif #else - return (uint8_t)__builtin_clzll(input_num); + return (limb_t)__builtin_clzll(input_num); #endif } @@ -436,9 +444,21 @@ full_multiplication(uint64_t a, uint64_t b) noexcept { return answer; } +// Value of the mantissa. +typedef uint64_t am_mant_t; +// Size of bits in the mantissa. +typedef uint8_t am_bits_t; + +// Power bias is signed for handling a denormal float +// or an invalid mantissa. +typedef int16_t am_pow_t; + +// Bias so we can get the real exponent with an invalid adjusted_mantissa. +constexpr static am_pow_t invalid_am_bias = -0x8000; + struct adjusted_mantissa { - uint64_t mantissa; - int16_t power2; // a negative value indicates an invalid result + am_mant_t mantissa; + am_pow_t power2; adjusted_mantissa() noexcept = default; constexpr bool operator==(adjusted_mantissa const &o) const noexcept { @@ -450,35 +470,30 @@ struct adjusted_mantissa { } }; -// Bias so we can get the real exponent with an invalid adjusted_mantissa. 
-constexpr static int32_t invalid_am_bias = -0x8000; - // used for binary_format_lookup_tables::max_mantissa -constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5; +constexpr am_mant_t constant_55555 = 5 * 5 * 5 * 5 * 5; template struct binary_format_lookup_tables; template struct binary_format : binary_format_lookup_tables { using equiv_uint = equiv_uint_t; - // TODO add type for bit shift operations and use it. - // TODO add type for exponent operations and use it. - static constexpr uint8_t mantissa_explicit_bits(); - static constexpr int16_t minimum_exponent(); - static constexpr int16_t infinite_power(); - static constexpr uint8_t sign_index(); - static constexpr int8_t + static constexpr am_bits_t mantissa_explicit_bits(); + static constexpr am_pow_t minimum_exponent(); + static constexpr am_pow_t infinite_power(); + static constexpr am_bits_t sign_index(); + static constexpr am_pow_t min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST - static constexpr int8_t max_exponent_fast_path(); - static constexpr int16_t max_exponent_round_to_even(); - static constexpr int16_t min_exponent_round_to_even(); + static constexpr am_pow_t max_exponent_fast_path(); + static constexpr am_pow_t max_exponent_round_to_even(); + static constexpr am_pow_t min_exponent_round_to_even(); static constexpr equiv_uint max_mantissa_fast_path(int64_t power); static constexpr equiv_uint max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST - static constexpr int16_t largest_power_of_ten(); - static constexpr int16_t smallest_power_of_ten(); + static constexpr am_pow_t largest_power_of_ten(); + static constexpr am_pow_t smallest_power_of_ten(); static constexpr T exact_power_of_ten(int64_t power); - static constexpr uint16_t max_digits(); + static constexpr am_digits max_digits(); static constexpr equiv_uint exponent_mask(); static constexpr equiv_uint mantissa_mask(); static constexpr equiv_uint hidden_bit_mask(); @@ -568,7 +583,7 @@ constexpr uint64_t 
binary_format_lookup_tables::max_mantissa[]; #endif template <> -inline constexpr int8_t binary_format::min_exponent_fast_path() { +inline constexpr am_pow_t binary_format::min_exponent_fast_path() { #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) return 0; #else @@ -577,7 +592,7 @@ inline constexpr int8_t binary_format::min_exponent_fast_path() { } template <> -inline constexpr int8_t binary_format::min_exponent_fast_path() { +inline constexpr am_pow_t binary_format::min_exponent_fast_path() { #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) return 0; #else @@ -586,81 +601,76 @@ inline constexpr int8_t binary_format::min_exponent_fast_path() { } template <> -inline constexpr uint8_t binary_format::mantissa_explicit_bits() { +inline constexpr am_bits_t binary_format::mantissa_explicit_bits() { return 52; } template <> -inline constexpr uint8_t binary_format::mantissa_explicit_bits() { +inline constexpr am_bits_t binary_format::mantissa_explicit_bits() { return 23; } template <> -inline constexpr int16_t binary_format::max_exponent_round_to_even() { +inline constexpr am_pow_t binary_format::max_exponent_round_to_even() { return 23; } template <> -inline constexpr int16_t binary_format::max_exponent_round_to_even() { +inline constexpr am_pow_t binary_format::max_exponent_round_to_even() { return 10; } template <> -inline constexpr int16_t binary_format::min_exponent_round_to_even() { +inline constexpr am_pow_t binary_format::min_exponent_round_to_even() { return -4; } template <> -inline constexpr int16_t binary_format::min_exponent_round_to_even() { +inline constexpr am_pow_t binary_format::min_exponent_round_to_even() { return -17; } -template <> inline constexpr int16_t binary_format::minimum_exponent() { +template <> inline constexpr am_pow_t binary_format::minimum_exponent() { return -1023; } -template <> inline constexpr int16_t binary_format::minimum_exponent() { +template <> inline constexpr am_pow_t binary_format::minimum_exponent() { return -127; } 
-template <> inline constexpr int16_t binary_format::infinite_power() { +template <> inline constexpr am_pow_t binary_format::infinite_power() { return 0x7FF; } -template <> inline constexpr int16_t binary_format::infinite_power() { +template <> inline constexpr am_pow_t binary_format::infinite_power() { return 0xFF; } #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN -template <> inline constexpr uint8_t binary_format::sign_index() { +template <> inline constexpr am_bits_t binary_format::sign_index() { return 63; } -template <> inline constexpr uint8_t binary_format::sign_index() { +template <> inline constexpr am_bits_t binary_format::sign_index() { return 31; } #endif template <> -inline constexpr int8_t binary_format::max_exponent_fast_path() { +inline constexpr am_pow_t binary_format::max_exponent_fast_path() { return 22; } template <> -inline constexpr int8_t binary_format::max_exponent_fast_path() { +inline constexpr am_pow_t binary_format::max_exponent_fast_path() { return 10; } -template <> -inline constexpr uint64_t binary_format::max_mantissa_fast_path() { - return uint64_t(2) << mantissa_explicit_bits(); -} - -template <> -inline constexpr uint32_t binary_format::max_mantissa_fast_path() { - return uint32_t(2) << mantissa_explicit_bits(); +template +inline constexpr binary_format::equiv_uint binary_format::max_mantissa_fast_path() { + return binary_format::equiv_uint(2) << mantissa_explicit_bits(); } // credit: Jakub Jelínek @@ -728,12 +738,6 @@ binary_format::mantissa_explicit_bits() { return 10; } -template <> -inline constexpr int8_t -binary_format::max_mantissa_fast_path() { - return uint16_t(2) << mantissa_explicit_bits(); -} - template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path(int64_t power) { @@ -763,37 +767,37 @@ binary_format::min_exponent_round_to_even() { } template <> -inline constexpr int16_t binary_format::minimum_exponent() { +inline constexpr am_pow_t binary_format::minimum_exponent() { return -15; } template
<> -inline constexpr int16_t binary_format::infinite_power() { +inline constexpr am_pow_t binary_format::infinite_power() { return 0x1F; } #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN template <> -inline constexpr uint8_t binary_format::sign_index() { +inline constexpr am_bits_t binary_format::sign_index() { return 15; } #endif template <> -inline constexpr int16_t binary_format::largest_power_of_ten() { +inline constexpr am_pow_t binary_format::largest_power_of_ten() { return 4; } template <> -inline constexpr int16_t +inline constexpr am_pow_t binary_format::smallest_power_of_ten() { return -27; } template <> -inline constexpr uint8_t binary_format::max_digits() { +inline constexpr am_digits binary_format::max_digits() { return 22; } #endif // __STDCPP_FLOAT16_T__ @@ -860,13 +864,6 @@ binary_format::mantissa_explicit_bits() { return 7; } -template <> -inline constexpr binary_format::equiv_uint -binary_format::max_mantissa_fast_path() { - return binary_format::equiv_uint(2) - << mantissa_explicit_bits(); -} - template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path(int64_t power) { @@ -884,24 +881,24 @@ binary_format::min_exponent_fast_path() { } template <> -inline constexpr int16_t +inline constexpr am_pow_t binary_format::max_exponent_round_to_even() { return 3; } template <> -inline constexpr int16_t +inline constexpr am_pow_t binary_format::min_exponent_round_to_even() { return -24; } template <> -inline constexpr int16_t binary_format::minimum_exponent() { +inline constexpr am_pow_t binary_format::minimum_exponent() { return -127; } template <> -inline constexpr int16_t binary_format::infinite_power() { +inline constexpr am_pow_t binary_format::infinite_power() { return 0xFF; } @@ -915,13 +912,13 @@ inline constexpr uint8_t binary_format::sign_index() { #endif template <> -inline constexpr int16_t +inline constexpr am_pow_t binary_format::largest_power_of_ten() { return 38; } template <> -inline constexpr int16_t +inline constexpr
am_pow_t binary_format::smallest_power_of_ten() { return -60; } @@ -972,30 +969,30 @@ inline constexpr float binary_format::exact_power_of_ten(int64_t power) { } template <> -inline constexpr int16_t binary_format::largest_power_of_ten() { +inline constexpr am_pow_t binary_format::largest_power_of_ten() { return 308; } template <> -inline constexpr int16_t binary_format::largest_power_of_ten() { +inline constexpr am_pow_t binary_format::largest_power_of_ten() { return 38; } template <> -inline constexpr int16_t binary_format::smallest_power_of_ten() { +inline constexpr am_pow_t binary_format::smallest_power_of_ten() { return -342; } template <> -inline constexpr int16_t binary_format::smallest_power_of_ten() { +inline constexpr am_pow_t binary_format::smallest_power_of_ten() { return -64; } -template <> inline constexpr uint16_t binary_format::max_digits() { +template <> inline constexpr am_digits binary_format::max_digits() { return 769; } -template <> inline constexpr uint16_t binary_format::max_digits() { +template <> inline constexpr am_digits binary_format::max_digits() { return 114; }