From 8e1fda5d08a3e0bce1bb0fe1ac1a4d14cb09342a Mon Sep 17 00:00:00 2001 From: IRainman Date: Thu, 10 Apr 2025 17:18:08 +0300 Subject: [PATCH] fixes and cleanup for the parse_number_string function. exponent value always fits in int16_t. original main: Tests: time is: 44278ms. size of my tests 389.0k size of my program 164.0k my main: Tests: time is: 42015ms. size of my tests 389.0k size of my program 164.0k my main with FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN Tests: time is: 41282ms. size of my tests 386.5k size of my program 161.5k After this I'll try it on my partner's Linux machine with the original tests for a much better comparison. --- include/fast_float/ascii_number.h | 77 +++++++++++---------- include/fast_float/bigint.h | 96 +++++++++++++------------- include/fast_float/decimal_to_binary.h | 12 ++-- include/fast_float/digit_comparison.h | 70 ++++++++++--------- include/fast_float/float_common.h | 24 +++---- include/fast_float/parse_number.h | 2 +- 6 files changed, 143 insertions(+), 138 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index a3dd7c4..ec2b6b8 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -50,7 +50,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t read8_to_u64(UC const *chars) { if (cpp20_and_in_constexpr() || !std::is_same::value) { uint64_t val = 0; - for (int i = 0; i < 8; ++i) { + for (uint8_t i = 0; i != 8; ++i) { val |= uint64_t(uint8_t(*chars)) << (i * 8); ++chars; } @@ -261,7 +261,7 @@ enum class parse_error { template struct parsed_number_string_t { uint64_t mantissa{0}; - int32_t exponent{0}; + int16_t exponent{0}; #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN bool negative{false}; #endif @@ -327,18 +327,17 @@ parse_number_string(UC const *p, UC const *pend, UC const *const start_digits = p; - uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) - + // an unsigned int avoids signed overflows (which are 
bad) while ((p != pend) && is_integer(*p)) { // a multiplication by 10 is cheaper than an arbitrary integer // multiplication - i = 10 * i + + answer.mantissa = 10 * answer.mantissa + uint64_t(*p - UC('0')); // might overflow, we will handle the overflow later ++p; } UC const *const end_of_integer_part = p; - uint32_t digit_count = uint32_t(end_of_integer_part - start_digits); + uint16_t digit_count = uint16_t(end_of_integer_part - start_digits); answer.integer = span(start_digits, digit_count); #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { @@ -353,43 +352,46 @@ parse_number_string(UC const *p, UC const *pend, } #endif - int32_t exponent = 0; bool const has_decimal_point = (p != pend) && (*p == options.decimal_point); if (has_decimal_point) { ++p; UC const *before = p; + uint16_t fraction = 0; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. 
- loop_parse_if_eight_digits(p, pend, i); + loop_parse_if_eight_digits(p, pend, answer.mantissa); while ((p != pend) && is_integer(*p)) { uint8_t const digit = uint8_t(*p - UC('0')); - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + answer.mantissa = answer.mantissa * 10 + digit; // in rare cases, this will overflow, but that's ok ++p; } - exponent = int32_t(before - p); - answer.fraction = span(before, uint32_t(p - before)); - digit_count -= exponent; - } + fraction = uint16_t(before - p); + answer.fraction = span(before, uint16_t(p - before)); + digit_count -= fraction; #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN - FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { - // at least 1 digit in fractional part - if (has_decimal_point && exponent == 0) { - return report_parse_error(p, + FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) { + // at least 1 digit in fractional part + if (has_decimal_point && fraction == 0) { + return report_parse_error(p, parse_error::no_digits_in_fractional_part); + } } - } #endif + } else if (digit_count == 0) { // we must have encountered at least one integer! return report_parse_error(p, parse_error::no_digits_in_mantissa); } - int32_t exp_number = 0; // explicit exponential part + // We have now parsed the integer and the fraction part of the mantissa. + + // Now we can parse the exponent part. 
if (p != pend && (uint8_t(options.format & chars_format::scientific) && - ((UC('e') == *p) || (UC('E') == *p))) + (UC('e') == *p) || (UC('E') == *p)) #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN || (uint8_t(options.format & detail::basic_fortran_fmt) && - (UC('d') == *p) || (UC('D') == *p)) + ((UC('+') == *p) || (UC('-') == *p) || + (UC('d') == *p) || (UC('D') == *p))) #endif ) { UC const *location_of_e = p; @@ -416,14 +418,16 @@ parse_number_string(UC const *p, UC const *pend, p = location_of_e; } else { while ((p != pend) && is_integer(*p)) { - uint8_t const digit = uint8_t(*p - UC('0')); - exp_number = 10 * exp_number + digit; + if (answer.exponent < 0x1000) { + // check for exponent overflow if we have too many digits. + uint8_t const digit = uint8_t(*p - UC('0')); + answer.exponent = 10 * answer.exponent + digit; + } ++p; } if (neg_exp) { - exp_number = -exp_number; + answer.exponent = -answer.exponent; } - exponent += exp_number; } } else { // If it scientific and not fixed, we have to bail out. @@ -459,30 +463,28 @@ parse_number_string(UC const *p, UC const *pend, // Let us start again, this time, avoiding overflows. // We don't need to check if is_integer, since we use the // pre-tokenized spans from above. 
- i = 0; + answer.mantissa = 0; p = answer.integer.ptr; UC const *int_end = p + answer.integer.len(); uint64_t const minimal_nineteen_digit_integer{1000000000000000000}; - while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - UC('0')); + while ((answer.mantissa < minimal_nineteen_digit_integer) && (p != int_end)) { + answer.mantissa = answer.mantissa * 10 + uint64_t(*p - UC('0')); ++p; } - if (i >= minimal_nineteen_digit_integer) { // We have a big integers - exponent = uint32_t(end_of_integer_part - p) + exp_number; + if (answer.mantissa >= minimal_nineteen_digit_integer) { // We have a big integers + answer.exponent += int16_t(end_of_integer_part - p); } else { // We have a value with a fractional component. p = answer.fraction.ptr; UC const *frac_end = p + answer.fraction.len(); - while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - UC('0')); + while ((answer.mantissa < minimal_nineteen_digit_integer) && (p != frac_end)) { + answer.mantissa = answer.mantissa * 10 + uint64_t(*p - UC('0')); ++p; } - exponent = uint32_t(answer.fraction.ptr - p) + exp_number; + answer.exponent += int16_t(answer.fraction.ptr - p); } - // We have now corrected both exponent and i, to a truncated value + // We have now corrected both exponent and mantissa, to a truncated value } } - answer.exponent = exponent; - answer.mantissa = i; return answer; } @@ -518,7 +520,6 @@ parse_int_string(UC const *p, UC const *pend, T &value, UC const *const start_num = p; - // use SIMD here? 
while (p != pend && *p == UC('0')) { ++p; } @@ -541,7 +542,7 @@ parse_int_string(UC const *p, UC const *pend, T &value, p++; } - uint32_t const digit_count = uint32_t(p - start_digits); + uint16_t const digit_count = uint16_t(p - start_digits); if (digit_count == 0) { if (has_leading_zeros) { diff --git a/include/fast_float/bigint.h b/include/fast_float/bigint.h index 7a481b4..aa18c2f 100644 --- a/include/fast_float/bigint.h +++ b/include/fast_float/bigint.h @@ -19,11 +19,11 @@ namespace fast_float { #if defined(FASTFLOAT_64BIT) && !defined(__sparc) #define FASTFLOAT_64BIT_LIMB 1 typedef uint64_t limb; -constexpr uint32_t limb_bits = 64; +constexpr uint16_t limb_bits = 64; #else #define FASTFLOAT_32BIT_LIMB typedef uint32_t limb; -constexpr uint32_t limb_bits = 32; +constexpr uint16_t limb_bits = 32; #endif typedef span limb_span; @@ -32,15 +32,15 @@ typedef span limb_span; // of bits required to store the largest bigint, which is // `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or // ~3600 bits, so we round to 4000. -constexpr uint32_t bigint_bits = 4000; -constexpr uint32_t bigint_limbs = bigint_bits / limb_bits; +constexpr uint16_t bigint_bits = 4000; +constexpr uint16_t bigint_limbs = bigint_bits / limb_bits; // vector-like type that is allocated on the stack. the entire // buffer is pre-allocated, and only the length changes. 
-template struct stackvec { +template struct stackvec { limb data[size]; // we never need more than 150 limbs - uint32_t length{0}; + uint8_t length{0}; FASTFLOAT_CONSTEXPR20 stackvec() noexcept = default; stackvec(stackvec const &) = delete; @@ -53,33 +53,33 @@ template struct stackvec { FASTFLOAT_ASSERT(try_extend(s)); } - FASTFLOAT_CONSTEXPR14 limb &operator[](uint32_t index) noexcept { + FASTFLOAT_CONSTEXPR14 limb &operator[](uint16_t index) noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); return data[index]; } - FASTFLOAT_CONSTEXPR14 const limb &operator[](uint32_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 const limb &operator[](uint16_t index) const noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); return data[index]; } // index from the end of the container - FASTFLOAT_CONSTEXPR14 const limb &rindex(uint32_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 const limb &rindex(uint16_t index) const noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); - uint32_t rindex = length - index - 1; + uint16_t rindex = length - index - 1; return data[rindex]; } // set the length, without bounds checking. - FASTFLOAT_CONSTEXPR14 void set_len(uint32_t len) noexcept { + FASTFLOAT_CONSTEXPR14 void set_len(uint8_t len) noexcept { length = len; } - constexpr uint32_t len() const noexcept { return length; } + constexpr uint8_t len() const noexcept { return length; } constexpr bool is_empty() const noexcept { return length == 0; } - constexpr uint32_t capacity() const noexcept { return size; } + constexpr uint8_t capacity() const noexcept { return size; } // append item to vector, without bounds checking FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept { @@ -118,9 +118,9 @@ template struct stackvec { // if the new size is longer than the vector, assign value to each // appended item. 
FASTFLOAT_CONSTEXPR20 - void resize_unchecked(uint32_t new_len, limb value) noexcept { + void resize_unchecked(uint8_t new_len, limb value) noexcept { if (new_len > len()) { - uint32_t count = new_len - len(); + uint8_t count = new_len - len(); limb *first = data + len(); limb *last = first + count; ::std::fill(first, last, value); @@ -131,7 +131,7 @@ template struct stackvec { } // try to resize the vector, returning if the vector was resized. - FASTFLOAT_CONSTEXPR20 bool try_resize(uint32_t new_len, limb value) noexcept { + FASTFLOAT_CONSTEXPR20 bool try_resize(uint8_t new_len, limb value) noexcept { if (new_len > capacity()) { return false; } else { @@ -143,7 +143,7 @@ template struct stackvec { // check if any limbs are non-zero after the given index. // this needs to be done in reverse order, since the index // is relative to the most significant limbs. - FASTFLOAT_CONSTEXPR14 bool nonzero(uint32_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 bool nonzero(uint16_t index) const noexcept { while (index < len()) { if (rindex(index) != 0) { return true; @@ -258,10 +258,10 @@ scalar_mul(limb x, limb y, limb &carry) noexcept { // add scalar value to bigint starting from offset. // used in grade school multiplication -template +template inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec &vec, limb y, uint32_t start) noexcept { - uint32_t index = start; + uint8_t index = (uint8_t)start; limb carry = y; bool overflow; while (carry != 0 && index < vec.len()) { @@ -276,18 +276,18 @@ inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec &vec, limb y, } // add scalar value to bigint. -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool small_add(stackvec &vec, limb y) noexcept { return small_add_from(vec, y, 0); } // multiply bigint by scalar value. 
-template +template inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec &vec, limb y) noexcept { limb carry = 0; - for (uint32_t index = 0; index != vec.len(); ++index) { + for (uint8_t index = 0; index != vec.len(); ++index) { vec[index] = scalar_mul(vec[index], y, carry); } if (carry != 0) { @@ -298,9 +298,9 @@ inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec &vec, // add bigint to bigint starting from index. // used in grade school multiplication -template +template FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec &x, limb_span y, - uint32_t start) noexcept { + uint8_t start) noexcept { // the effective x buffer is from `xstart..x.len()`, so exit early // if we can't get that current range. if (x.len() < start || y.len() > x.len() - start) { @@ -308,7 +308,7 @@ FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec &x, limb_span y, } bool carry = false; - for (uint32_t index = 0; index < y.len(); ++index) { + for (uint8_t index = 0; index < y.len(); ++index) { limb xi = x[index + start]; limb yi = y[index]; bool c1 = false; @@ -329,14 +329,14 @@ FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec &x, limb_span y, } // add bigint to bigint. 
-template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec &x, limb_span y) noexcept { return large_add_from(x, y, 0); } // grade-school multiplication algorithm -template +template FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec &x, limb_span y) noexcept { limb_span xs = limb_span(x.data, x.len()); stackvec z(xs); @@ -345,7 +345,7 @@ FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec &x, limb_span y) noexcept { if (y.len() != 0) { limb y0 = y[0]; FASTFLOAT_TRY(small_mul(x, y0)); - for (uint32_t index = 1; index != y.len(); ++index) { + for (uint8_t index = 1; index != y.len(); ++index) { limb yi = y[index]; stackvec zi; if (yi != 0) { @@ -364,7 +364,7 @@ FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec &x, limb_span y) noexcept { } // grade-school multiplication algorithm -template +template FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec &x, limb_span y) noexcept { if (y.len() == 1) { FASTFLOAT_TRY(small_mul(x, y[0])); @@ -493,7 +493,7 @@ struct bigint : pow5_tables<> { } else if (vec.len() < other.vec.len()) { return -1; } else { - for (uint32_t index = vec.len(); index > 0; --index) { + for (uint8_t index = vec.len(); index > 0; --index) { limb xi = vec[index - 1]; limb yi = other.vec[index - 1]; if (xi > yi) { @@ -508,7 +508,7 @@ struct bigint : pow5_tables<> { // shift left each limb n bits, carrying over to the new limb // returns true if we were able to shift all the digits. - FASTFLOAT_CONSTEXPR20 bool shl_bits(uint32_t n) noexcept { + FASTFLOAT_CONSTEXPR20 bool shl_bits(uint16_t n) noexcept { // Internally, for each item, we shift left by n, and add the previous // right shifted limb-bits. 
// For example, we transform (for u8) shifted left 2, to: @@ -517,10 +517,10 @@ struct bigint : pow5_tables<> { FASTFLOAT_DEBUG_ASSERT(n != 0); FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8); - uint32_t const shl = n; - uint32_t const shr = limb_bits - shl; + uint16_t const shl = n; + uint16_t const shr = limb_bits - shl; limb prev = 0; - for (uint32_t index = 0; index != vec.len(); ++index) { + for (uint8_t index = 0; index != vec.len(); ++index) { limb xi = vec[index]; vec[index] = (xi << shl) | (prev >> shr); prev = xi; @@ -534,7 +534,7 @@ struct bigint : pow5_tables<> { } // move the limbs left by `n` limbs. - FASTFLOAT_CONSTEXPR20 bool shl_limbs(uint32_t n) noexcept { + FASTFLOAT_CONSTEXPR20 bool shl_limbs(int16_t n) noexcept { FASTFLOAT_DEBUG_ASSERT(n != 0); if (n + vec.len() > vec.capacity()) { return false; @@ -555,9 +555,9 @@ struct bigint : pow5_tables<> { } // move the limbs left by `n` bits. - FASTFLOAT_CONSTEXPR20 bool shl(uint32_t n) noexcept { - uint32_t const rem = n % limb_bits; - uint32_t const div = n / limb_bits; + FASTFLOAT_CONSTEXPR20 bool shl(uint16_t n) noexcept { + uint16_t const rem = n % limb_bits; + uint16_t const div = n / limb_bits; if (rem != 0) { FASTFLOAT_TRY(shl_bits(rem)); } @@ -568,7 +568,7 @@ struct bigint : pow5_tables<> { } // get the number of leading zeros in the bigint. - FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept { + FASTFLOAT_CONSTEXPR20 uint8_t ctlz() const noexcept { if (vec.is_empty()) { return 0; } else { @@ -583,9 +583,9 @@ struct bigint : pow5_tables<> { } // get the number of bits in the bigint. 
- FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept { - int lz = ctlz(); - return int(limb_bits * vec.len()) - lz; + FASTFLOAT_CONSTEXPR20 uint16_t bit_length() const noexcept { + uint16_t lz = ctlz(); + return uint16_t(limb_bits * vec.len()) - lz; } FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); } @@ -593,22 +593,22 @@ struct bigint : pow5_tables<> { FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); } // multiply as if by 2 raised to a power. - FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); } + FASTFLOAT_CONSTEXPR20 bool pow2(int16_t exp) noexcept { return shl(exp); } // multiply as if by 5 raised to a power. - FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept { + FASTFLOAT_CONSTEXPR20 bool pow5(int16_t exp) noexcept { // multiply by a power of 5 - size_t const large_length = sizeof(large_power_of_5) / sizeof(limb); + uint8_t const large_length = sizeof(large_power_of_5) / sizeof(limb); limb_span const large = limb_span(large_power_of_5, large_length); while (exp >= large_step) { FASTFLOAT_TRY(large_mul(vec, large)); exp -= large_step; } #ifdef FASTFLOAT_64BIT_LIMB - uint32_t const small_step = 27; + uint8_t const small_step = 27; limb const max_native = 7450580596923828125UL; #else - uint32_t const small_step = 13; + uint8_t const small_step = 13; limb const max_native = 1220703125U; #endif while (exp >= small_step) { @@ -627,7 +627,7 @@ struct bigint : pow5_tables<> { } // multiply as if by 10 raised to a power. 
- FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept { + FASTFLOAT_CONSTEXPR20 bool pow10(int16_t exp) noexcept { FASTFLOAT_TRY(pow5(exp)); return pow2(exp); } diff --git a/include/fast_float/decimal_to_binary.h b/include/fast_float/decimal_to_binary.h index 2255716..a334e18 100644 --- a/include/fast_float/decimal_to_binary.h +++ b/include/fast_float/decimal_to_binary.h @@ -71,12 +71,12 @@ constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept { // for significant digits already multiplied by 10 ** q. template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa -compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept { - int hilz = int(w >> 63) ^ 1; +compute_error_scaled(int64_t q, uint64_t w, int32_t lz) noexcept { + int32_t hilz = int32_t(w >> 63) ^ 1; adjusted_mantissa answer; answer.mantissa = w << hilz; - int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent(); - answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + + int32_t bias = binary::mantissa_explicit_bits() - binary::minimum_exponent(); + answer.power2 = int16_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + invalid_am_bias); return answer; } @@ -143,7 +143,7 @@ compute_float(int64_t q, uint64_t w) noexcept { answer.mantissa = product.high >> shift; - answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - + answer.power2 = int16_t(detail::power(int32_t(q)) + upperbit - lz - binary::minimum_exponent()); if (answer.power2 <= 0) { // we have a subnormal? 
// Here have that answer.power2 <= 0 so -answer.power2 >= 0 @@ -196,7 +196,7 @@ compute_float(int64_t q, uint64_t w) noexcept { answer.mantissa >>= 1; if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) { answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits()); - answer.power2++; // undo previous addition + ++answer.power2; // undo previous addition } answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits()); diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index 4090a69..82a2953 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -39,10 +39,10 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL, // effect on performance: in order to have a faster algorithm, we'd need // to slow down performance for faster algorithms, and this is still fast. template -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int16_t scientific_exponent(parsed_number_string_t const &num) noexcept { uint64_t mantissa = num.mantissa; - int32_t exponent = num.exponent; + int16_t exponent = num.exponent; while (mantissa >= 10000) { mantissa /= 10000; exponent += 4; @@ -68,7 +68,7 @@ to_extended(T const &value) noexcept { constexpr equiv_uint hidden_bit_mask = binary_format::hidden_bit_mask(); adjusted_mantissa am; - int32_t bias = binary_format::mantissa_explicit_bits() - + int16_t bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); equiv_uint bits; #if FASTFLOAT_HAS_BIT_CAST @@ -82,7 +82,7 @@ to_extended(T const &value) noexcept { am.mantissa = bits & mantissa_mask; } else { // normal - am.power2 = int32_t((bits & exponent_mask) >> + am.power2 = int16_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); am.power2 -= bias; am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; @@ -108,11 +108,11 @@ to_extended_halfway(T const &value) noexcept { template 
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am, callback cb) noexcept { - int32_t mantissa_shift = 64 - binary_format::mantissa_explicit_bits() - 1; + int16_t mantissa_shift = 64 - binary_format::mantissa_explicit_bits() - 1; if (-am.power2 >= mantissa_shift) { // have a denormal float - int32_t shift = -am.power2 + 1; - cb(am, std::min(shift, 64)); + int16_t shift = -am.power2 + 1; + cb(am, std::min(shift, 64)); // check for round-up: if rounding-nearest carried us to the hidden bit. am.power2 = (am.mantissa < (uint64_t(1) << binary_format::mantissa_explicit_bits())) @@ -128,7 +128,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am, if (am.mantissa >= (uint64_t(2) << binary_format::mantissa_explicit_bits())) { am.mantissa = (uint64_t(1) << binary_format::mantissa_explicit_bits()); - am.power2++; + ++am.power2; } // check for infinite: we could have carried to an infinite power @@ -141,7 +141,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am, template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void -round_nearest_tie_even(adjusted_mantissa &am, int32_t shift, +round_nearest_tie_even(adjusted_mantissa &am, int16_t shift, callback cb) noexcept { uint64_t const mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1; uint64_t const halfway = (shift == 0) ? 
0 : uint64_t(1) << (shift - 1); @@ -162,7 +162,7 @@ round_nearest_tie_even(adjusted_mantissa &am, int32_t shift, } fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void -round_down(adjusted_mantissa &am, int32_t shift) noexcept { +round_down(adjusted_mantissa &am, int16_t shift) noexcept { if (shift == 64) { am.mantissa = 0; } else { @@ -342,17 +342,17 @@ parse_mantissa(bigint &result, const parsed_number_string_t &num) noexcept { } template -inline FASTFLOAT_CONSTEXPR20 void -positive_digit_comp(bigint &bigmant, adjusted_mantissa &am, - int32_t const exponent) noexcept { - FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent))); +inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +positive_digit_comp(bigint &bigmant, adjusted_mantissa am, + int16_t const exponent) noexcept { + FASTFLOAT_ASSERT(bigmant.pow10(exponent)); bool truncated; am.mantissa = bigmant.hi64(truncated); - int32_t bias = binary_format::mantissa_explicit_bits() - + int16_t bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); am.power2 = bigmant.bit_length() - 64 + bias; - round(am, [truncated](adjusted_mantissa &a, int32_t shift) { + round(am, [truncated](adjusted_mantissa &a, int16_t shift) { round_nearest_tie_even( a, shift, [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool { @@ -360,6 +360,8 @@ positive_digit_comp(bigint &bigmant, adjusted_mantissa &am, (is_odd && is_halfway); }); }); + + return am; } // the scaling here is quite simple: we have, for the real digits `m * 10^e`, @@ -368,11 +370,11 @@ positive_digit_comp(bigint &bigmant, adjusted_mantissa &am, // we then need to scale by `2^(f- e)`, and then the two significant digits // are of the same magnitude. 
template -inline FASTFLOAT_CONSTEXPR20 void -negative_digit_comp(bigint &bigmant, adjusted_mantissa &am, - int32_t const exponent) noexcept { +inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +negative_digit_comp(bigint &bigmant, adjusted_mantissa am, + int16_t const exponent) noexcept { bigint &real_digits = bigmant; - const int32_t &real_exp = exponent; + int16_t const &real_exp = exponent; T b; { @@ -381,7 +383,7 @@ negative_digit_comp(bigint &bigmant, adjusted_mantissa &am, // gcc7 bug: use a lambda to remove the noexcept qualifier bug with // -Wnoexcept-type. round(am_b, - [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); }); + [](adjusted_mantissa &a, int16_t shift) { round_down(a, shift); }); to_float( #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN false, @@ -390,23 +392,23 @@ negative_digit_comp(bigint &bigmant, adjusted_mantissa &am, } adjusted_mantissa theor = to_extended_halfway(b); bigint theor_digits(theor.mantissa); - int32_t theor_exp = theor.power2; + int16_t theor_exp = theor.power2; // scale real digits and theor digits to be same power. 
- int32_t pow2_exp = theor_exp - real_exp; - uint32_t pow5_exp = uint32_t(-real_exp); + int16_t pow2_exp = theor_exp - real_exp; + uint16_t pow5_exp = uint16_t(-real_exp); if (pow5_exp != 0) { FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp)); } if (pow2_exp > 0) { - FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp))); + FASTFLOAT_ASSERT(theor_digits.pow2(pow2_exp)); } else if (pow2_exp < 0) { - FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp))); + FASTFLOAT_ASSERT(real_digits.pow2(-pow2_exp)); } // compare digits, and use it to director rounding int ord = real_digits.compare(theor_digits); - round(am, [ord](adjusted_mantissa &a, int32_t shift) { + round(am, [ord](adjusted_mantissa &a, int16_t shift) { round_nearest_tie_even( a, shift, [ord](bool is_odd, bool _, bool __) -> bool { (void)_; // not needed, since we've done our comparison @@ -420,6 +422,8 @@ negative_digit_comp(bigint &bigmant, adjusted_mantissa &am, } }); }); + + return am; } // parse the significant digits as a big integer to unambiguously round the @@ -436,21 +440,21 @@ negative_digit_comp(bigint &bigmant, adjusted_mantissa &am, // the actual digits. we then compare the big integer representations // of both, and use that to direct rounding. template -inline FASTFLOAT_CONSTEXPR20 void digit_comp( - parsed_number_string_t const &num, adjusted_mantissa &am) noexcept { +inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa digit_comp( + parsed_number_string_t const &num, adjusted_mantissa am) noexcept { // remove the invalid exponent bias am.power2 -= invalid_am_bias; bigint bigmant; - int32_t const sci_exp = scientific_exponent(num); + int16_t const sci_exp = scientific_exponent(num); uint16_t const digits = parse_mantissa(bigmant, num); // can't underflow, since digits is at most max_digits. 
- int32_t const exponent = sci_exp + 1 - digits; + int16_t const exponent = sci_exp + 1 - digits; if (exponent >= 0) { - positive_digit_comp(bigmant, am, exponent); + return positive_digit_comp(bigmant, am, exponent); } else { - negative_digit_comp(bigmant, am, exponent); + return negative_digit_comp(bigmant, am, exponent); } } diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 53fc4b4..7de83f5 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -293,15 +293,15 @@ fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase, // a pointer and a length to a contiguous block of memory template struct span { T const *ptr; - uint32_t length; + uint16_t length; - constexpr span(T const *_ptr, uint32_t _length) : ptr(_ptr), length(_length) {} + constexpr span(T const *_ptr, uint16_t _length) : ptr(_ptr), length(_length) {} constexpr span() : ptr(nullptr), length(0) {} - constexpr uint32_t len() const noexcept { return length; } + constexpr uint16_t len() const noexcept { return length; } - FASTFLOAT_CONSTEXPR14 const T &operator[](uint32_t index) const noexcept { + FASTFLOAT_CONSTEXPR14 const T &operator[](uint16_t index) const noexcept { FASTFLOAT_DEBUG_ASSERT(index < length); return ptr[index]; } @@ -318,8 +318,8 @@ struct value128 { }; /* Helper C++14 constexpr generic implementation of leading_zeroes */ -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int -leading_zeroes_generic(uint64_t input_num, int last_bit = 0) { +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint8_t +leading_zeroes_generic(uint64_t input_num, uint64_t last_bit = 0) { if (input_num & uint64_t(0xffffffff00000000)) { input_num >>= 32; last_bit |= 32; @@ -343,11 +343,11 @@ leading_zeroes_generic(uint64_t input_num, int last_bit = 0) { if (input_num & uint64_t(0x2)) { /* input_num >>= 1; */ last_bit |= 1; } - return 63 - last_bit; + return 63 - (uint8_t)last_bit; } /* result might be undefined when input_num 
is zero */ -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint8_t leading_zeroes(uint64_t input_num) noexcept { assert(input_num > 0); FASTFLOAT_ASSUME(input_num > 0); @@ -360,12 +360,12 @@ leading_zeroes(uint64_t input_num) noexcept { // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). _BitScanReverse64(&leading_zero, input_num); - return (int)(63 - leading_zero); + return (uint8_t)(63 - leading_zero); #else - return leading_zeroes_generic(input_num); + return (uint8_t)leading_zeroes_generic(input_num); #endif #else - return __builtin_clzll(input_num); + return (uint8_t)__builtin_clzll(input_num); #endif } @@ -429,7 +429,7 @@ full_multiplication(uint64_t a, uint64_t b) noexcept { struct adjusted_mantissa { uint64_t mantissa; - int32_t power2; // a negative value indicates an invalid result + int16_t power2; // a negative value indicates an invalid result adjusted_mantissa() noexcept = default; constexpr bool operator==(adjusted_mantissa const &o) const noexcept { diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 93c28a1..ca94b05 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -283,7 +283,7 @@ from_chars_advanced(parsed_number_string_t const &pns, T &value) noexcept { // and we have an invalid power (am.power2 < 0), then we need to go the long // way around again. This is very uncommon. if (am.power2 < 0) { - digit_comp(pns, am); + am = digit_comp(pns, am); } to_float( #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN