diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index d39ad52..3b23da0 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -15,22 +15,35 @@ namespace fast_float {
 fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; }
 
 
-// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+// Converts eight ASCII decimal digits packed into a 64-bit word to the integer
+// they spell. The first (most significant) digit must be in the least
+// significant byte of val, as produced by a memcpy from the text on a
+// little-endian host. val is not validated here; callers are expected to check
+// it first, e.g. with is_made_of_eight_digits_fast. Credit: @aqrit.
+fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) noexcept {
+  const uint64_t mask = 0x000000FF000000FF;
+  const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+  const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+  val -= 0x3030303030303030;
+  val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
+  return uint32_t(val);
+}
+
 fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
   uint64_t val;
   ::memcpy(&val, chars, sizeof(uint64_t));
-  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
-  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
-  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+  return parse_eight_digits_unrolled(val);
 }
 
+// Returns true if and only if each of the eight bytes of val is an ASCII digit
+// ('0'..'9'): a byte outside that range leaves a top bit set in either
+// val + 0x46... or val - 0x30.... Credit: @aqrit.
 fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
-  return (((val & 0xF0F0F0F0F0F0F0F0) |
-           (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
-          0x3333333333333333);
+  return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
+            0x8080808080808080));
 }
 
-
 fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept {
   uint64_t val;
   ::memcpy(&val, chars, 8);
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index 1b7a419..4571409 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -91,7 +91,7 @@ from_chars_result from_chars(const char *first, const char *last,
   }
   answer.ec = std::errc(); // be optimistic
   answer.ptr = pns.lastmatch;
-
+  // Next is Clinger's fast path.
   if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path()) {
     value = T(pns.mantissa);
     if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); }
diff --git a/include/fast_float/simple_decimal_conversion.h b/include/fast_float/simple_decimal_conversion.h
index f87dbb3..5c107bc 100644
--- a/include/fast_float/simple_decimal_conversion.h
+++ b/include/fast_float/simple_decimal_conversion.h
@@ -360,6 +360,8 @@ adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
   // credit: R. Oudompheng who first implemented this fast path (to my knowledge).
   // It is rough, but it does the job of accelerating the slow path since most
   // long streams of digits are determined after 19 digits.
+  // Note that mantissa+1 cannot overflow since mantissa < 10**19 and so
+  // mantissa+1 <= 10**19 < 2**64.
   adjusted_mantissa am1 = compute_float(exponent, mantissa);
   adjusted_mantissa am2 = compute_float(exponent, mantissa+1);
   // They must both agree and be both a successful result.
diff --git a/script/table_generation.py b/script/table_generation.py
index a85dc47..24fec7c 100644
--- a/script/table_generation.py
+++ b/script/table_generation.py
@@ -18,8 +18,8 @@ for q in range(-342,0):
         # truncate
         while(c >= (1<<128)):
             c //= 2
-        format(c)
-
+        format(c)
+
 for q in range(0,308+1):
     power5 = 5 ** q
     # move the most significant bit in position