From 41ee34701bcc5621eac3e4d4ff578cd39e7fae17 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 9 Nov 2020 19:06:51 -0500 Subject: [PATCH 1/6] Magical optimizations from @aqrit --- include/fast_float/ascii_number.h | 35 +++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index d571fcb..27cc8ba 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -15,20 +15,43 @@ namespace fast_float { fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; } +// credit @aqrit +fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return val; +} + // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +//fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) noexcept { +// uint64_t val; +// ::memcpy(&val, chars, sizeof(uint64_t)); +// val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; +// val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; +// return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +//} + fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { uint64_t val; ::memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); + return parse_eight_digits_unrolled(val); } +// credit @aqrit fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept { - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); + return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + 0x8080808080808080)); } +//fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept { +// return (((val & 0xF0F0F0F0F0F0F0F0) | +// (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == +// 0x3333333333333333); +//} fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept { From d26a697a4d98c642aa5758ac6a91572f437f8b13 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 9 Nov 2020 20:22:45 -0500 Subject: [PATCH 2/6] Added remark. --- include/fast_float/simple_decimal_conversion.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/fast_float/simple_decimal_conversion.h b/include/fast_float/simple_decimal_conversion.h index f87dbb3..5c107bc 100644 --- a/include/fast_float/simple_decimal_conversion.h +++ b/include/fast_float/simple_decimal_conversion.h @@ -360,6 +360,8 @@ adjusted_mantissa parse_long_mantissa(const char *first, const char* last) { // credit: R. Oudompheng who first implemented this fast path (to my knowledge). // It is rough, but it does the job of accelerating the slow path since most // long streams of digits are determined after 19 digits. + // Note that mantissa+1 cannot overflow since mantissa < 10**19 and so + // mantissa+1 <= 10**19 < 2**64. adjusted_mantissa am1 = compute_float(exponent, mantissa); adjusted_mantissa am2 = compute_float(exponent, mantissa+1); // They must both agree and be both a successful result. From 58faaac627ed92e586fc70030bafe402f64b5fcf Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 9 Nov 2020 20:23:38 -0500 Subject: [PATCH 3/6] Adding another remark. --- include/fast_float/parse_number.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 3e246d9..6d39b5f 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -91,7 +91,7 @@ from_chars_result from_chars(const char *first, const char *last, } answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; - + // Next is Clinger's fast path. if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path()) { value = T(pns.mantissa); if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } From 8d1e38733ea9f1249981af67d45e65525ad1b70e Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 10 Nov 2020 12:07:26 -0500 Subject: [PATCH 4/6] Cleaning. --- script/table_generation.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/script/table_generation.py b/script/table_generation.py index 92bf9a9..0dfada7 100644 --- a/script/table_generation.py +++ b/script/table_generation.py @@ -1,11 +1,4 @@ - def format(number): - # move the most significant bit in position - while(number < (1<<127)): - number *= 2 - # then *truncate* - while(number >= (1<<128)): - number //= 2 upper = number // (1<<64) lower = number % (1<<64) print(""+hex(upper)+","+hex(lower)+",") @@ -15,16 +8,24 @@ for q in range(-342,0): z = 0 while( (1<= -17 ): + if(q >= -17): b = z + 127 c = 2 ** b // power5 + 1 - assert c < (1<<128) format(c) else: b = 2 * z + 2 * 64 c = 2 ** b // power5 + 1 + # truncate + while(c >= (1<<128)): + c //= 2 format(c) for q in range(0,308+1): power5 = 5 ** q + # move the most significant bit in position + while(power5 < (1<<127)): + power5 *= 2 + # *truncate* + while(power5 >= (1<<128)): + power5 //= 2 format(power5) From 86bc73af9e1f8d9b090c63c6ee5756ca8d38aba8 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 23 Nov 2020 13:53:50 -0500 Subject: [PATCH 5/6] Need explicit cast. --- include/fast_float/ascii_number.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 6093b09..f628e19 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -23,7 +23,7 @@ fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) { val -= 0x3030303030303030; val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + val = uint32_t((((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32); return val; } From 7ef9d9b7d20fbc549b224f1bd3c0c6b337e9439e Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 23 Nov 2020 15:04:48 -0500 Subject: [PATCH 6/6] Tweaking cast. --- include/fast_float/ascii_number.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index f628e19..3b23da0 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -20,22 +20,12 @@ fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) { const uint64_t mask = 0x000000FF000000FF; const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) - val -= 0x3030303030303030; val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; - val = uint32_t((((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32); - return val; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return uint32_t(val); } -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -//fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) noexcept { -// uint64_t val; -// ::memcpy(&val, chars, sizeof(uint64_t)); -// val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; -// val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; -// return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -//} - fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { uint64_t val; ::memcpy(&val, chars, sizeof(uint64_t)); @@ -47,12 +37,6 @@ fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcep return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080)); } -//fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept { -// return (((val & 0xF0F0F0F0F0F0F0F0) | -// (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == -// 0x3333333333333333); -//} - fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept { uint64_t val;