From 41ee34701bcc5621eac3e4d4ff578cd39e7fae17 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Mon, 9 Nov 2020 19:06:51 -0500
Subject: [PATCH 1/6] Magical optimizations from @aqrit

---
 include/fast_float/ascii_number.h | 35 +++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index d571fcb..27cc8ba 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -15,20 +15,43 @@ namespace fast_float {
 fastfloat_really_inline bool is_integer(char c)  noexcept  { return c >= '0' && c <= '9'; }
 
 
+// credit: @aqrit
+fastfloat_really_inline uint32_t  parse_eight_digits_unrolled(uint64_t val) {
+  const uint64_t mask = 0x000000FF000000FF;
+  const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+  const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+
+  val -= 0x3030303030303030;
+  val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
+  return val;
+}
+
 // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+//fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val)  noexcept  {
+//  uint64_t val;
+//  ::memcpy(&val, chars, sizeof(uint64_t));
+//  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
+//  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
+//  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+//}
+
 fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars)  noexcept  {
   uint64_t val;
   ::memcpy(&val, chars, sizeof(uint64_t));
-  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
-  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
-  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+  return parse_eight_digits_unrolled(val);
 }
 
+// credit: @aqrit
 fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
-  return (((val & 0xF0F0F0F0F0F0F0F0) |
-           (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
-          0x3333333333333333);
+  return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
+     0x8080808080808080)); 
 }
+//fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
+//  return (((val & 0xF0F0F0F0F0F0F0F0) |
+//           (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
+//          0x3333333333333333);
+//}
 
 
 fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {

From d26a697a4d98c642aa5758ac6a91572f437f8b13 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Mon, 9 Nov 2020 20:22:45 -0500
Subject: [PATCH 2/6] Added remark.

---
 include/fast_float/simple_decimal_conversion.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/fast_float/simple_decimal_conversion.h b/include/fast_float/simple_decimal_conversion.h
index f87dbb3..5c107bc 100644
--- a/include/fast_float/simple_decimal_conversion.h
+++ b/include/fast_float/simple_decimal_conversion.h
@@ -360,6 +360,8 @@ adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
     // credit: R. Oudompheng who first implemented this fast path (to my knowledge).
     // It is rough, but it does the job of accelerating the slow path since most
     // long streams of digits are determined after 19 digits.
+    // Note that mantissa+1 cannot overflow since mantissa < 10**19 and so
+    // mantissa+1 <= 10**19 < 2**64.
     adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
     adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
     // They must both agree and be both a successful result.

From 58faaac627ed92e586fc70030bafe402f64b5fcf Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Mon, 9 Nov 2020 20:23:38 -0500
Subject: [PATCH 3/6] Adding another remark.

---
 include/fast_float/parse_number.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index 3e246d9..6d39b5f 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -91,7 +91,7 @@ from_chars_result from_chars(const char *first, const char *last,
   }
   answer.ec = std::errc(); // be optimistic
   answer.ptr = pns.lastmatch;
-
+  // Next is Clinger's fast path.
   if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
     value = T(pns.mantissa);
     if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }

From 8d1e38733ea9f1249981af67d45e65525ad1b70e Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Tue, 10 Nov 2020 12:07:26 -0500
Subject: [PATCH 4/6] Cleaning table generation script.

---
 script/table_generation.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/script/table_generation.py b/script/table_generation.py
index 92bf9a9..0dfada7 100644
--- a/script/table_generation.py
+++ b/script/table_generation.py
@@ -1,11 +1,4 @@
-
 def format(number):
-    # move the most significant bit in position
-    while(number < (1<<127)):
-        number *= 2
-    # then *truncate*
-    while(number >= (1<<128)):
-        number //= 2
     upper = number // (1<<64)
     lower = number % (1<<64)
     print(""+hex(upper)+","+hex(lower)+",")
@@ -15,16 +8,24 @@ for q in range(-342,0):
     z = 0
     while( (1<<z) < power5) :
         z += 1
-    if( q >= -17 ):
+    if(q >= -17):
         b = z + 127
         c = 2 ** b // power5 + 1
-        assert c < (1<<128)
         format(c)
     else:
         b = 2 * z + 2 * 64
         c = 2 ** b // power5 + 1
+        # truncate
+        while(c >= (1<<128)):
+          c //= 2
         format(c)    
     
 for q in range(0,308+1):
     power5 = 5 ** q
+    # move the most significant bit in position
+    while(power5 < (1<<127)):
+        power5 *= 2
+    # *truncate*
+    while(power5 >= (1<<128)):
+        power5 //= 2
     format(power5)

From 86bc73af9e1f8d9b090c63c6ee5756ca8d38aba8 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Mon, 23 Nov 2020 13:53:50 -0500
Subject: [PATCH 5/6] Need explicit cast.

---
 include/fast_float/ascii_number.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 6093b09..f628e19 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -23,7 +23,7 @@ fastfloat_really_inline uint32_t  parse_eight_digits_unrolled(uint64_t val) {
 
   val -= 0x3030303030303030;
   val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
-  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
+  val = uint32_t((((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32);
   return val;
 }
 

From 7ef9d9b7d20fbc549b224f1bd3c0c6b337e9439e Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Mon, 23 Nov 2020 15:04:48 -0500
Subject: [PATCH 6/6] Tweaking cast; removing commented-out code.

---
 include/fast_float/ascii_number.h | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index f628e19..3b23da0 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -20,22 +20,12 @@ fastfloat_really_inline uint32_t  parse_eight_digits_unrolled(uint64_t val) {
   const uint64_t mask = 0x000000FF000000FF;
   const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
   const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
-
   val -= 0x3030303030303030;
   val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
-  val = uint32_t((((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32);
-  return val;
+  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
+  return uint32_t(val);
 }
 
-// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
-//fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val)  noexcept  {
-//  uint64_t val;
-//  ::memcpy(&val, chars, sizeof(uint64_t));
-//  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
-//  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
-//  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
-//}
-
 fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars)  noexcept  {
   uint64_t val;
   ::memcpy(&val, chars, sizeof(uint64_t));
@@ -47,12 +37,6 @@ fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val)  noexcep
   return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
      0x8080808080808080)); 
 }
-//fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
-//  return (((val & 0xF0F0F0F0F0F0F0F0) |
-//           (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
-//          0x3333333333333333);
-//}
-
 
 fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
   uint64_t val;