diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index d39ad52..3b23da0 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -15,22 +15,29 @@ namespace fast_float {
 fastfloat_really_inline bool is_integer(char c)  noexcept  { return c >= '0' && c <= '9'; }
 
 
-// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+// credit @aqrit: turns eight ASCII digits packed in val (first digit in the lowest byte; not validated here) into their decimal value.
+fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val)  noexcept  {
+  const uint64_t mask = 0x000000FF000000FF; // keeps bytes 0 and 4 of a 64-bit word
+  const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+  const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+  val -= 0x3030303030303030; // '0' -> 0 in every byte
+  val = (val * 10) + (val >> 8); // byte i becomes 10*d[i] + d[i+1]; cf. val = (val * 2561) >> 8;
+  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; // weight the four digit pairs by 10^6, 10^4, 10^2, 1
+  return uint32_t(val);
+}
+
 fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars)  noexcept  {
   uint64_t val;
   ::memcpy(&val, chars, sizeof(uint64_t));
-  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
-  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
-  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+  return parse_eight_digits_unrolled(val); // delegate to the uint64_t overload
 }
 
+// credit @aqrit: true iff no byte of val lies outside '0'..'9' (any such byte ends up with its top bit set in the OR below).
 fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
-  return (((val & 0xF0F0F0F0F0F0F0F0) |
-           (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
-          0x3333333333333333);
+  return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
+     0x8080808080808080));
 }
 
-
 fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
   uint64_t val;
   ::memcpy(&val, chars, 8);
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index 1b7a419..4571409 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -91,7 +91,7 @@ from_chars_result from_chars(const char *first, const char *last,
   }
   answer.ec = std::errc(); // be optimistic
   answer.ptr = pns.lastmatch;
-
+  // Next is Clinger's fast path.
   if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
     value = T(pns.mantissa);
     if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
diff --git a/include/fast_float/simple_decimal_conversion.h b/include/fast_float/simple_decimal_conversion.h
index f87dbb3..5c107bc 100644
--- a/include/fast_float/simple_decimal_conversion.h
+++ b/include/fast_float/simple_decimal_conversion.h
@@ -360,6 +360,8 @@ adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
     // credit: R. Oudompheng who first implemented this fast path (to my knowledge).
     // It is rough, but it does the job of accelerating the slow path since most
     // long streams of digits are determined after 19 digits.
+    // Note that mantissa+1 cannot overflow since mantissa < 10**19 and so
+    // mantissa+1 <= 10**19 < 2**64.
     adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
     adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
     // They must both agree and be both a successful result.
diff --git a/script/table_generation.py b/script/table_generation.py
index a85dc47..24fec7c 100644
--- a/script/table_generation.py
+++ b/script/table_generation.py
@@ -18,8 +18,8 @@ for q in range(-342,0):
         # truncate
         while(c >= (1<<128)):
           c //= 2
-        format(c)    
-    
+        format(c)
+
 for q in range(0,308+1):
     power5 = 5 ** q
     # move the most significant bit in position