From 47d3d443d86d54bdb9f689b251216c15f2353ab6 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Tue, 27 Oct 2020 21:26:11 -0400
Subject: [PATCH] Minor fix.

---
 include/fast_float/ascii_number.h              | 10 ++++++++--
 include/fast_float/decimal_to_binary.h         |  2 +-
 include/fast_float/float_common.h              |  2 +-
 include/fast_float/parse_number.h              |  6 +++---
 include/fast_float/simple_decimal_conversion.h |  5 ++++-
 5 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 7789995..f21f7e7 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -168,7 +168,10 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
 }
 
 
-// This should always succeed since it follows a call to parse_number_string.
+// This should always succeed since it follows a call to parse_number_string.
+// This function could be optimized. In particular, we could stop after 19 digits
+// and try to bail out. Furthermore, we should be able to recover the computed
+// exponent from the pass in parse_number_string.
 decimal parse_decimal(const char *p, const char *pend) noexcept {
   decimal answer;
   answer.num_digits = 0;
@@ -203,10 +206,13 @@ decimal parse_decimal(const char *p, const char *pend) noexcept {
        ++p;
       }
     }
+    // We expect that this loop will often take the bulk of the running time
+    // because when a value has lots of digits, they are consumed here eight at a time.
     while ((p + 8 <= pend) && (answer.num_digits + 8 < max_digits)) {
       uint64_t val;
       ::memcpy(&val, p, sizeof(uint64_t));
-      if(! is_made_of_eight_digits_fast(val)) break;
+      if(! is_made_of_eight_digits_fast(val)) { break; }
+      // We have eight digits, process them in one go!
       val -= 0x3030303030303030;
       ::memcpy(answer.digits + answer.num_digits, &val, sizeof(uint64_t));
       answer.num_digits += 8;
diff --git a/include/fast_float/decimal_to_binary.h b/include/fast_float/decimal_to_binary.h
index 18d0c51..b46e93a 100644
--- a/include/fast_float/decimal_to_binary.h
+++ b/include/fast_float/decimal_to_binary.h
@@ -94,7 +94,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
   if(product.low == 0xFFFFFFFFFFFFFFFF) { //  could guard it further
     // In some very rare cases, this could happen, in which case we might need a more accurate
     // computation that what we can provide cheaply. This is very, very unlikely.
-    answer.power2 = -1;
+    answer.power2 = -1; // This (a negative value) indicates an error condition.
     return answer;
   }
   // The "compute_product_approximation" function can be slightly slower than a branchless approach:
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 38085e7..8c8b21b 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -125,7 +125,7 @@ value128 full_multiplication(uint64_t value1, uint64_t value2) {
 
 struct adjusted_mantissa {
   uint64_t mantissa;
-  int power2;
+  int power2; // a negative value indicates an invalid result
   adjusted_mantissa() = default;
   //bool operator==(const adjusted_mantissa &o) const = default;
   bool operator==(const adjusted_mantissa &o) const {
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index fa027e1..a9f5a84 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -100,9 +100,9 @@ from_chars_result from_chars(const char *first, const char *last,
     return answer;
   }
   adjusted_mantissa am = pns.too_many_digits ? parse_long_mantissa<binary_format<T>>(first,last) : compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
-  if(am.power2 < 0) {
-    am = parse_long_mantissa<binary_format<T>>(first,last);
-  }
+  // If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
+  // then we need to go the long way around again. This is very uncommon.
+  if(am.power2 < 0) { am = parse_long_mantissa<binary_format<T>>(first,last); }
   uint64_t word = am.mantissa;
   word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
   word = pns.negative 
diff --git a/include/fast_float/simple_decimal_conversion.h b/include/fast_float/simple_decimal_conversion.h
index 2ac43ba..620b38a 100644
--- a/include/fast_float/simple_decimal_conversion.h
+++ b/include/fast_float/simple_decimal_conversion.h
@@ -354,6 +354,8 @@ adjusted_mantissa compute_float(decimal &d) {
 template <typename binary>
 adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
     decimal d = parse_decimal(first, last);
+    // In some cases we get lucky and the first 19 digits alone are enough to
+    // determine the result. Let us try that.
     const uint64_t mantissa = d.to_truncated_mantissa();
     const int64_t exponent =  d.to_truncated_exponent();
     // credit: Nigel Tao who first implemented this fast path (to my knowledge).
@@ -361,7 +363,8 @@ adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
     // long streams of digits are determined after 19 digits.
     adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
     adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
-    if( am1 == am2 ) { return am1; }
+    // The two results must agree and must be valid (non-negative power2).
+    if(( am1 == am2 ) && (am1.power2 >= 0)) { return am1; }
     return compute_float<binary>(d);
 }