This should be mostly correct.

2026-01-01 03:12:18 +08:00 · 2021-01-07 17:46:47 -05:00 · 2021-01-07 17:46:47 -05:00 · a27fcc230d
commit a27fcc230d
parent ca51b646c8
3 changed files with 123 additions and 22 deletions
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@ -60,6 +60,7 @@ fastfloat_really_inline
 parsed_number_string parse_number_string(const char *p, const char *pend, chars_format fmt) noexcept {
  parsed_number_string answer;
  answer.valid = false;
+  answer.too_many_digits = false;
  answer.negative = (*p == '-');
  if ((*p == '-') || (*p == '+')) {
    ++p;
@ -81,6 +82,8 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
        uint64_t(*p - '0'); // might overflow, we will handle the overflow later
    ++p;
  }
+  const char *const end_of_integer_part = p;
+
  int64_t exponent = 0;
  if ((p != pend) && (*p == '.')) {
    ++p;
@ -111,9 +114,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
  int32_t digit_count =
      int32_t(p - start_digits); // used later to guard against overflows
  if(exponent > 0) {digit_count--;}
+  int64_t exp_number = 0;            // explicit exponential part
  if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
    const char * location_of_e = p;
-    int64_t exp_number = 0;            // exponential part
    ++p;
    bool neg_exp = false;
    if ((p != pend) && ('-' == *p)) {
@ -137,7 +140,8 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
        }
        ++p;
      }
-      exponent += (neg_exp ? -exp_number : exp_number);
+      if(neg_exp) { exp_number = - exp_number; }
+      exponent += exp_number;
    }
  } else {
    // If it scientific and not fixed, we have to bail out.
@ -164,12 +168,29 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
    // We over-decrement by one when there is a decimal separator
    digit_count -= int(start - start_digits);
    if (digit_count > 19) {
-      answer.mantissa = 0xFFFFFFFFFFFFFFFF; // important: we don't want the mantissa to be used in a fast path uninitialized.
      answer.too_many_digits = true;
-      return answer;
+      // Let us start again, this time, avoiding overflows.
+      i = 0;
+      p = start_digits;
+      const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
+      while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
+        i = i * 10 + uint64_t(*p - '0');
+        ++p;
+      }
+      if (i >= minimal_nineteen_digit_integer) { // We have a big integers
+        exponent = end_of_integer_part - p + exp_number;
+      } else { // We have a value with a fractional component.
+          p++; // skip the '.'
+          const char *first_after_period = p;
+          while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
+            i = i * 10 + uint64_t(*p - '0');
+            ++p;
+          }
+          exponent = first_after_period - p + exp_number;
+      }
+      // We have now corrected both exponent and i, to a truncated value.
    }
  }
-  answer.too_many_digits = false;
  answer.exponent = exponent;
  answer.mantissa = i;
  return answer;
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@ -184,6 +184,9 @@ struct adjusted_mantissa {
  bool operator==(const adjusted_mantissa &o) const {
    return mantissa == o.mantissa && power2 == o.power2;
  }
+  bool operator!=(const adjusted_mantissa &o) const { // inequality: true when either field differs
+    return mantissa != o.mantissa || power2 != o.power2; // logical negation of the operator== defined just above
+  }
 };

 struct decimal {
@ -372,4 +375,4 @@ inline OStream& operator<<(OStream &out, const fast_float::decimal &d) {
  return out;
 }

-#endif
+#endif
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@ -66,6 +66,25 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value)  n
  answer.ptr = first;
  return answer;
 }
+
+template<typename T>
+fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) { // builds a bit pattern from (negative, am) in a 64-bit word and copies it into value
+  uint64_t word = am.mantissa; // start with the mantissa bits
+  word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits(); // OR in power2, shifted left by the explicit mantissa bit count
+  word = negative
+  ? word | (uint64_t(1) << binary_format<T>::sign_index()) : word; // set the bit at sign_index() only when negative is true
+#if FASTFLOAT_IS_BIG_ENDIAN == 1
+   if (std::is_same<T, float>::value) {
+     ::memcpy(&value, (char *)&word + 4, sizeof(T)); // float on big-endian: copy sizeof(T) bytes starting at byte offset 4 of the 64-bit word (offsets 4-7)
+   } else {
+     ::memcpy(&value, &word, sizeof(T)); // non-float T: copy sizeof(T) bytes from the start of the word
+   }
+#else
+   // Little-endian systems: copy sizeof(T) bytes from the start of the word for every T.
+   ::memcpy(&value, &word, sizeof(T));
+#endif
+}
+
 } // namespace


@ -92,31 +111,89 @@ from_chars_result from_chars(const char *first, const char *last,
  answer.ec = std::errc(); // be optimistic
  answer.ptr = pns.lastmatch;
  // Next is Clinger's fast path.
-  if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
+  if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path() && !pns.too_many_digits) {
    value = T(pns.mantissa);
    if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
    else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); }
    if (pns.negative) { value = -value; }
    return answer;
  }
-  adjusted_mantissa am = pns.too_many_digits ? parse_long_mantissa<binary_format<T>>(first,last) : compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
+  adjusted_mantissa am = compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
+  if(pns.too_many_digits) {
+    if(am != compute_float<binary_format<T>>(pns.exponent, pns.mantissa + 1)) {
+      am.power2 = -1; // value is invalid.
+    }
+  }
  // If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
  // then we need to go the long way around again. This is very uncommon.
  if(am.power2 < 0) { am = parse_long_mantissa<binary_format<T>>(first,last); }
-  uint64_t word = am.mantissa;
-  word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
-  word = pns.negative
-  ? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
-#if FASTFLOAT_IS_BIG_ENDIAN == 1
-   if (std::is_same<T, float>::value) {
-     ::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian
-   } else {
-     ::memcpy(&value, &word, sizeof(T));
-   }
-#else
-   // For little-endian systems:
-   ::memcpy(&value, &word, sizeof(T));
-#endif
+  to_float(pns.negative, am, value);
+  return answer;
+}
+
+template<typename T>
+from_chars_result odlfrom_chars(const char *first, const char *last, // NOTE(review): "odl" looks like a transposition of "old" -- confirm before renaming
+                             T &value, chars_format fmt /*= chars_format::general*/)  noexcept  { // parses [first, last) into value; the result carries the end pointer (ptr) and error code (ec)
+  static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
+
+
+  from_chars_result answer;
+  while ((first != last) && fast_float::is_space(uint8_t(*first))) { // skip leading characters for which is_space is true
+    first++;
+  }
+  if (first == last) { // nothing left to parse: report invalid_argument
+    answer.ec = std::errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+  parsed_number_string pns = parse_number_string(first, last, fmt);
+  if (!pns.valid) { // parse_number_string did not accept the input: hand it to parse_infnan
+    return parse_infnan(first, last, value);
+  }
+  answer.ec = std::errc(); // be optimistic
+  answer.ptr = pns.lastmatch; // pns.lastmatch is reported to the caller as the end of the parsed text
+  adjusted_mantissa am;
+  // Most times, we have pns.too_many_digits = false.
+  if(pns.too_many_digits) {
+    // Uncommon path where we have too many digits.
+    //
+    // credit: R. Oudompheng who first implemented this fast path.
+    // It does the job of accelerating the slow path since most
+    // long streams of digits are determined after 19 digits.
+    // Note that mantissa+1 cannot overflow since mantissa < 10**19 and so
+    // mantissa+1 <= 10**19 < 2**64.
+    adjusted_mantissa am1 = compute_float<binary_format<T>>(pns.exponent, pns.mantissa); // result for the truncated mantissa
+    adjusted_mantissa am2 = compute_float<binary_format<T>>(pns.exponent, pns.mantissa+1); // result for the truncated mantissa plus one
+    // They must both agree and be both a successful result.
+    if(( am1 == am2 ) && (am1.power2 >= 0)) { // a negative power2 is treated as a failed compute_float result
+      am = am1;
+    } else {
+      // long way! (uncommon)
+      decimal d = parse_decimal(first, last); // re-reads the input starting at first (after the skipped spaces)
+      am = compute_float<binary_format<T>>(d);
+    }
+    to_float(pns.negative, am, value);
+  } else {
+    // We are entering the common path where the number of digits is no more than 19.
+    //
+    // Next is Clinger's fast path.
+    if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
+      value = T(pns.mantissa);
+      if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); } // negative exponent: divide by the power of ten
+      else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); } // non-negative exponent: multiply by the power of ten
+      if (pns.negative) { value = -value; }
+      return answer; // value was written directly, so to_float is not called on this path
+    }
+    // Then we have our main routine.
+    am = compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
+    // If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
+    // then we need to go the long way around again. This is very uncommon.
+    if(am.power2 < 0) { // long way! (uncommon)
+      decimal d = parse_decimal(first, last);
+      am = compute_float<binary_format<T>>(d);
+    }
+    to_float(pns.negative, am, value);
+  }
   return answer;
 }