Merge a6c87105dcae975c9f78c1b3ad87cf022f48b06e into 221a4920db7d68d33ab9794af602daef19667351

2026-02-09 11:16:45 +08:00 · 2026-02-08 13:15:25 +00:00 · 2026-02-08 13:15:25 +00:00 · a44985fb38
commit a44985fb38
parent 221a4920db a6c87105dc
3 changed files with 80 additions and 14 deletions
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@ -351,52 +351,81 @@ parse_number_string(UC const *p, UC const *pend,
    }
  }
  UC const *const start_digits = p;
  const UC separator = options.digit_separator;
  const bool has_separator = (separator != UC('\0'));
  uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
  int64_t digit_count = 0;
  UC const *first_digit_ptr = nullptr;
-  while ((p != pend) && is_integer(*p)) {
+  while (p != pend) {
    if (has_separator && *p == separator) {
      ++p;
      continue;
    }
    if (!is_integer(*p)) {
      break;
    }
    if (digit_count == 0) {
      first_digit_ptr = p;
    }
    // a multiplication by 10 is cheaper than an arbitrary integer
    // multiplication
    i = 10 * i +
        uint64_t(*p -
                 UC('0')); // might overflow, we will handle the overflow later
    ++p;
    ++digit_count;
  }
  UC const *const end_of_integer_part = p;
-  int64_t digit_count = int64_t(end_of_integer_part - start_digits);
+  answer.integer =
-  answer.integer = span<UC const>(start_digits, size_t(digit_count));
+      span<UC const>(start_digits, size_t(end_of_integer_part - start_digits));
  FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
    // at least 1 digit in integer part, without leading zeros
    if (digit_count == 0) {
      return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
    }
-    if ((start_digits[0] == UC('0') && digit_count > 1)) {
+    if (digit_count > 1 && *first_digit_ptr == UC('0')) {
      return report_parse_error<UC>(start_digits,
                                    parse_error::leading_zeros_in_integer_part);
    }
  }
  int64_t exponent = 0;
  int64_t fractional_digit_count = 0;
  bool const has_decimal_point = (p != pend) && (*p == decimal_point);
  if (has_decimal_point) {
    ++p;
    UC const *before = p;
    // can occur at most twice without overflowing, but let it occur more, since
    // for integers with many digits, digit parsing is the primary bottleneck.
-    loop_parse_if_eight_digits(p, pend, i);
+    if (!has_separator) {
      UC const *const before_simd = p;
      loop_parse_if_eight_digits(p, pend, i);
      size_t const exploded = size_t(p - before_simd);
      fractional_digit_count += int64_t(exploded);
    }
-    while ((p != pend) && is_integer(*p)) {
+    while (p != pend) {
      if (has_separator && *p == separator) {
        ++p;
        continue;
      }
      if (!is_integer(*p)) {
        break;
      }
      uint8_t digit = uint8_t(*p - UC('0'));
      ++p;
      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
      ++fractional_digit_count;
    }
-    exponent = before - p;
+    exponent = -fractional_digit_count;
    answer.fraction = span<UC const>(before, size_t(p - before));
-    digit_count -= exponent;
+    digit_count += fractional_digit_count;
  }
  FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
    // at least 1 digit in fractional part
-    if (has_decimal_point && exponent == 0) {
+    if (has_decimal_point && fractional_digit_count == 0) {
      return report_parse_error<UC>(p,
                                    parse_error::no_digits_in_fractional_part);
    }
@ -467,7 +496,8 @@ parse_number_string(UC const *p, UC const *pend,
    // We need to be mindful of the case where we only have zeroes...
    // E.g., 0.000000000...000.
    UC const *start = start_digits;
-    while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
+    while ((start != pend) && (*start == UC('0') || *start == decimal_point ||
                               (has_separator && *start == separator))) {
      if (*start == UC('0')) {
        digit_count--;
      }
@ -484,19 +514,38 @@ parse_number_string(UC const *p, UC const *pend,
      UC const *int_end = p + answer.integer.len();
      uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
      while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
        if (has_separator && *p == separator) {
          ++p;
          continue;
        }
        i = i * 10 + uint64_t(*p - UC('0'));
        ++p;
      }
      if (i >= minimal_nineteen_digit_integer) { // We have a big integer
-        exponent = end_of_integer_part - p + exp_number;
+        int64_t remaining_integer_digits = 0;
        while (p != int_end) {
          if (has_separator && *p == separator) {
            ++p;
            continue;
          }
          ++p;
          ++remaining_integer_digits;
        }
        exponent = remaining_integer_digits + exp_number;
      } else { // We have a value with a fractional component.
        p = answer.fraction.ptr;
        UC const *frac_end = p + answer.fraction.len();
        int64_t fraction_digits_consumed = 0;
        while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
          if (has_separator && *p == separator) {
            ++p;
            continue;
          }
          i = i * 10 + uint64_t(*p - UC('0'));
          ++p;
          ++fraction_digits_consumed;
        }
-        exponent = answer.fraction.ptr - p + exp_number;
+        exponent = exp_number - fraction_digits_consumed;
      }
      // We have now corrected both exponent and i, to a truncated value
    }
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@ -70,8 +70,10 @@ using from_chars_result = from_chars_result_t<char>;
 template <typename UC> struct parse_options_t {
  constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
-                                     UC dot = UC('.'), int b = 10)
+                                     UC dot = UC('.'), int b = 10,
-      : format(fmt), decimal_point(dot), base(b) {}
+                                     UC sep = UC('\0'), uint8_t opts = 0)
      : format(fmt), decimal_point(dot), base(b), digit_separator(sep),
        format_options(opts) {}
  /** Which number formats are accepted */
  chars_format format;
@ -79,6 +81,14 @@ template <typename UC> struct parse_options_t {
  UC decimal_point;
  /** The base used for integers */
  int base;
  /** The character used as a digit separator. Use '\0' to disable digit
   * separators. */
  UC digit_separator;
  /** Additional format options (bitmask) */
  uint8_t format_options;
  /** Bit flag for format_options: skip a leading "0x"/"0X" or "0b"/"0B"
   * prefix before parsing */
  static constexpr uint8_t skip_prefix = 1;
 };
 using parse_options = parse_options_t<char>;
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@ -476,6 +476,13 @@ template <typename T, typename UC>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_advanced(UC const *first, UC const *last, T &value,
                    parse_options_t<UC> options) noexcept {
  if (((options.format_options & parse_options_t<UC>::skip_prefix) != 0) &&
      (last - first >= 2) && (*first == UC('0'))) {
    const UC c_low = UC(first[1] | UC(0x20));
    if (c_low == UC('x') || c_low == UC('b')) {
      first += 2;
    }
  }
  return from_chars_advanced_caller<
      size_t(is_supported_float_type<T>::value) +
      2 * size_t(is_supported_integer_type<T>::value)>::call(first, last, value,