Option to forbid nan/inf, refactor

2026-01-01 03:12:18 +08:00 · 2023-04-15 23:16:01 -04:00 · 2023-04-15 23:16:01 -04:00 · c849b7a8ff
commit c849b7a8ff
parent cda25408bc
4 changed files with 72 additions and 46 deletions
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@ -157,7 +157,7 @@ uint32_t parse_eight_digits_unrolled(const char16_t* chars)  noexcept {
  if (cpp20_and_in_constexpr() || !has_simd()) {
    return parse_eight_digits_unrolled(read_u64(chars));
  }
-#if !FASTFLOAT_SSE2
+#if !FASTFLOAT_HAS_SIMD
  return 0; // never reaches here, satisfy compiler
 #else
 FASTFLOAT_SIMD_DISABLE_WARNINGS
@ -184,7 +184,7 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe
    i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars));
    return true;
  }
-#if !FASTFLOAT_SSE2
+#if !FASTFLOAT_HAS_SIMD
  return false; // never reaches here, satisfy compiler
 #else
 FASTFLOAT_SIMD_DISABLE_WARNINGS
@ -210,10 +210,10 @@ template <typename CharT>
 struct parsed_number_string {
  int64_t exponent{0};
  uint64_t mantissa{0};
+  int64_t exp_number{0};
  const CharT *lastmatch{nullptr};
  bool negative{false};
  bool valid{false};
-  bool is_64bit_int{false};
  bool too_many_digits{false};
  // contains the range of the significant digits
  span<const CharT> integer{};  // non-nullable
@ -224,7 +224,7 @@ struct parsed_number_string {
 // parse an ASCII string.
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options, const bool parse_ints = false) noexcept {
+parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept {
  const chars_format fmt = options.format;
  const parse_rules rules = options.rules;
  const CharT decimal_point = static_cast<CharT>(options.decimal_point);
@ -322,7 +322,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen

  answer.lastmatch = p;
  answer.valid = true;
-  answer.is_64bit_int = (p == end_of_integer_part);
+  answer.exp_number = exp_number;

  // If we frequently had to deal with long strings of digits,
  // we could extend our code by using a 128-bit integer instead
@ -339,45 +339,49 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
      if(*start == static_cast<CharT>('0')) { digit_count --; }
      start++;
    }
-    constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL};
-    // maya: A 64-bit number may have up to 20 digits!
-    // If we're parsing ints, preserve accuracy up to 20 digits 
-    // instead of rounding them to a floating point value.
-    answer.too_many_digits = rules == parse_rules::json_rules && parse_ints && answer.is_64bit_int ?
-        (digit_count > 20 || i < minimal_twenty_digit_integer) : digit_count > 19;
-        
-    if (answer.too_many_digits) {
-      answer.is_64bit_int = false;
-      // Let us start again, this time, avoiding overflows.
-      // We don't need to check if is_integer, since we use the
-      // pre-tokenized spans from above.
-      i = 0;
-      p = answer.integer.ptr;
-      const CharT* int_end = p + answer.integer.len();
-      const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
-      while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-        i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
-        ++p;
-      }
-      if (i >= minimal_nineteen_digit_integer) { // We have a big integers
-        exponent = end_of_integer_part - p + exp_number;
-      } else { // We have a value with a fractional component.
-          p = answer.fraction.ptr;
-          const CharT* frac_end = p + answer.fraction.len();
-          while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
-            i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
-            ++p;
-          }
-          exponent = answer.fraction.ptr - p + exp_number;
-      }
-      // We have now corrected both exponent and i, to a truncated value
-    }
+
+    // exponent/mantissa must be truncated later
+    answer.too_many_digits = digit_count > 19;
  }
  answer.exponent = exponent;
  answer.mantissa = i;
  return answer;
 }

+template <typename CharT>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void truncate_exponent_mantissa(parsed_number_string<CharT>& ps)
+{
+  // Rebuild ps.mantissa and ps.exponent from the pre-tokenized digit spans,
+  // stopping before the 64-bit accumulator can overflow. No is_integer
+  // check is needed because the integer and fraction spans are separate.
+  uint64_t i = 0;
+  int64_t exponent = 0;
+  const CharT* p = ps.integer.ptr;
+  const CharT* const int_end = p + ps.integer.len();
+  const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; // 10^18
+  while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+    i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
+    ++p;
+  }
+  if (i >= minimal_nineteen_digit_integer) { // Big integer: each unread integer digit raises the exponent by one.
+    exponent = int_end - p + ps.exp_number;
+  }
+  else { // Integer part stayed below 10^18: keep accumulating from the fraction.
+    p = ps.fraction.ptr;
+    const CharT* const frac_end = p + ps.fraction.len();
+    while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+      i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
+      ++p;
+    }
+    exponent = ps.fraction.ptr - p + ps.exp_number; // each consumed fraction digit lowers the exponent by one
+  }
+  // Both exponent and i now describe the truncated value; write them back.
+
+  ps.exponent = exponent;
+  ps.mantissa = i;
+}
+
 } // namespace fast_float

 #endif
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@ -26,18 +26,34 @@ struct from_chars_result {

 struct parse_options {
  constexpr explicit parse_options(
-      chars_format fmt = chars_format::general,
-      parse_rules rules = parse_rules::std_rules, char dot = '.')
-    : format(fmt), rules(rules), decimal_point(dot) {}
+    chars_format fmt = chars_format::general,
+    parse_rules rules = parse_rules::std_rules, 
+    char dot = '.', bool allow_inf_nan = true)
+    : format(fmt), rules(rules), allow_inf_nan(allow_inf_nan), decimal_point(dot) {}

  /** Which number formats are accepted */
  chars_format format;
  /** Which parsing rules to use */
  parse_rules rules;
+  /** Whether inf and nan are allowed (constructor default: true) */
+  bool allow_inf_nan;
  /** The character used as decimal point */
  char decimal_point;
 };

+struct preparsed_parse_options { // options taken by from_chars_preparsed
+  constexpr explicit preparsed_parse_options(
+    bool allow_inf_nan = true)
+    : allow_inf_nan(allow_inf_nan) {}
+
+  constexpr preparsed_parse_options( // not explicit: a parse_options converts implicitly
+    const parse_options& options)
+    : allow_inf_nan(options.allow_inf_nan) {} // only the inf/nan flag is carried over
+
+  /** Whether inf and nan are allowed (constructor default: true) */
+  bool allow_inf_nan;
+};
+
 /**
 * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting
 * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale.
@ -78,7 +94,7 @@ namespace fast_float {
 template <typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
 from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> parsed, 
-    const CharT* first, const CharT* last, T& value) noexcept;
+    const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept;
 }

 // namespace fast_float
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@ -78,6 +78,9 @@
 #endif
 #endif

+#if FASTFLOAT_SSE2
+#define FASTFLOAT_HAS_SIMD (1)
+#endif

 #if defined(__GNUC__)
 #define FASTFLOAT_SIMD_DISABLE_WARNINGS \
@ -124,7 +127,7 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
 }

 fastfloat_really_inline constexpr bool has_simd() {
-#if FASTFLOAT_SSE2
+#if FASTFLOAT_HAS_SIMD
  return true;
 #else
  return false;
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@ -143,15 +143,18 @@ from_chars_result<CharT> from_chars(const CharT *first, const CharT *last,

 template<typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
-from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, const CharT* first, const CharT* last, T& value) noexcept
+from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept
 {
  static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");

-  
+
  from_chars_result<CharT> answer;
  if (!pns.valid) {
-    return detail::parse_infnan(first, last, value);
+    return options.allow_inf_nan ? detail::parse_infnan(first, last, value) : answer;
  }
+  if (pns.too_many_digits)
+    truncate_exponent_mantissa(pns);
+
  answer.ec = std::errc(); // be optimistic
  answer.ptr = pns.lastmatch;
  // The implementation of the Clinger's fast path is convoluted because