From c849b7a8ff0520d2d337e23ef9bcca5d4f274619 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Sat, 15 Apr 2023 23:16:01 -0400
Subject: [PATCH] Option to forbid nan/inf, refactor

---
 include/fast_float/ascii_number.h | 80 ++++++++++++++++---------------
 include/fast_float/fast_float.h   | 24 ++++++++--
 include/fast_float/float_common.h |  5 +-
 include/fast_float/parse_number.h |  9 ++--
 4 files changed, 72 insertions(+), 46 deletions(-)
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index aef072d..9fbe9ac 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -157,7 +157,7 @@ uint32_t parse_eight_digits_unrolled(const char16_t* chars)  noexcept {
   if (cpp20_and_in_constexpr() || !has_simd()) {
     return parse_eight_digits_unrolled(read_u64(chars));
   }
-#if !FASTFLOAT_SSE2
+#if !FASTFLOAT_HAS_SIMD
   return 0; // never reaches here, satisfy compiler
 #else
 FASTFLOAT_SIMD_DISABLE_WARNINGS
@@ -184,7 +184,7 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe
     i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars));
     return true;
   }
-#if !FASTFLOAT_SSE2
+#if !FASTFLOAT_HAS_SIMD
   return false; // never reaches here, satisfy compiler
 #else
 FASTFLOAT_SIMD_DISABLE_WARNINGS
@@ -210,10 +210,10 @@ template <typename CharT>
 struct parsed_number_string {
   int64_t exponent{0};
   uint64_t mantissa{0};
+  int64_t exp_number{0};
   const CharT *lastmatch{nullptr};
   bool negative{false};
   bool valid{false};
-  bool is_64bit_int{false};
   bool too_many_digits{false};
   // contains the range of the significant digits
   span<const CharT> integer{};  // non-nullable
@@ -224,7 +224,7 @@ struct parsed_number_string {
 // parse an ASCII string.
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options, const bool parse_ints = false) noexcept {
+parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept {
   const chars_format fmt = options.format;
   const parse_rules rules = options.rules;
   const CharT decimal_point = static_cast<CharT>(options.decimal_point);
@@ -322,7 +322,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
 
   answer.lastmatch = p;
   answer.valid = true;
-  answer.is_64bit_int = (p == end_of_integer_part);
+  answer.exp_number = exp_number;
 
   // If we frequently had to deal with long strings of digits,
   // we could extend our code by using a 128-bit integer instead
@@ -339,45 +339,49 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
       if(*start == static_cast<CharT>('0')) { digit_count --; }
       start++;
     }
-    constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL};
-    // maya: A 64-bit number may have up to 20 digits!
-    // If we're parsing ints, preserve accuracy up to 20 digits 
-    // instead of rounding them to a floating point value.
-    answer.too_many_digits = rules == parse_rules::json_rules && parse_ints && answer.is_64bit_int ?
-        (digit_count > 20 || i < minimal_twenty_digit_integer) : digit_count > 19;
-        
-    if (answer.too_many_digits) {
-      answer.is_64bit_int = false;
-      // Let us start again, this time, avoiding overflows.
-      // We don't need to check if is_integer, since we use the
-      // pre-tokenized spans from above.
-      i = 0;
-      p = answer.integer.ptr;
-      const CharT* int_end = p + answer.integer.len();
-      const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
-      while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-        i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
-        ++p;
-      }
-      if (i >= minimal_nineteen_digit_integer) { // We have a big integers
-        exponent = end_of_integer_part - p + exp_number;
-      } else { // We have a value with a fractional component.
-          p = answer.fraction.ptr;
-          const CharT* frac_end = p + answer.fraction.len();
-          while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
-            i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
-            ++p;
-          }
-          exponent = answer.fraction.ptr - p + exp_number;
-      }
-      // We have now corrected both exponent and i, to a truncated value
-    }
+
+    // exponent/mantissa must be truncated later
+    answer.too_many_digits = digit_count > 19;
   }
   answer.exponent = exponent;
   answer.mantissa = i;
   return answer;
 }
 
+template <typename CharT>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void truncate_exponent_mantissa(parsed_number_string<CharT>& ps)
+{
+  // Recomputes ps.mantissa / ps.exponent from the pre-tokenized digit spans
+  // (ps.integer, then ps.fraction). Digits are accumulated only while the
+  // value is below 10^18, so the uint64_t accumulator cannot overflow.
+  uint64_t i = 0;
+  int64_t exponent = 0;
+  const CharT* p = ps.integer.ptr;
+  const CharT* const int_end = p + ps.integer.len();
+  const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; // 10^18
+  while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+    i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
+    ++p;
+  }
+  if (i >= minimal_nineteen_digit_integer) { // Big integer: the integer digits alone reached 19 digits.
+    exponent = int_end - p + ps.exp_number; // integer digits left unread, plus ps.exp_number
+  }
+  else { // The integer part fit; keep accumulating from the fractional digits.
+    p = ps.fraction.ptr;
+    const CharT* const frac_end = p + ps.fraction.len();
+    while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+      i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
+      ++p;
+    }
+    exponent = ps.fraction.ptr - p + ps.exp_number; // minus the fraction digits consumed, plus ps.exp_number
+  }
+  // Both exponent and i now describe the truncated value; store them back.
+
+  ps.exponent = exponent;
+  ps.mantissa = i;
+}
+
 } // namespace fast_float
 
 #endif
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index 91870a7..470d05a 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -26,18 +26,34 @@ struct from_chars_result {
 
 struct parse_options {
   constexpr explicit parse_options(
-      chars_format fmt = chars_format::general,
-      parse_rules rules = parse_rules::std_rules, char dot = '.')
-    : format(fmt), rules(rules), decimal_point(dot) {}
+    chars_format fmt = chars_format::general,
+    parse_rules rules = parse_rules::std_rules, 
+    char dot = '.', bool allow_inf_nan = true) // new flag is last and defaulted, so existing positional calls still compile
+    : format(fmt), rules(rules), allow_inf_nan(allow_inf_nan), decimal_point(dot) {}
 
   /** Which number formats are accepted */
   chars_format format;
   /** Which parsing rules to use */
   parse_rules rules;
+  /** Whether inf and nan are accepted (the constructor defaults this to true) */
+  bool allow_inf_nan;
   /** The character used as decimal point */
   char decimal_point;
 };
 
+struct preparsed_parse_options { // options taken by from_chars_preparsed
+  constexpr explicit preparsed_parse_options(
+    bool allow_inf_nan = true)
+    : allow_inf_nan(allow_inf_nan) {}
+  // Not explicit: a parse_options converts implicitly, keeping only allow_inf_nan.
+  constexpr preparsed_parse_options(
+    const parse_options& options)
+    : allow_inf_nan(options.allow_inf_nan) {}
+
+  /** Whether inf and nan are accepted; when false, from_chars_preparsed skips parse_infnan */
+  bool allow_inf_nan;
+};
+
 /**
  * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting
  * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale.
@@ -78,7 +94,7 @@ namespace fast_float {
 template <typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
 from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> parsed, 
-    const CharT* first, const CharT* last, T& value) noexcept;
+    const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept;
 }
 
 // namespace fast_float
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index fe64126..4fb03fc 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -78,6 +78,9 @@
 #endif
 #endif
 
+#if FASTFLOAT_SSE2
+#define FASTFLOAT_HAS_SIMD (1)
+#endif
 
 #if defined(__GNUC__)
 #define FASTFLOAT_SIMD_DISABLE_WARNINGS \
@@ -124,7 +127,7 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
 }
 
 fastfloat_really_inline constexpr bool has_simd() {
-#if FASTFLOAT_SSE2
+#if FASTFLOAT_HAS_SIMD
   return true;
 #else
   return false;
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index 2c2d2c3..2f30e35 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -143,15 +143,18 @@ from_chars_result<CharT> from_chars(const CharT *first, const CharT *last,
 
 template<typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
-from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, const CharT* first, const CharT* last, T& value) noexcept
+from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept
 {
   static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
 
-  
+
   from_chars_result<CharT> answer;
   if (!pns.valid) {
-    return detail::parse_infnan(first, last, value);
+    return options.allow_inf_nan ? detail::parse_infnan(first, last, value) : answer;
   }
+  if (pns.too_many_digits)
+    truncate_exponent_mantissa(pns);
+
   answer.ec = std::errc(); // be optimistic
   answer.ptr = pns.lastmatch;
   // The implementation of the Clinger's fast path is convoluted because