From 3cafcca2ffe7112cb8d8b57cc51a218759db743b Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Wed, 29 Mar 2023 02:14:12 -0400
Subject: [PATCH] Add support for json parsing rules and integers

---
 include/fast_float/ascii_number.h | 36 ++++++++++++++++++++++---------
 include/fast_float/fast_float.h   | 19 ++++++++++++----
 include/fast_float/parse_number.h |  4 ++--
 3 files changed, 43 insertions(+), 16 deletions(-)
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 9ad754e..83d7f9b 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -96,10 +96,10 @@ typedef span<const char> byte_span;
 struct parsed_number_string {
   int64_t exponent{0};
   uint64_t mantissa{0};
-  uint64_t integer_value{-1};
   const char *lastmatch{nullptr};
   bool negative{false};
   bool valid{false};
+  bool is_64bit_uint{false};
   bool too_many_digits{false};
   // contains the range of the significant digits
   byte_span integer{};  // non-nullable
@@ -111,6 +111,8 @@ struct parsed_number_string {
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept {
   const chars_format fmt = options.format;
+  const parse_rules rules = options.rules;
+  const bool parse_ints = options.parse_ints;
   const char decimal_point = options.decimal_point;
 
   parsed_number_string answer;
@@ -126,9 +128,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     if (p == pend) {
       return answer;
     }
-    if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
-      return answer;
-    }
+    // a sign must be followed by a digit or, except under json rules, by the decimal point
+    if (!is_integer(*p) && (rules == parse_rules::json_rules || *p != decimal_point))
+        return answer;
   }
   const char *const start_digits = p;
 
@@ -144,9 +146,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
   const char *const end_of_integer_part = p;
   int64_t digit_count = int64_t(end_of_integer_part - start_digits);
   answer.integer = byte_span(start_digits, size_t(digit_count));
-  answer.integer_value = i;
   int64_t exponent = 0;
-  if ((p != pend) && (*p == decimal_point)) {
+  const bool has_decimal_point = (p != pend) && (*p == decimal_point);
+  if (has_decimal_point) {
     ++p;
     const char* before = p;
     // can occur at most twice without overflowing, but let it occur more, since
@@ -164,8 +166,8 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     answer.fraction = byte_span(before, size_t(p - before));
     digit_count -= exponent;
   }
-  // we must have encountered at least one integer!
-  if (digit_count == 0) {
+  // we must have encountered at least one digit (or at least two when a decimal point is present under json rules).
+  if (digit_count == 0 || (rules == parse_rules::json_rules && has_decimal_point && digit_count == 1)) {
     return answer;
   }
   int64_t exp_number = 0;            // explicit exponential part
@@ -201,6 +203,11 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     // If it scientific and not fixed, we have to bail out.
     if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
   }
+  
+  // disallow leading zeros before the decimal point
+  if (rules == parse_rules::json_rules && start_digits[0] == '0' && digit_count >= 2 && is_integer(start_digits[1]))
+      return answer;
+
   answer.lastmatch = p;
   answer.valid = true;
 
@@ -219,8 +226,16 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       if(*start == '0') { digit_count --; }
       start++;
     }
-    if (digit_count > 19) {
-      answer.too_many_digits = true;
+    constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000};
+    // maya: A 64-bit unsigned integer may have up to 20 decimal digits, not 19.
+    // If we're parsing ints, preserve accuracy up to 20 digits instead
+    // of converting them to the closest floating point value.
+    answer.too_many_digits = rules == parse_rules::json_rules && parse_ints ?
+        answer.is_integer && (digit_count > 20 || i < minimal_twenty_digit_integer) :
+        digit_count > 19;
+        
+    if (answer.too_many_digits) {
+      answer.is_64bit_uint = false;
       // Let us start again, this time, avoiding overflows.
       // We don't need to check if is_integer, since we use the
       // pre-tokenized spans from above.
@@ -245,6 +260,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       }
       // We have now corrected both exponent and i, to a truncated value
     }
+    else answer.is_64bit_uint = (p == end_of_integer_part);
   }
   answer.exponent = exponent;
   answer.mantissa = i;
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index d84405d..2bfabdc 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -13,6 +13,10 @@ enum chars_format {
     general = fixed | scientific
 };
 
+enum parse_rules { // selects which validity checks parse_number_string applies (read from parse_options::rules)
+    std_rules,   // default for parse_options; a sign may be followed directly by the decimal point
+    json_rules,  // stricter: a sign must be followed by a digit, and leading zeros before further digits are rejected
+};
 
 struct from_chars_result {
   const char *ptr;
@@ -20,12 +24,18 @@ struct from_chars_result {
 };
 
 struct parse_options {
-  constexpr explicit parse_options(chars_format fmt = chars_format::general,
-                         char dot = '.')
-    : format(fmt), decimal_point(dot) {}
+  constexpr explicit parse_options( // NOTE(review): `dot` moves from the 2nd to the 4th parameter; two-argument callers must be updated
+      chars_format fmt = chars_format::general,
+      parse_rules rules = parse_rules::std_rules,
+      bool parse_ints = false, char dot = '.')
+    : format(fmt), rules(rules), parse_ints(parse_ints), decimal_point(dot) {}
 
   /** Which number formats are accepted */
   chars_format format;
+  /** Which parsing rules to use */
+  parse_rules rules;
+  /** Whether to parse integers too, only applicable with json_rules */
+  bool parse_ints;
   /** The character used as decimal point */
   char decimal_point;
 };
@@ -69,7 +79,8 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 namespace fast_float {
 template <typename T>
 FASTFLOAT_CONSTEXPR20
+from_chars_result from_chars_preparsed(parsed_number_string parsed, // from_chars_advanced passes the result of parse_number_string here
+    const char* first, const char* last, T& value) noexcept; // first/last: from_chars_advanced forwards the same pointers it gave to parse_number_string
 }
 
 // namespace fast_float
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index c880f1e..c13b641 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -141,7 +141,7 @@ from_chars_result from_chars(const char *first, const char *last,
 
 template<typename T>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars_preparsed(parsed_number_string pns, T& value) noexcept
+from_chars_result from_chars_preparsed(parsed_number_string pns, const char* first, const char* last, T& value) noexcept
 {
   static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
 
@@ -221,7 +221,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
     answer.ptr = first;
     return answer;
   }
-  answer = from_chars_preparsed(parse_number_string(first, last, options), value);
+  answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value);
   return answer;
 }