From 7385c2053b141bbe9fa395b371fa29dfe6082bc8 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Mon, 27 Mar 2023 22:17:02 -0400
Subject: [PATCH 01/27] Update .gitignore

---
 .gitignore | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
diff --git a/.gitignore b/.gitignore
index 1566557..6bbf906 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,18 @@ build/*
 Testing/*
 .cache/
 compile_commands.json
+
+# Visual studio
+.vs/
+Debug/
+Release/
+/out/build/
+*.sln
+*.vcxproj
+*.vcxproj.filters
+*.vcxproj.user
+*.psess
+*.vspx
+*.vsp
+*.diagsession
+*.hint
\ No newline at end of file

From 8f94758c7862a77964864c2386c2ce425d1f4aab Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Mon, 27 Mar 2023 22:50:21 -0400
Subject: [PATCH 02/27] Expose parsed string (before computation) so it can be
 reused

---
 include/fast_float/ascii_number.h |  2 ++
 include/fast_float/fast_float.h   | 12 +++++++++-
 include/fast_float/parse_number.h | 38 +++++++++++++++++++------------
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 72b8098..9ad754e 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -96,6 +96,7 @@ typedef span<const char> byte_span;
 struct parsed_number_string {
   int64_t exponent{0};
   uint64_t mantissa{0};
+  uint64_t integer_value{-1};
   const char *lastmatch{nullptr};
   bool negative{false};
   bool valid{false};
@@ -143,6 +144,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
   const char *const end_of_integer_part = p;
   int64_t digit_count = int64_t(end_of_integer_part - start_digits);
   answer.integer = byte_span(start_digits, size_t(digit_count));
+  answer.integer_value = i;
   int64_t exponent = 0;
   if ((p != pend) && (*p == decimal_point)) {
     ++p;
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index 65704da..d84405d 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -62,6 +62,16 @@ FASTFLOAT_CONSTEXPR20
 from_chars_result from_chars_advanced(const char *first, const char *last,
                                       T &value, parse_options options)  noexcept;
 
-} // namespace fast_float
+} 
+
+#include "ascii_number.h" // parsed_number_string
+
+namespace fast_float {
+template <typename T>
+FASTFLOAT_CONSTEXPR20
+from_chars_result from_chars_preparsed(parsed_number_string parsed, T& value) noexcept;
+}
+
+// namespace fast_float
 #include "parse_number.h"
 #endif // FASTFLOAT_FAST_FLOAT_H
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index d16a25d..c880f1e 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -141,24 +141,12 @@ from_chars_result from_chars(const char *first, const char *last,
 
 template<typename T>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars_advanced(const char *first, const char *last,
-                                      T &value, parse_options options)  noexcept  {
-
+from_chars_result from_chars_preparsed(parsed_number_string pns, T& value) noexcept
+{
   static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
 
-
+  
   from_chars_result answer;
-#if FASTFLOAT_SKIP_WHITE_SPACE  // disabled by default
-  while ((first != last) && fast_float::is_space(uint8_t(*first))) {
-    first++;
-  }
-#endif
-  if (first == last) {
-    answer.ec = std::errc::invalid_argument;
-    answer.ptr = first;
-    return answer;
-  }
-  parsed_number_string pns = parse_number_string(first, last, options);
   if (!pns.valid) {
     return detail::parse_infnan(first, last, value);
   }
@@ -217,6 +205,26 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
   return answer;
 }
 
+template<typename T>
+FASTFLOAT_CONSTEXPR20
+from_chars_result from_chars_advanced(const char *first, const char *last,
+                                      T &value, parse_options options)  noexcept  {
+
+  from_chars_result answer;
+#if FASTFLOAT_SKIP_WHITE_SPACE  // disabled by default
+  while ((first != last) && fast_float::is_space(uint8_t(*first))) {
+    first++;
+  }
+#endif
+  if (first == last) {
+    answer.ec = std::errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+  answer = from_chars_preparsed(parse_number_string(first, last, options), value);
+  return answer;
+}
+
 } // namespace fast_float
 
 #endif

From 3cafcca2ffe7112cb8d8b57cc51a218759db743b Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Wed, 29 Mar 2023 02:14:12 -0400
Subject: [PATCH 03/27] Add support for json parsing rules and integers

---
 include/fast_float/ascii_number.h | 36 ++++++++++++++++++++++---------
 include/fast_float/fast_float.h   | 19 ++++++++++++----
 include/fast_float/parse_number.h |  4 ++--
 3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 9ad754e..83d7f9b 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -96,10 +96,10 @@ typedef span<const char> byte_span;
 struct parsed_number_string {
   int64_t exponent{0};
   uint64_t mantissa{0};
-  uint64_t integer_value{-1};
   const char *lastmatch{nullptr};
   bool negative{false};
   bool valid{false};
+  bool is_64bit_uint{false};
   bool too_many_digits{false};
   // contains the range of the significant digits
   byte_span integer{};  // non-nullable
@@ -111,6 +111,8 @@ struct parsed_number_string {
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept {
   const chars_format fmt = options.format;
+  const parse_rules rules = options.rules;
+  const bool parse_ints = options.parse_ints;
   const char decimal_point = options.decimal_point;
 
   parsed_number_string answer;
@@ -126,9 +128,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     if (p == pend) {
       return answer;
     }
-    if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
-      return answer;
-    }
+    // a sign must be followed by an integer or the dot
+    if (!is_integer(*p) && (rules == parse_rules::json_rules || *p != decimal_point))
+        return answer;
   }
   const char *const start_digits = p;
 
@@ -144,9 +146,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
   const char *const end_of_integer_part = p;
   int64_t digit_count = int64_t(end_of_integer_part - start_digits);
   answer.integer = byte_span(start_digits, size_t(digit_count));
-  answer.integer_value = i;
   int64_t exponent = 0;
-  if ((p != pend) && (*p == decimal_point)) {
+  const bool has_decimal_point = (p != pend) && (*p == decimal_point);
+  if (has_decimal_point) {
     ++p;
     const char* before = p;
     // can occur at most twice without overflowing, but let it occur more, since
@@ -164,8 +166,8 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     answer.fraction = byte_span(before, size_t(p - before));
     digit_count -= exponent;
   }
-  // we must have encountered at least one integer!
-  if (digit_count == 0) {
+  // we must have encountered at least one integer (or two if a decimal point exists, with json rules).
+  if (digit_count == 0 || (rules == parse_rules::json_rules && has_decimal_point && digit_count == 1)) {
     return answer;
   }
   int64_t exp_number = 0;            // explicit exponential part
@@ -201,6 +203,11 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     // If it scientific and not fixed, we have to bail out.
     if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
   }
+  
+  // disallow leading zeros before the decimal point
+  if (rules == parse_rules::json_rules && start_digits[0] == '0' && digit_count >= 2 && is_integer(start_digits[1]))
+      return answer;
+
   answer.lastmatch = p;
   answer.valid = true;
 
@@ -219,8 +226,16 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       if(*start == '0') { digit_count --; }
       start++;
     }
-    if (digit_count > 19) {
-      answer.too_many_digits = true;
+    constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000};
+    // maya: A 64-bit number may have up to 20 digits, not 19! 
+    // If we're parsing ints, preserve accuracy up to 20 digits instead
+    // of converting them to the closest floating point value.
+    answer.too_many_digits = rules == parse_rules::json_rules && parse_ints ?
+        answer.is_integer && (digit_count > 20 || i < minimal_twenty_digit_integer) :
+        digit_count > 19;
+        
+    if (answer.too_many_digits) {
+      answer.is_64bit_uint = false;
       // Let us start again, this time, avoiding overflows.
       // We don't need to check if is_integer, since we use the
       // pre-tokenized spans from above.
@@ -245,6 +260,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       }
       // We have now corrected both exponent and i, to a truncated value
     }
+    else answer.is_64bit_uint = (p == end_of_integer_part);
   }
   answer.exponent = exponent;
   answer.mantissa = i;
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index d84405d..2bfabdc 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -13,6 +13,10 @@ enum chars_format {
     general = fixed | scientific
 };
 
+enum parse_rules {
+    std_rules,
+    json_rules,
+};
 
 struct from_chars_result {
   const char *ptr;
@@ -20,12 +24,18 @@ struct from_chars_result {
 };
 
 struct parse_options {
-  constexpr explicit parse_options(chars_format fmt = chars_format::general,
-                         char dot = '.')
-    : format(fmt), decimal_point(dot) {}
+  constexpr explicit parse_options(
+      chars_format fmt = chars_format::general,
+      parse_rules rules = parse_rules::std_rules,
+      bool parse_ints = false, char dot = '.', )
+    : format(fmt), rules(rules), parse_ints(parse_ints), decimal_point(dot) {}
 
   /** Which number formats are accepted */
   chars_format format;
+  /** Which parsing rules to use */
+  parse_rules rules;
+  /* Whether to parse integers too, only applicable with json_rules */
+  bool parse_ints;
   /** The character used as decimal point */
   char decimal_point;
 };
@@ -69,7 +79,8 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 namespace fast_float {
 template <typename T>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars_preparsed(parsed_number_string parsed, T& value) noexcept;
+from_chars_result from_chars_preparsed(parsed_number_string parsed, 
+    const char* first, const char* last, T& value) noexcept;
 }
 
 // namespace fast_float
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index c880f1e..c13b641 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -141,7 +141,7 @@ from_chars_result from_chars(const char *first, const char *last,
 
 template<typename T>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars_preparsed(parsed_number_string pns, T& value) noexcept
+from_chars_result from_chars_preparsed(parsed_number_string pns, const char* first, const char* last, T& value) noexcept
 {
   static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
 
@@ -221,7 +221,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
     answer.ptr = first;
     return answer;
   }
-  answer = from_chars_preparsed(parse_number_string(first, last, options), value);
+  answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value);
   return answer;
 }
 

From b6acf38a2ea6c78a95c044bab6e134aff60aa7be Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Wed, 29 Mar 2023 02:20:36 -0400
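Subject: [PATCH 04/27] Fix compile errors in JSON integer parsing

Rename parsed_number_string::is_64bit_uint to is_64bit_int and set it
as soon as the number is validated, clearing it again when the input
has too many digits. Replace the use of answer.is_integer, which is not
a member of parsed_number_string, with is_64bit_int. Add the ULL suffix
to the minimal 20-digit constant and drop the stray trailing comma in
the parse_options constructor.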

---
 include/fast_float/ascii_number.h | 13 ++++++-------
 include/fast_float/fast_float.h   |  2 +-
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 83d7f9b..58305ba 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -99,7 +99,7 @@ struct parsed_number_string {
   const char *lastmatch{nullptr};
   bool negative{false};
   bool valid{false};
-  bool is_64bit_uint{false};
+  bool is_64bit_int{false};
   bool too_many_digits{false};
   // contains the range of the significant digits
   byte_span integer{};  // non-nullable
@@ -210,6 +210,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
 
   answer.lastmatch = p;
   answer.valid = true;
+  answer.is_64bit_int = (p == end_of_integer_part);
 
   // If we frequently had to deal with long strings of digits,
   // we could extend our code by using a 128-bit integer instead
@@ -226,16 +227,15 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       if(*start == '0') { digit_count --; }
       start++;
     }
-    constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000};
+    constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL};
     // maya: A 64-bit number may have up to 20 digits, not 19! 
     // If we're parsing ints, preserve accuracy up to 20 digits instead
     // of converting them to the closest floating point value.
-    answer.too_many_digits = rules == parse_rules::json_rules && parse_ints ?
-        answer.is_integer && (digit_count > 20 || i < minimal_twenty_digit_integer) :
-        digit_count > 19;
+    answer.too_many_digits = rules == parse_rules::json_rules && parse_ints && answer.is_64bit_int ?
+        (digit_count > 20 || i < minimal_twenty_digit_integer) : digit_count > 19;
         
     if (answer.too_many_digits) {
-      answer.is_64bit_uint = false;
+      answer.is_64bit_int = false;
       // Let us start again, this time, avoiding overflows.
       // We don't need to check if is_integer, since we use the
       // pre-tokenized spans from above.
@@ -260,7 +260,6 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       }
       // We have now corrected both exponent and i, to a truncated value
     }
-    else answer.is_64bit_uint = (p == end_of_integer_part);
   }
   answer.exponent = exponent;
   answer.mantissa = i;
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index 2bfabdc..c11627f 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -27,7 +27,7 @@ struct parse_options {
   constexpr explicit parse_options(
       chars_format fmt = chars_format::general,
       parse_rules rules = parse_rules::std_rules,
-      bool parse_ints = false, char dot = '.', )
+      bool parse_ints = false, char dot = '.')
     : format(fmt), rules(rules), parse_ints(parse_ints), decimal_point(dot) {}
 
   /** Which number formats are accepted */

From a699476fd2b3344af4ceca055b9e842f1d91c870 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Thu, 30 Mar 2023 04:47:42 -0400
Subject: [PATCH 05/27] ignore VS CMakeSettings file

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 6bbf906..a2601ad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,4 +16,5 @@ Release/
 *.vspx
 *.vsp
 *.diagsession
-*.hint
\ No newline at end of file
+*.hint
+/CMakeSettings.json

From 2b118c843ab704fd437e56283961d69482ebd12a Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Thu, 30 Mar 2023 04:48:18 -0400
Subject: [PATCH 06/27] Experimental support for non-char character types
 (CharT template parameter, char16_t)

---
 include/fast_float/ascii_number.h     | 125 ++++++++++++++++++--------
 include/fast_float/digit_comparison.h |  39 ++++----
 include/fast_float/fast_float.h       |  17 ++--
 include/fast_float/float_common.h     |   6 +-
 include/fast_float/parse_number.h     |  38 ++++----
 5 files changed, 144 insertions(+), 81 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 58305ba..3d85543 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -5,15 +5,24 @@
 #include <cstdint>
 #include <cstring>
 #include <iterator>
+#include <type_traits>
 
 #include "float_common.h"
 
+#define FASTFLOAT_SSE2 1
+
+#if FASTFLOAT_SSE2
+#include <emmintrin.h>
+#endif
+
+
 namespace fast_float {
 
 // Next function can be micro-optimized, but compilers are entirely
 // able to optimize it well.
-fastfloat_really_inline constexpr bool is_integer(char c) noexcept {
-  return c >= '0' && c <= '9';
+template <typename CharT>
+fastfloat_really_inline constexpr bool is_integer(CharT c) noexcept {
+  return c >= static_cast<CharT>('0') && c <= static_cast<CharT>('9');
 }
 
 fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
@@ -28,7 +37,46 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
 }
 
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint64_t read_u64(const char *chars) {
+uint64_t fast_read_u64(const char* chars)
+{
+  uint64_t val;
+  ::memcpy(&val, chars, sizeof(uint64_t));
+  return val;
+}
+
+fastfloat_really_inline
+uint64_t fast_read_u64(const char16_t* chars)
+{
+#if FASTFLOAT_SSE2
+  const void* const p = chars;
+
+  static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff};
+  const __m128i m_masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(masks));
+  // mask hi bytes
+  __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), m_masks);
+  __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), m_masks);
+
+  // pack into chars
+  __m128i packed = _mm_packus_epi16(i1, i2);
+
+  // extract
+  uint64_t val;
+  _mm_storeu_epi64(&val, _mm_shuffle_epi32(packed, 0x8));
+  return val;
+#else
+  alignas(8) unsigned char bytes[8];
+  for (int i = 0; i < 8; ++i)
+      bytes[i] = (unsigned char)chars[i];
+
+  uint64_t val;
+  ::memcpy(&val, bytes, sizeof(uint64_t));
+  return val;
+#endif
+}
+
+template <typename CharT>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+uint64_t read_u64(const CharT *chars) {
   if (cpp20_and_in_constexpr()) {
     uint64_t val = 0;
     for(int i = 0; i < 8; ++i) {
@@ -37,8 +85,7 @@ uint64_t read_u64(const char *chars) {
     }
     return val;
   }
-  uint64_t val;
-  ::memcpy(&val, chars, sizeof(uint64_t));
+  uint64_t val = fast_read_u64(chars);
 #if FASTFLOAT_IS_BIG_ENDIAN == 1
   // Need to read as-if the number was in little-endian order.
   val = byteswap(val);
@@ -46,6 +93,7 @@ uint64_t read_u64(const char *chars) {
   return val;
 }
 
+
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 void write_u64(uint8_t *chars, uint64_t val) {
   if (cpp20_and_in_constexpr()) {
@@ -75,8 +123,9 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
   return uint32_t(val);
 }
 
+template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint32_t parse_eight_digits_unrolled(const char *chars)  noexcept  {
+uint32_t parse_eight_digits_unrolled(const CharT *chars)  noexcept  {
   return parse_eight_digits_unrolled(read_u64(chars));
 }
 
@@ -86,43 +135,46 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val
      0x8080808080808080));
 }
 
+template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
+bool is_made_of_eight_digits_fast(const CharT *chars)  noexcept  {
   return is_made_of_eight_digits_fast(read_u64(chars));
 }
 
 typedef span<const char> byte_span;
 
+template <typename CharT = char>
 struct parsed_number_string {
   int64_t exponent{0};
   uint64_t mantissa{0};
-  const char *lastmatch{nullptr};
+  const CharT *lastmatch{nullptr};
   bool negative{false};
   bool valid{false};
   bool is_64bit_int{false};
   bool too_many_digits{false};
   // contains the range of the significant digits
-  byte_span integer{};  // non-nullable
-  byte_span fraction{}; // nullable
+  span<const CharT> integer{};  // non-nullable
+  span<const CharT> fraction{}; // nullable
 };
 
 // Assuming that you use no more than 19 digits, this will
 // parse an ASCII string.
+template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept {
+parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept {
   const chars_format fmt = options.format;
   const parse_rules rules = options.rules;
   const bool parse_ints = options.parse_ints;
-  const char decimal_point = options.decimal_point;
+  const CharT decimal_point = static_cast<CharT>(options.decimal_point);
 
-  parsed_number_string answer;
+  parsed_number_string<CharT> answer;
   answer.valid = false;
   answer.too_many_digits = false;
-  answer.negative = (*p == '-');
+  answer.negative = (*p == static_cast<CharT>('-'));
 #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
-  if ((*p == '-') || (*p == '+')) {
+  if ((*p == static_cast<CharT>('-')) || (*p == static_cast<CharT>('+'))) {
 #else
-  if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
+  if (*p == static_cast<CharT>('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
 #endif
     ++p;
     if (p == pend) {
@@ -132,7 +184,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     if (!is_integer(*p) && (rules == parse_rules::json_rules || *p != decimal_point))
         return answer;
   }
-  const char *const start_digits = p;
+  const CharT *const start_digits = p;
 
   uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
 
@@ -140,17 +192,17 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     // a multiplication by 10 is cheaper than an arbitrary integer
     // multiplication
     i = 10 * i +
-        uint64_t(*p - '0'); // might overflow, we will handle the overflow later
+        uint64_t(*p - static_cast<CharT>('0')); // might overflow, we will handle the overflow later
     ++p;
   }
-  const char *const end_of_integer_part = p;
+  const CharT *const end_of_integer_part = p;
   int64_t digit_count = int64_t(end_of_integer_part - start_digits);
-  answer.integer = byte_span(start_digits, size_t(digit_count));
+  answer.integer = span<const CharT>(start_digits, size_t(digit_count));
   int64_t exponent = 0;
   const bool has_decimal_point = (p != pend) && (*p == decimal_point);
   if (has_decimal_point) {
     ++p;
-    const char* before = p;
+    const CharT* before = p;
     // can occur at most twice without overflowing, but let it occur more, since
     // for integers with many digits, digit parsing is the primary bottleneck.
     while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
@@ -158,12 +210,11 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       p += 8;
     }
     while ((p != pend) && is_integer(*p)) {
-      uint8_t digit = uint8_t(*p - '0');
+      i = i * 10 + uint64_t(*p - static_cast<CharT>('0')); // in rare cases, this will overflow, but that's ok
       ++p;
-      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
     }
     exponent = before - p;
-    answer.fraction = byte_span(before, size_t(p - before));
+    answer.fraction = span<const CharT>(before, size_t(p - before));
     digit_count -= exponent;
   }
   // we must have encountered at least one integer (or two if a decimal point exists, with json rules).
@@ -171,14 +222,14 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     return answer;
   }
   int64_t exp_number = 0;            // explicit exponential part
-  if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
-    const char * location_of_e = p;
+  if ((fmt & chars_format::scientific) && (p != pend) && ((static_cast<CharT>('e') == *p) || (static_cast<CharT>('E') == *p))) {
+    const CharT * location_of_e = p;
     ++p;
     bool neg_exp = false;
-    if ((p != pend) && ('-' == *p)) {
+    if ((p != pend) && (static_cast<CharT>('-') == *p)) {
       neg_exp = true;
       ++p;
-    } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
+    } else if ((p != pend) && (static_cast<CharT>('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
       ++p;
     }
     if ((p == pend) || !is_integer(*p)) {
@@ -190,7 +241,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       p = location_of_e;
     } else {
       while ((p != pend) && is_integer(*p)) {
-        uint8_t digit = uint8_t(*p - '0');
+        uint8_t digit = uint8_t(*p - static_cast<CharT>('0'));
         if (exp_number < 0x10000000) {
           exp_number = 10 * exp_number + digit;
         }
@@ -205,7 +256,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
   }
   
   // disallow leading zeros before the decimal point
-  if (rules == parse_rules::json_rules && start_digits[0] == '0' && digit_count >= 2 && is_integer(start_digits[1]))
+  if (rules == parse_rules::json_rules && start_digits[0] == static_cast<CharT>('0') && digit_count >= 2 && is_integer(start_digits[1]))
       return answer;
 
   answer.lastmatch = p;
@@ -222,9 +273,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     // We have to handle the case where we have 0.0000somenumber.
     // We need to be mindful of the case where we only have zeroes...
     // E.g., 0.000000000...000.
-    const char *start = start_digits;
-    while ((start != pend) && (*start == '0' || *start == decimal_point)) {
-      if(*start == '0') { digit_count --; }
+    const CharT *start = start_digits;
+    while ((start != pend) && (*start == static_cast<CharT>('0') || *start == decimal_point)) {
+      if(*start == static_cast<CharT>('0')) { digit_count --; }
       start++;
     }
     constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL};
@@ -241,19 +292,19 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       // pre-tokenized spans from above.
       i = 0;
       p = answer.integer.ptr;
-      const char* int_end = p + answer.integer.len();
+      const CharT* int_end = p + answer.integer.len();
       const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
       while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-        i = i * 10 + uint64_t(*p - '0');
+        i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
         ++p;
       }
       if (i >= minimal_nineteen_digit_integer) { // We have a big integers
         exponent = end_of_integer_part - p + exp_number;
       } else { // We have a value with a fractional component.
           p = answer.fraction.ptr;
-          const char* frac_end = p + answer.fraction.len();
+          const CharT* frac_end = p + answer.fraction.len();
           while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
-            i = i * 10 + uint64_t(*p - '0');
+            i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
             ++p;
           }
           exponent = answer.fraction.ptr - p + exp_number;
diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h
index 3959ba0..5ba91af 100644
--- a/include/fast_float/digit_comparison.h
+++ b/include/fast_float/digit_comparison.h
@@ -23,8 +23,9 @@ constexpr static uint64_t powers_of_ten_uint64[] = {
 // this algorithm is not even close to optimized, but it has no practical
 // effect on performance: in order to have a faster algorithm, we'd need
 // to slow down performance for faster algorithms, and this is still fast.
+template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14
-int32_t scientific_exponent(parsed_number_string& num) noexcept {
+int32_t scientific_exponent(parsed_number_string<CharT>& num) noexcept {
   uint64_t mantissa = num.mantissa;
   int32_t exponent = int32_t(num.exponent);
   while (mantissa >= 10000) {
@@ -154,18 +155,19 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
   am.power2 += shift;
 }
 
+template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-void skip_zeros(const char*& first, const char* last) noexcept {
+void skip_zeros(const CharT*& first, const CharT* last) noexcept {
   uint64_t val;
   while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) {
-    ::memcpy(&val, first, sizeof(uint64_t));
+    val = fast_read_u64(first);
     if (val != 0x3030303030303030) {
       break;
     }
     first += 8;
   }
   while (first != last) {
-    if (*first != '0') {
+    if (*first != static_cast<CharT>('0')) {
       break;
     }
     first++;
@@ -174,19 +176,20 @@ void skip_zeros(const char*& first, const char* last) noexcept {
 
 // determine if any non-zero digits were truncated.
 // all characters must be valid digits.
+template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool is_truncated(const char* first, const char* last) noexcept {
+bool is_truncated(const CharT* first, const CharT* last) noexcept {
   // do 8-bit optimizations, can just compare to 8 literal 0s.
   uint64_t val;
   while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) {
-    ::memcpy(&val, first, sizeof(uint64_t));
+    val = fast_read_u64(first);
     if (val != 0x3030303030303030) {
       return true;
     }
     first += 8;
   }
   while (first != last) {
-    if (*first != '0') {
+    if (*first != static_cast<CharT>('0')) {
       return true;
     }
     first++;
@@ -194,22 +197,25 @@ bool is_truncated(const char* first, const char* last) noexcept {
   return false;
 }
 
+template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool is_truncated(byte_span s) noexcept {
+bool is_truncated(span<const CharT> s) noexcept {
   return is_truncated(s.ptr, s.ptr + s.len());
 }
 
+template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
+void parse_eight_digits(const CharT*& p, limb& value, size_t& counter, size_t& count) noexcept {
   value = value * 100000000 + parse_eight_digits_unrolled(p);
   p += 8;
   counter += 8;
   count += 8;
 }
 
+template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14
-void parse_one_digit(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
-  value = value * 10 + limb(*p - '0');
+void parse_one_digit(const CharT*& p, limb& value, size_t& counter, size_t& count) noexcept {
+  value = value * 10 + limb(*p - static_cast<CharT>('0'));
   p++;
   counter++;
   count++;
@@ -230,8 +236,9 @@ void round_up_bigint(bigint& big, size_t& count) noexcept {
 }
 
 // parse the significant digits into a big integer
+template <typename CharT>
 inline FASTFLOAT_CONSTEXPR20
-void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits, size_t& digits) noexcept {
+void parse_mantissa(bigint& result, parsed_number_string<CharT>& num, size_t max_digits, size_t& digits) noexcept {
   // try to minimize the number of big integer and scalar multiplication.
   // therefore, try to parse 8 digits at a time, and multiply by the largest
   // scalar value (9 or 19 digits) for each step.
@@ -245,8 +252,8 @@ void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits
 #endif
 
   // process all integer digits.
-  const char* p = num.integer.ptr;
-  const char* pend = p + num.integer.len();
+  const CharT* p = num.integer.ptr;
+  const CharT* pend = p + num.integer.len();
   skip_zeros(p, pend);
   // process all digits, in increments of step per loop
   while (p != pend) {
@@ -395,9 +402,9 @@ adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int
 // `b` as a big-integer type, scaled to the same binary exponent as
 // the actual digits. we then compare the big integer representations
 // of both, and use that to direct rounding.
-template <typename T>
+template <typename T, typename CharT>
 inline FASTFLOAT_CONSTEXPR20
-adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa am) noexcept {
+adjusted_mantissa digit_comp(parsed_number_string<CharT>& num, adjusted_mantissa am) noexcept {
   // remove the invalid exponent bias
   am.power2 -= invalid_am_bias;
 
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index c11627f..d4648e6 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -18,8 +18,9 @@ enum parse_rules {
     json_rules,
 };
 
+template <typename CharT>
 struct from_chars_result {
-  const char *ptr;
+  const CharT *ptr;
   std::errc ec;
 };
 
@@ -59,17 +60,17 @@ struct parse_options {
  * to determine whether we allow the fixed point and scientific notation respectively.
  * The default is  `fast_float::chars_format::general` which allows both `fixed` and `scientific`.
  */
-template<typename T>
+template<typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars(const char *first, const char *last,
+from_chars_result<CharT> from_chars(const CharT *first, const CharT *last,
                              T &value, chars_format fmt = chars_format::general)  noexcept;
 
 /**
  * Like from_chars, but accepts an `options` argument to govern number parsing.
  */
-template<typename T>
+template<typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars_advanced(const char *first, const char *last,
+from_chars_result<CharT> from_chars_advanced(const CharT *first, const CharT *last,
                                       T &value, parse_options options)  noexcept;
 
 } 
@@ -77,10 +78,10 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 #include "ascii_number.h" // parsed_number_string
 
 namespace fast_float {
-template <typename T>
+template <typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars_preparsed(parsed_number_string parsed, 
-    const char* first, const char* last, T& value) noexcept;
+from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> parsed, 
+    const CharT* first, const CharT* last, T& value) noexcept;
 }
 
 // namespace fast_float
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index c878486..eaa6e73 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -106,11 +106,13 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
 }
 
 // Compares two ASCII strings in a case insensitive manner.
+// maya: for now, keep input2 ASCII only
+template <typename CharT>
 inline FASTFLOAT_CONSTEXPR14 bool
-fastfloat_strncasecmp(const char *input1, const char *input2, size_t length) {
+fastfloat_strncasecmp(const CharT *input1, const char *input2, size_t length) {
   char running_diff{0};
   for (size_t i = 0; i < length; i++) {
-    running_diff |= (input1[i] ^ input2[i]);
+    running_diff |= (static_cast<char>(input1[i]) ^ input2[i]);
   }
   return (running_diff == 0) || (running_diff == 32);
 }
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index c13b641..2c2d2c3 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -19,19 +19,19 @@ namespace detail {
  * The case comparisons could be made much faster given that we know that the
  * strings a null-free and fixed.
  **/
-template <typename T>
-from_chars_result FASTFLOAT_CONSTEXPR14
-parse_infnan(const char *first, const char *last, T &value)  noexcept  {
-  from_chars_result answer{};
+template <typename T, typename CharT>
+from_chars_result<CharT> FASTFLOAT_CONSTEXPR14
+parse_infnan(const CharT *first, const CharT *last, T &value)  noexcept  {
+  from_chars_result<CharT> answer{};
   answer.ptr = first;
   answer.ec = std::errc(); // be optimistic
   bool minusSign = false;
-  if (*first == '-') { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here
+  if (*first == static_cast<CharT>('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here
       minusSign = true;
       ++first;
   }
 #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
-  if (*first == '+') {
+  if (*first == static_cast<CharT>('+')) {
       ++first;
   }
 #endif
@@ -40,13 +40,15 @@ parse_infnan(const char *first, const char *last, T &value)  noexcept  {
       answer.ptr = (first += 3);
       value = minusSign ? -std::numeric_limits<T>::quiet_NaN() : std::numeric_limits<T>::quiet_NaN();
       // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
-      if(first != last && *first == '(') {
-        for(const char* ptr = first + 1; ptr != last; ++ptr) {
-          if (*ptr == ')') {
+      if(first != last && *first == static_cast<CharT>('(')) {
+        for(const CharT* ptr = first + 1; ptr != last; ++ptr) {
+          if (*ptr == static_cast<CharT>(')')) {
             answer.ptr = ptr + 1; // valid nan(n-char-seq-opt)
             break;
           }
-          else if(!(('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z') || ('0' <= *ptr && *ptr <= '9') || *ptr == '_'))
+          else if(!((static_cast<CharT>('a') <= *ptr && *ptr <= static_cast<CharT>('z')) || 
+              (static_cast<CharT>('A') <= *ptr && *ptr <= static_cast<CharT>('Z')) || 
+              (static_cast<CharT>('0') <= *ptr && *ptr <= static_cast<CharT>('9')) || *ptr == static_cast<CharT>('_')))
             break; // forbidden char, not nan(n-char-seq-opt)
         }
       }
@@ -132,21 +134,21 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept {
 
 } // namespace detail
 
-template<typename T>
+template<typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars(const char *first, const char *last,
+from_chars_result<CharT> from_chars(const CharT *first, const CharT *last,
                              T &value, chars_format fmt /*= chars_format::general*/)  noexcept  {
   return from_chars_advanced(first, last, value, parse_options{fmt});
 }
 
-template<typename T>
+template<typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars_preparsed(parsed_number_string pns, const char* first, const char* last, T& value) noexcept
+from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, const CharT* first, const CharT* last, T& value) noexcept
 {
   static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
 
   
-  from_chars_result answer;
+  from_chars_result<CharT> answer;
   if (!pns.valid) {
     return detail::parse_infnan(first, last, value);
   }
@@ -205,12 +207,12 @@ from_chars_result from_chars_preparsed(parsed_number_string pns, const char* fir
   return answer;
 }
 
-template<typename T>
+template<typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars_advanced(const char *first, const char *last,
+from_chars_result<CharT> from_chars_advanced(const CharT *first, const CharT *last,
                                       T &value, parse_options options)  noexcept  {
 
-  from_chars_result answer;
+  from_chars_result<CharT> answer;
 #if FASTFLOAT_SKIP_WHITE_SPACE  // disabled by default
   while ((first != last) && fast_float::is_space(uint8_t(*first))) {
     first++;

From 20f3870361f9f799b528dad3466f3ec5d38374d4 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Thu, 30 Mar 2023 04:51:27 -0400
Subject: [PATCH 07/27] Fix fast_read_u64: drop constexpr, adjust SSE2 pointer cast and store intrinsic

---
 include/fast_float/ascii_number.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 3d85543..2676182 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -36,7 +36,7 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
     | (val & 0x00000000000000FF) << 56;
 }
 
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+fastfloat_really_inline
 uint64_t fast_read_u64(const char* chars)
 {
   uint64_t val;
@@ -48,7 +48,7 @@ fastfloat_really_inline
 uint64_t fast_read_u64(const char16_t* chars)
 {
 #if FASTFLOAT_SSE2
-  const void* const p = chars;
+  const unsigned char* const p = reinterpret_cast<const unsigned char *>(chars);
 
   static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff};
   const __m128i m_masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(masks));
@@ -61,7 +61,7 @@ uint64_t fast_read_u64(const char16_t* chars)
 
   // extract
   uint64_t val;
-  _mm_storeu_epi64(&val, _mm_shuffle_epi32(packed, 0x8));
+  _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8));
   return val;
 #else
   alignas(8) unsigned char bytes[8];

From f59f73c4dac05e115e186a3b5a4401bd808c5781 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Sat, 1 Apr 2023 04:09:00 -0400
Subject: [PATCH 08/27] Disable simd-related warnings

---
 include/fast_float/ascii_number.h | 24 +++++++++++-------------
 include/fast_float/fast_float.h   |  7 ++-----
 include/fast_float/float_common.h | 18 ++++++++++++++++++
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 2676182..9c2ed8a 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -9,8 +9,6 @@
 
 #include "float_common.h"
 
-#define FASTFLOAT_SSE2 1
-
 #if FASTFLOAT_SSE2
 #include <emmintrin.h>
 #endif
@@ -44,25 +42,26 @@ uint64_t fast_read_u64(const char* chars)
   return val;
 }
 
+// https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg
 fastfloat_really_inline
 uint64_t fast_read_u64(const char16_t* chars)
 {
 #if FASTFLOAT_SSE2
-  const unsigned char* const p = reinterpret_cast<const unsigned char *>(chars);
-
+FASTFLOAT_SIMD_DISABLE_WARNINGS
   static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff};
   const __m128i m_masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(masks));
-  // mask hi bytes
+
+  // mask hi bytes and pack
+  const char* const p = reinterpret_cast<const char*>(chars);
   __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), m_masks);
   __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), m_masks);
-
-  // pack into chars
   __m128i packed = _mm_packus_epi16(i1, i2);
 
   // extract
   uint64_t val;
   _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8));
   return val;
+FASTFLOAT_SIMD_RESTORE_WARNINGS
 #else
   alignas(8) unsigned char bytes[8];
   for (int i = 0; i < 8; ++i)
@@ -143,7 +142,7 @@ bool is_made_of_eight_digits_fast(const CharT *chars)  noexcept  {
 
 typedef span<const char> byte_span;
 
-template <typename CharT = char>
+template <typename CharT>
 struct parsed_number_string {
   int64_t exponent{0};
   uint64_t mantissa{0};
@@ -161,10 +160,9 @@ struct parsed_number_string {
 // parse an ASCII string.
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept {
+parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options, const bool parse_ints = false) noexcept {
   const chars_format fmt = options.format;
   const parse_rules rules = options.rules;
-  const bool parse_ints = options.parse_ints;
   const CharT decimal_point = static_cast<CharT>(options.decimal_point);
 
   parsed_number_string<CharT> answer;
@@ -279,9 +277,9 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
       start++;
     }
     constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL};
-    // maya: A 64-bit number may have up to 20 digits, not 19! 
-    // If we're parsing ints, preserve accuracy up to 20 digits instead
-    // of converting them to the closest floating point value.
+    // maya: A 64-bit number may have up to 20 digits!
+    // If we're parsing ints, preserve accuracy up to 20 digits 
+    // instead of rounding them to a floating point value.
     answer.too_many_digits = rules == parse_rules::json_rules && parse_ints && answer.is_64bit_int ?
         (digit_count > 20 || i < minimal_twenty_digit_integer) : digit_count > 19;
         
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index d4648e6..91870a7 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -27,16 +27,13 @@ struct from_chars_result {
 struct parse_options {
   constexpr explicit parse_options(
       chars_format fmt = chars_format::general,
-      parse_rules rules = parse_rules::std_rules,
-      bool parse_ints = false, char dot = '.')
-    : format(fmt), rules(rules), parse_ints(parse_ints), decimal_point(dot) {}
+      parse_rules rules = parse_rules::std_rules, char dot = '.')
+    : format(fmt), rules(rules), decimal_point(dot) {}
 
   /** Which number formats are accepted */
   chars_format format;
   /** Which parsing rules to use */
   parse_rules rules;
-  /* Whether to parse integers too, only applicable with json_rules */
-  bool parse_ints;
   /** The character used as decimal point */
   char decimal_point;
 };
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index eaa6e73..7ca3284 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -78,6 +78,24 @@
 #endif
 #endif
 
+
+#if defined(__GNUC__)
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS \
+  _Pragma("GCC diagnostic push") \
+  _Pragma("GCC diagnostic ignored \"-Wcast-align=strict\"")
+#else
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS
+#endif
+
+#if defined(__GNUC__)
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS \
+  _Pragma("GCC diagnostic pop")
+#else
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS
+#endif
+
+
+
 #ifdef FASTFLOAT_VISUAL_STUDIO
 #define fastfloat_really_inline __forceinline
 #else

From 8a9a9d538a2721ad6d8143230a08104c30032a37 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Sat, 1 Apr 2023 22:43:00 -0400
Subject: [PATCH 09/27] SIMD optimization to parse 8 char16_t at a time

---
 include/fast_float/ascii_number.h | 77 +++++++++++++++++++++++++------
 1 file changed, 62 insertions(+), 15 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 9c2ed8a..3b192dc 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -35,8 +35,7 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
 }
 
 fastfloat_really_inline
-uint64_t fast_read_u64(const char* chars)
-{
+uint64_t fast_read_u64(const char* chars) {
   uint64_t val;
   ::memcpy(&val, chars, sizeof(uint64_t));
   return val;
@@ -44,8 +43,7 @@ uint64_t fast_read_u64(const char* chars)
 
 // https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg
 fastfloat_really_inline
-uint64_t fast_read_u64(const char16_t* chars)
-{
+uint64_t fast_read_u64(const char16_t* chars) {
 #if FASTFLOAT_SSE2
 FASTFLOAT_SIMD_DISABLE_WARNINGS
   static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff};
@@ -63,7 +61,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
   return val;
 FASTFLOAT_SIMD_RESTORE_WARNINGS
 #else
-  alignas(8) unsigned char bytes[8];
+  unsigned char bytes[8];
   for (int i = 0; i < 8; ++i)
       bytes[i] = (unsigned char)chars[i];
 
@@ -122,24 +120,74 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
   return uint32_t(val);
 }
 
-template <typename CharT>
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint32_t parse_eight_digits_unrolled(const CharT *chars)  noexcept  {
-  return parse_eight_digits_unrolled(read_u64(chars));
-}
-
 // credit @aqrit
 fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
   return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
      0x8080808080808080));
 }
 
-template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool is_made_of_eight_digits_fast(const CharT *chars)  noexcept  {
+uint32_t parse_eight_digits_unrolled(const char* chars)  noexcept {
+    return parse_eight_digits_unrolled(read_u64(chars));
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
   return is_made_of_eight_digits_fast(read_u64(chars));
 }
 
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcept {
+    const bool all = is_made_of_eight_digits_fast(chars);
+    if (all) i = i * 100000000 * parse_eight_digits_unrolled(chars);
+    return all;
+}
+
+// http://0x80.pl/articles/simd-parsing-int-sequences.html
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noexcept {
+  if (cpp20_and_in_constexpr() || !FASTFLOAT_SSE2) {
+    for (int i = 0; i < 8; ++i) {
+      if (chars[i] < u'0' || chars[i] > u'9')
+        return false;
+    }
+    i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars));
+    return true;
+  }
+#if FASTFLOAT_SSE2
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  const __m128i data = _mm_loadu_si128(reinterpret_cast<const char*>(chars));
+
+  // (x - '0') <= 9
+  const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80));
+  const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119));
+  const bool is_digits = _mm_movemask_epi8(t1) == 0;
+
+  if (is_digits) {
+    // x - '0'
+    const __m128i s1digits16 = _mm_sub_epi16(data, _mm_set1_epi16('0'));
+    // 10 * x(b) + x(b-1) -> 2 digit numbers
+    const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1);
+    const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32);
+    // 100 * x(b) + x(b-1) -> 4 digit numbers
+    const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1));
+    const __m128i s4digits16 = _mm_packus_epi16(s4digits32, s4digits32);
+    // 10000 * x(b) + x(b-1) -> 8 digit number
+    const __m128i s8digits32 = _mm_madd_epi16(s4digits16, _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1));
+
+    uint32_t value;
+    _mm_storeu_si32(&value, s8digits32);
+
+    i = i * 100000000 + value;
+    return true;
+  }
+  else return false;
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+#endif
+}
+
+
+
 typedef span<const char> byte_span;
 
 template <typename CharT>
@@ -203,8 +251,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
     const CharT* before = p;
     // can occur at most twice without overflowing, but let it occur more, since
     // for integers with many digits, digit parsing is the primary bottleneck.
-    while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
-      i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
+    while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p)) {  // in rare cases, this will overflow, but that's ok
       p += 8;
     }
     while ((p != pend) && is_integer(*p)) {

From 2d57c09530445f1c4c98d6b128ef953767207e8e Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Sat, 1 Apr 2023 22:46:43 -0400
Subject: [PATCH 10/27] Fix typos in SIMD char16_t digit parsing, add has_simd()

---
 include/fast_float/ascii_number.h | 17 ++++++++++-------
 include/fast_float/float_common.h | 10 +++++++++-
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 3b192dc..6acd3da 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -126,8 +126,9 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val
      0x8080808080808080));
 }
 
+template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint32_t parse_eight_digits_unrolled(const char* chars)  noexcept {
+uint32_t parse_eight_digits_unrolled(const CharT* chars)  noexcept {
     return parse_eight_digits_unrolled(read_u64(chars));
 }
 
@@ -139,14 +140,14 @@ bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcept {
     const bool all = is_made_of_eight_digits_fast(chars);
-    if (all) i = i * 100000000 * parse_eight_digits_unrolled(chars);
+    if (all) i = i * 100000000 + parse_eight_digits_unrolled(chars);
     return all;
 }
 
 // http://0x80.pl/articles/simd-parsing-int-sequences.html
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noexcept {
-  if (cpp20_and_in_constexpr() || !FASTFLOAT_SSE2) {
+  if (cpp20_and_in_constexpr() || !has_simd()) {
     for (int i = 0; i < 8; ++i) {
       if (chars[i] < u'0' || chars[i] > u'9')
         return false;
@@ -154,9 +155,11 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe
     i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars));
     return true;
   }
-#if FASTFLOAT_SSE2
+#if !FASTFLOAT_SSE2
+  return false; // never reaches here, satisfy compiler
+#else
 FASTFLOAT_SIMD_DISABLE_WARNINGS
-  const __m128i data = _mm_loadu_si128(reinterpret_cast<const char*>(chars));
+  const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
 
   // (x - '0') <= 9
   const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80));
@@ -167,7 +170,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
     // x - '0'
     const __m128i s1digits16 = _mm_sub_epi16(data, _mm_set1_epi16('0'));
     // 10 * x(b) + x(b-1) -> 2 digit numbers
-    const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1);
+    const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1));
     const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32);
     // 100 * x(b) + x(b-1) -> 4 digit numbers
     const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1));
@@ -251,7 +254,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
     const CharT* before = p;
     // can occur at most twice without overflowing, but let it occur more, since
     // for integers with many digits, digit parsing is the primary bottleneck.
-    while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p)) {  // in rare cases, this will overflow, but that's ok
+    while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p, i)) {  // in rare cases, this will overflow, but that's ok
       p += 8;
     }
     while ((p != pend) && is_integer(*p)) {
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 7ca3284..fe64126 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -82,7 +82,7 @@
 #if defined(__GNUC__)
 #define FASTFLOAT_SIMD_DISABLE_WARNINGS \
   _Pragma("GCC diagnostic push") \
-  _Pragma("GCC diagnostic ignored \"-Wcast-align=strict\"")
+  _Pragma("GCC diagnostic ignored \"-Wcast-align\"")
 #else
 #define FASTFLOAT_SIMD_DISABLE_WARNINGS
 #endif
@@ -123,6 +123,14 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
 #endif
 }
 
+fastfloat_really_inline constexpr bool has_simd() {
+#if FASTFLOAT_SSE2
+  return true;
+#else
+  return false;
+#endif
+}
+
 // Compares two ASCII strings in a case insensitive manner.
 // maya: for now, keep input2 ASCII only
 template <typename CharT>

From cda25408bcb1473a3684ae9de1d63036c3579c21 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Sun, 2 Apr 2023 00:32:30 -0400
Subject: [PATCH 11/27] Optimize char16_t parsing for digit_comparison.h

---
 include/fast_float/ascii_number.h | 59 +++++++++++++++++++------------
 1 file changed, 36 insertions(+), 23 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 6acd3da..aef072d 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -120,27 +120,56 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
   return uint32_t(val);
 }
 
+// http://0x80.pl/articles/simd-parsing-int-sequences.html
+#if FASTFLOAT_SSE2
+fastfloat_really_inline
+uint32_t parse_eight_digits_unrolled_c16(const __m128i val) {
+  // x - '0'
+  const __m128i s1digits16 = _mm_sub_epi16(val, _mm_set1_epi16('0'));
+  // 10 * x(b) + x(b-1) -> 2 digit numbers
+  const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1));
+  const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32);
+  // 100 * x(b) + x(b-1) -> 4 digit numbers
+  const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1));
+  const __m128i s4digits16 = _mm_packus_epi16(s4digits32, s4digits32);
+  // 10000 * x(b) + x(b-1) -> 8 digit number
+  const __m128i s8digits32 = _mm_madd_epi16(s4digits16, _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1));
+
+  uint32_t value;
+  _mm_storeu_si32(&value, s8digits32);
+  return value;
+}
+#endif
+
 // credit @aqrit
 fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
   return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
      0x8080808080808080));
 }
 
-template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint32_t parse_eight_digits_unrolled(const CharT* chars)  noexcept {
+uint32_t parse_eight_digits_unrolled(const char* chars)  noexcept {
     return parse_eight_digits_unrolled(read_u64(chars));
 }
 
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
-  return is_made_of_eight_digits_fast(read_u64(chars));
+uint32_t parse_eight_digits_unrolled(const char16_t* chars)  noexcept {
+  if (cpp20_and_in_constexpr() || !has_simd()) {
+    return parse_eight_digits_unrolled(read_u64(chars));
+  }
+#if !FASTFLOAT_SSE2
+  return 0; // never reaches here, satisfy compiler
+#else
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return parse_eight_digits_unrolled_c16(_mm_loadu_si128(reinterpret_cast<const __m128i*>(chars)));
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+#endif
 }
 
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcept {
-    const bool all = is_made_of_eight_digits_fast(chars);
-    if (all) i = i * 100000000 + parse_eight_digits_unrolled(chars);
+    const bool all = is_made_of_eight_digits_fast(read_u64(chars));
+    if (all) i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars));
     return all;
 }
 
@@ -160,28 +189,13 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe
 #else
 FASTFLOAT_SIMD_DISABLE_WARNINGS
   const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
-
   // (x - '0') <= 9
   const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80));
   const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119));
   const bool is_digits = _mm_movemask_epi8(t1) == 0;
 
   if (is_digits) {
-    // x - '0'
-    const __m128i s1digits16 = _mm_sub_epi16(data, _mm_set1_epi16('0'));
-    // 10 * x(b) + x(b-1) -> 2 digit numbers
-    const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1));
-    const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32);
-    // 100 * x(b) + x(b-1) -> 4 digit numbers
-    const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1));
-    const __m128i s4digits16 = _mm_packus_epi16(s4digits32, s4digits32);
-    // 10000 * x(b) + x(b-1) -> 8 digit number
-    const __m128i s8digits32 = _mm_madd_epi16(s4digits16, _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1));
-
-    uint32_t value;
-    _mm_storeu_si32(&value, s8digits32);
-
-    i = i * 100000000 + value;
+    i = i * 100000000 + parse_eight_digits_unrolled_c16(data);
     return true;
   }
   else return false;
@@ -190,7 +204,6 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS
 }
 
 
-
 typedef span<const char> byte_span;
 
 template <typename CharT>

From c849b7a8ff0520d2d337e23ef9bcca5d4f274619 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Sat, 15 Apr 2023 23:16:01 -0400
Subject: [PATCH 12/27] Option to forbid nan/inf, refactor

---
 include/fast_float/ascii_number.h | 80 ++++++++++++++++---------------
 include/fast_float/fast_float.h   | 24 ++++++++--
 include/fast_float/float_common.h |  5 +-
 include/fast_float/parse_number.h |  9 ++--
 4 files changed, 72 insertions(+), 46 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index aef072d..9fbe9ac 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -157,7 +157,7 @@ uint32_t parse_eight_digits_unrolled(const char16_t* chars)  noexcept {
   if (cpp20_and_in_constexpr() || !has_simd()) {
     return parse_eight_digits_unrolled(read_u64(chars));
   }
-#if !FASTFLOAT_SSE2
+#if !FASTFLOAT_HAS_SIMD
   return 0; // never reaches here, satisfy compiler
 #else
 FASTFLOAT_SIMD_DISABLE_WARNINGS
@@ -184,7 +184,7 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe
     i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars));
     return true;
   }
-#if !FASTFLOAT_SSE2
+#if !FASTFLOAT_HAS_SIMD
   return false; // never reaches here, satisfy compiler
 #else
 FASTFLOAT_SIMD_DISABLE_WARNINGS
@@ -210,10 +210,10 @@ template <typename CharT>
 struct parsed_number_string {
   int64_t exponent{0};
   uint64_t mantissa{0};
+  int64_t exp_number{0};
   const CharT *lastmatch{nullptr};
   bool negative{false};
   bool valid{false};
-  bool is_64bit_int{false};
   bool too_many_digits{false};
   // contains the range of the significant digits
   span<const CharT> integer{};  // non-nullable
@@ -224,7 +224,7 @@ struct parsed_number_string {
 // parse an ASCII string.
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options, const bool parse_ints = false) noexcept {
+parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept {
   const chars_format fmt = options.format;
   const parse_rules rules = options.rules;
   const CharT decimal_point = static_cast<CharT>(options.decimal_point);
@@ -322,7 +322,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
 
   answer.lastmatch = p;
   answer.valid = true;
-  answer.is_64bit_int = (p == end_of_integer_part);
+  answer.exp_number = exp_number;
 
   // If we frequently had to deal with long strings of digits,
   // we could extend our code by using a 128-bit integer instead
@@ -339,45 +339,49 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
       if(*start == static_cast<CharT>('0')) { digit_count --; }
       start++;
     }
-    constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL};
-    // maya: A 64-bit number may have up to 20 digits!
-    // If we're parsing ints, preserve accuracy up to 20 digits 
-    // instead of rounding them to a floating point value.
-    answer.too_many_digits = rules == parse_rules::json_rules && parse_ints && answer.is_64bit_int ?
-        (digit_count > 20 || i < minimal_twenty_digit_integer) : digit_count > 19;
-        
-    if (answer.too_many_digits) {
-      answer.is_64bit_int = false;
-      // Let us start again, this time, avoiding overflows.
-      // We don't need to check if is_integer, since we use the
-      // pre-tokenized spans from above.
-      i = 0;
-      p = answer.integer.ptr;
-      const CharT* int_end = p + answer.integer.len();
-      const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
-      while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-        i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
-        ++p;
-      }
-      if (i >= minimal_nineteen_digit_integer) { // We have a big integers
-        exponent = end_of_integer_part - p + exp_number;
-      } else { // We have a value with a fractional component.
-          p = answer.fraction.ptr;
-          const CharT* frac_end = p + answer.fraction.len();
-          while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
-            i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
-            ++p;
-          }
-          exponent = answer.fraction.ptr - p + exp_number;
-      }
-      // We have now corrected both exponent and i, to a truncated value
-    }
+
+    // exponent/mantissa must be truncated later
+    answer.too_many_digits = digit_count > 19;
   }
   answer.exponent = exponent;
   answer.mantissa = i;
   return answer;
 }
 
+template <typename CharT>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void truncate_exponent_mantissa(parsed_number_string<CharT>& ps)
+{
+  // Let us start again, this time, avoiding overflows.
+  // We don't need to check if is_integer, since we use the
+  // pre-tokenized spans.
+  uint64_t i = 0;
+  int64_t exponent = 0;
+  const CharT* p = ps.integer.ptr;
+  const CharT* const int_end = p + ps.integer.len();
+  const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
+  while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+    i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
+    ++p;
+  }
+  if (i >= minimal_nineteen_digit_integer) { // We have a big integers
+    exponent = int_end - p + ps.exp_number;
+  }
+  else { // We have a value with a fractional component.
+    p = ps.fraction.ptr;
+    const CharT* const frac_end = p + ps.fraction.len();
+    while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+      i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
+      ++p;
+    }
+    exponent = ps.fraction.ptr - p + ps.exp_number;
+  }
+  // We have now corrected both exponent and i, to a truncated value
+
+  ps.exponent = exponent;
+  ps.mantissa = i;
+}
+
 } // namespace fast_float
 
 #endif
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index 91870a7..470d05a 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -26,18 +26,34 @@ struct from_chars_result {
 
 struct parse_options {
   constexpr explicit parse_options(
-      chars_format fmt = chars_format::general,
-      parse_rules rules = parse_rules::std_rules, char dot = '.')
-    : format(fmt), rules(rules), decimal_point(dot) {}
+    chars_format fmt = chars_format::general,
+    parse_rules rules = parse_rules::std_rules, 
+    char dot = '.', bool allow_inf_nan = true)
+    : format(fmt), rules(rules), allow_inf_nan(allow_inf_nan), decimal_point(dot) {}
 
   /** Which number formats are accepted */
   chars_format format;
   /** Which parsing rules to use */
   parse_rules rules;
+  /** Whether to allow inf and nan */
+  bool allow_inf_nan;
   /** The character used as decimal point */
   char decimal_point;
 };
 
+struct preparsed_parse_options {
+  constexpr explicit preparsed_parse_options(
+    bool allow_inf_nan = true)
+    : allow_inf_nan(allow_inf_nan) {}
+
+  constexpr preparsed_parse_options(
+    const parse_options& options)
+    : allow_inf_nan(options.allow_inf_nan) {}
+
+  /** Whether to allow inf and nan */
+  bool allow_inf_nan;
+};
+
 /**
  * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting
  * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale.
@@ -78,7 +94,7 @@ namespace fast_float {
 template <typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
 from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> parsed, 
-    const CharT* first, const CharT* last, T& value) noexcept;
+    const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept;
 }
 
 // namespace fast_float
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index fe64126..4fb03fc 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -78,6 +78,9 @@
 #endif
 #endif
 
+#if FASTFLOAT_SSE2
+#define FASTFLOAT_HAS_SIMD (1)
+#endif
 
 #if defined(__GNUC__)
 #define FASTFLOAT_SIMD_DISABLE_WARNINGS \
@@ -124,7 +127,7 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
 }
 
 fastfloat_really_inline constexpr bool has_simd() {
-#if FASTFLOAT_SSE2
+#if FASTFLOAT_HAS_SIMD
   return true;
 #else
   return false;
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index 2c2d2c3..2f30e35 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -143,15 +143,18 @@ from_chars_result<CharT> from_chars(const CharT *first, const CharT *last,
 
 template<typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
-from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, const CharT* first, const CharT* last, T& value) noexcept
+from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept
 {
   static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
 
-  
+
   from_chars_result<CharT> answer;
   if (!pns.valid) {
-    return detail::parse_infnan(first, last, value);
+    return options.allow_inf_nan ? detail::parse_infnan(first, last, value) : answer;
   }
+  if (pns.too_many_digits)
+    truncate_exponent_mantissa(pns);
+
   answer.ec = std::errc(); // be optimistic
   answer.ptr = pns.lastmatch;
   // The implementation of the Clinger's fast path is convoluted because

From 653790b5f3401abaa0bc9d13ede552b15b63a042 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Sun, 16 Apr 2023 00:36:52 -0400
Subject: [PATCH 13/27] fixes

---
 include/fast_float/parse_number.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index 2f30e35..d5b266d 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -150,7 +150,13 @@ from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, c
 
   from_chars_result<CharT> answer;
   if (!pns.valid) {
-    return options.allow_inf_nan ? detail::parse_infnan(first, last, value) : answer;
+    if (options.allow_inf_nan)
+      return detail::parse_infnan(first, last, value);
+    else {
+      answer.ec = std::errc::invalid_argument;
+      answer.ptr = first;
+      return answer;
+    }
   }
   if (pns.too_many_digits)
     truncate_exponent_mantissa(pns);
@@ -226,7 +232,7 @@ from_chars_result<CharT> from_chars_advanced(const CharT *first, const CharT *la
     answer.ptr = first;
     return answer;
   }
-  answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value);
+  answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value, options);
   return answer;
 }
 

From 89fc24007a5aaca3ef7c165625b67a96d14c040f Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Wed, 26 Apr 2023 16:25:41 -0400
Subject: [PATCH 14/27] Clean up

---
 include/fast_float/ascii_number.h     | 60 +++++++++++++++------------
 include/fast_float/digit_comparison.h | 36 +++++++++-------
 include/fast_float/float_common.h     |  5 ++-
 include/fast_float/parse_number.h     | 16 +++----
 4 files changed, 64 insertions(+), 53 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 9fbe9ac..c8a051f 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -9,7 +9,7 @@
 
 #include "float_common.h"
 
-#if FASTFLOAT_SSE2
+#ifdef FASTFLOAT_SSE2
 #include <emmintrin.h>
 #endif
 
@@ -20,7 +20,7 @@ namespace fast_float {
 // able to optimize it well.
 template <typename CharT>
 fastfloat_really_inline constexpr bool is_integer(CharT c) noexcept {
-  return c >= static_cast<CharT>('0') && c <= static_cast<CharT>('9');
+  return c >= CharT('0') && c <= CharT('9');
 }
 
 fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
@@ -42,9 +42,10 @@ uint64_t fast_read_u64(const char* chars) {
 }
 
 // https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg
+// todo: add support for char32_t
 fastfloat_really_inline
 uint64_t fast_read_u64(const char16_t* chars) {
-#if FASTFLOAT_SSE2
+#ifdef FASTFLOAT_SSE2
 FASTFLOAT_SIMD_DISABLE_WARNINGS
   static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff};
   const __m128i m_masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(masks));
@@ -65,6 +66,7 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS
   for (int i = 0; i < 8; ++i)
       bytes[i] = (unsigned char)chars[i];
 
+  // bit-cast
   uint64_t val;
   ::memcpy(&val, bytes, sizeof(uint64_t));
   return val;
@@ -77,7 +79,7 @@ uint64_t read_u64(const CharT *chars) {
   if (cpp20_and_in_constexpr()) {
     uint64_t val = 0;
     for(int i = 0; i < 8; ++i) {
-      val |= uint64_t(*chars) << (i*8);
+      val |= uint64_t(char(*chars)) << (i*8);
       ++chars;
     }
     return val;
@@ -121,7 +123,7 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
 }
 
 // http://0x80.pl/articles/simd-parsing-int-sequences.html
-#if FASTFLOAT_SSE2
+#ifdef FASTFLOAT_SSE2
 fastfloat_really_inline
 uint32_t parse_eight_digits_unrolled_c16(const __m128i val) {
   // x - '0'
@@ -152,13 +154,15 @@ uint32_t parse_eight_digits_unrolled(const char* chars)  noexcept {
     return parse_eight_digits_unrolled(read_u64(chars));
 }
 
+// Call this if you know chars are only digits
+//todo: add support for char32_t
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint32_t parse_eight_digits_unrolled(const char16_t* chars)  noexcept {
   if (cpp20_and_in_constexpr() || !has_simd()) {
     return parse_eight_digits_unrolled(read_u64(chars));
   }
-#if !FASTFLOAT_HAS_SIMD
-  return 0; // never reaches here, satisfy compiler
+#ifndef FASTFLOAT_HAS_SIMD
+  return 0; // never reaches here, remove warning
 #else
 FASTFLOAT_SIMD_DISABLE_WARNINGS
   return parse_eight_digits_unrolled_c16(_mm_loadu_si128(reinterpret_cast<const __m128i*>(chars)));
@@ -173,7 +177,9 @@ bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcep
     return all;
 }
 
+// Call this if you don't know whether chars are only digits
 // http://0x80.pl/articles/simd-parsing-int-sequences.html
+//todo: add support for char32_t
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noexcept {
   if (cpp20_and_in_constexpr() || !has_simd()) {
@@ -184,17 +190,16 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe
     i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars));
     return true;
   }
-#if !FASTFLOAT_HAS_SIMD
-  return false; // never reaches here, satisfy compiler
+#ifndef FASTFLOAT_HAS_SIMD
+  return false; // never reaches here, remove warning
 #else
 FASTFLOAT_SIMD_DISABLE_WARNINGS
   const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
   // (x - '0') <= 9
   const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80));
   const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119));
-  const bool is_digits = _mm_movemask_epi8(t1) == 0;
 
-  if (is_digits) {
+  if (_mm_movemask_epi8(t1) == 0) {
     i = i * 100000000 + parse_eight_digits_unrolled_c16(data);
     return true;
   }
@@ -227,16 +232,16 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept {
   const chars_format fmt = options.format;
   const parse_rules rules = options.rules;
-  const CharT decimal_point = static_cast<CharT>(options.decimal_point);
+  const CharT decimal_point = CharT(options.decimal_point);
 
   parsed_number_string<CharT> answer;
   answer.valid = false;
   answer.too_many_digits = false;
-  answer.negative = (*p == static_cast<CharT>('-'));
+  answer.negative = (*p == CharT('-'));
 #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
-  if ((*p == static_cast<CharT>('-')) || (*p == static_cast<CharT>('+'))) {
+  if ((*p == CharT('-')) || (*p == CharT('+'))) {
 #else
-  if (*p == static_cast<CharT>('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
+  if (*p == CharT('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
 #endif
     ++p;
     if (p == pend) {
@@ -254,7 +259,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
     // a multiplication by 10 is cheaper than an arbitrary integer
     // multiplication
     i = 10 * i +
-        uint64_t(*p - static_cast<CharT>('0')); // might overflow, we will handle the overflow later
+        uint64_t(*p - CharT('0')); // might overflow, we will handle the overflow later
     ++p;
   }
   const CharT *const end_of_integer_part = p;
@@ -271,7 +276,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
       p += 8;
     }
     while ((p != pend) && is_integer(*p)) {
-      i = i * 10 + uint64_t(*p - static_cast<CharT>('0')); // in rare cases, this will overflow, but that's ok
+      i = i * 10 + uint64_t(*p - CharT('0')); // in rare cases, this will overflow, but that's ok
       ++p;
     }
     exponent = before - p;
@@ -283,14 +288,14 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
     return answer;
   }
   int64_t exp_number = 0;            // explicit exponential part
-  if ((fmt & chars_format::scientific) && (p != pend) && ((static_cast<CharT>('e') == *p) || (static_cast<CharT>('E') == *p))) {
+  if ((fmt & chars_format::scientific) && (p != pend) && ((CharT('e') == *p) || (CharT('E') == *p))) {
     const CharT * location_of_e = p;
     ++p;
     bool neg_exp = false;
-    if ((p != pend) && (static_cast<CharT>('-') == *p)) {
+    if ((p != pend) && (CharT('-') == *p)) {
       neg_exp = true;
       ++p;
-    } else if ((p != pend) && (static_cast<CharT>('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
+    } else if ((p != pend) && (CharT('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
       ++p;
     }
     if ((p == pend) || !is_integer(*p)) {
@@ -302,7 +307,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
       p = location_of_e;
     } else {
       while ((p != pend) && is_integer(*p)) {
-        uint8_t digit = uint8_t(*p - static_cast<CharT>('0'));
+        uint8_t digit = uint8_t(*p - CharT('0'));
         if (exp_number < 0x10000000) {
           exp_number = 10 * exp_number + digit;
         }
@@ -317,7 +322,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
   }
   
   // disallow leading zeros before the decimal point
-  if (rules == parse_rules::json_rules && start_digits[0] == static_cast<CharT>('0') && digit_count >= 2 && is_integer(start_digits[1]))
+  if (rules == parse_rules::json_rules && start_digits[0] == CharT('0') && digit_count >= 2 && is_integer(start_digits[1]))
       return answer;
 
   answer.lastmatch = p;
@@ -335,12 +340,13 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
     // We need to be mindful of the case where we only have zeroes...
     // E.g., 0.000000000...000.
     const CharT *start = start_digits;
-    while ((start != pend) && (*start == static_cast<CharT>('0') || *start == decimal_point)) {
-      if(*start == static_cast<CharT>('0')) { digit_count --; }
+    while ((start != pend) && (*start == CharT('0') || *start == decimal_point)) {
+      if(*start == CharT('0')) { digit_count --; }
       start++;
     }
 
     // exponent/mantissa must be truncated later
+    // this is unlikely, so don't inline truncation code with the rest of parse_number_string()
     answer.too_many_digits = digit_count > 19;
   }
   answer.exponent = exponent;
@@ -350,7 +356,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
 
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-void truncate_exponent_mantissa(parsed_number_string<CharT>& ps)
+void parse_truncated_number_string(parsed_number_string<CharT>& ps)
 {
   // Let us start again, this time, avoiding overflows.
   // We don't need to check if is_integer, since we use the
@@ -361,7 +367,7 @@ void truncate_exponent_mantissa(parsed_number_string<CharT>& ps)
   const CharT* const int_end = p + ps.integer.len();
   const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
   while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-    i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
+    i = i * 10 + uint64_t(*p - CharT('0'));
     ++p;
   }
   if (i >= minimal_nineteen_digit_integer) { // We have a big integers
@@ -371,7 +377,7 @@ void truncate_exponent_mantissa(parsed_number_string<CharT>& ps)
     p = ps.fraction.ptr;
     const CharT* const frac_end = p + ps.fraction.len();
     while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
-      i = i * 10 + uint64_t(*p - static_cast<CharT>('0'));
+      i = i * 10 + uint64_t(*p - CharT('0'));
       ++p;
     }
     exponent = ps.fraction.ptr - p + ps.exp_number;
diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h
index 5ba91af..73d6732 100644
--- a/include/fast_float/digit_comparison.h
+++ b/include/fast_float/digit_comparison.h
@@ -158,16 +158,18 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 void skip_zeros(const CharT*& first, const CharT* last) noexcept {
-  uint64_t val;
-  while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) {
-    val = fast_read_u64(first);
-    if (val != 0x3030303030303030) {
-      break;
+  if (std::is_same<CharT, char>::value || has_simd()) {
+    uint64_t val;
+    while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) {
+      val = fast_read_u64(first);
+      if (val != 0x3030303030303030) {
+        break;
+      }
+      first += 8;
     }
-    first += 8;
   }
   while (first != last) {
-    if (*first != static_cast<CharT>('0')) {
+    if (*first != CharT('0')) {
       break;
     }
     first++;
@@ -179,17 +181,19 @@ void skip_zeros(const CharT*& first, const CharT* last) noexcept {
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 bool is_truncated(const CharT* first, const CharT* last) noexcept {
-  // do 8-bit optimizations, can just compare to 8 literal 0s.
-  uint64_t val;
-  while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) {
-    val = fast_read_u64(first);
-    if (val != 0x3030303030303030) {
-      return true;
+  if (std::is_same<CharT, char>::value || has_simd()) {
+    // do 8-bit optimizations, can just compare to 8 literal 0s.
+    uint64_t val;
+    while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) {
+      val = fast_read_u64(first);
+      if (val != 0x3030303030303030) {
+        return true;
+      }
+      first += 8;
     }
-    first += 8;
   }
   while (first != last) {
-    if (*first != static_cast<CharT>('0')) {
+    if (*first != CharT('0')) {
       return true;
     }
     first++;
@@ -215,7 +219,7 @@ void parse_eight_digits(const CharT*& p, limb& value, size_t& counter, size_t& c
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14
 void parse_one_digit(const CharT*& p, limb& value, size_t& counter, size_t& count) noexcept {
-  value = value * 10 + limb(*p - static_cast<CharT>('0'));
+  value = value * 10 + limb(*p - CharT('0'));
   p++;
   counter++;
   count++;
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 4fb03fc..020ae6b 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -78,11 +78,12 @@
 #endif
 #endif
 
-#if FASTFLOAT_SSE2
+#ifdef FASTFLOAT_SSE2
 #define FASTFLOAT_HAS_SIMD (1)
 #endif
 
 #if defined(__GNUC__)
+// disable -Wcast-align=strict (GCC only)
 #define FASTFLOAT_SIMD_DISABLE_WARNINGS \
   _Pragma("GCC diagnostic push") \
   _Pragma("GCC diagnostic ignored \"-Wcast-align\"")
@@ -141,7 +142,7 @@ inline FASTFLOAT_CONSTEXPR14 bool
 fastfloat_strncasecmp(const CharT *input1, const char *input2, size_t length) {
   char running_diff{0};
   for (size_t i = 0; i < length; i++) {
-    running_diff |= (static_cast<char>(input1[i]) ^ input2[i]);
+    running_diff |= (char(input1[i]) ^ input2[i]);
   }
   return (running_diff == 0) || (running_diff == 32);
 }
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index d5b266d..a407bfc 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -26,12 +26,12 @@ parse_infnan(const CharT *first, const CharT *last, T &value)  noexcept  {
   answer.ptr = first;
   answer.ec = std::errc(); // be optimistic
   bool minusSign = false;
-  if (*first == static_cast<CharT>('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here
+  if (*first == CharT('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here
       minusSign = true;
       ++first;
   }
 #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
-  if (*first == static_cast<CharT>('+')) {
+  if (*first == CharT('+')) {
       ++first;
   }
 #endif
@@ -40,15 +40,15 @@ parse_infnan(const CharT *first, const CharT *last, T &value)  noexcept  {
       answer.ptr = (first += 3);
       value = minusSign ? -std::numeric_limits<T>::quiet_NaN() : std::numeric_limits<T>::quiet_NaN();
       // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
-      if(first != last && *first == static_cast<CharT>('(')) {
+      if(first != last && *first == CharT('(')) {
         for(const CharT* ptr = first + 1; ptr != last; ++ptr) {
-          if (*ptr == static_cast<CharT>(')')) {
+          if (*ptr == CharT(')')) {
             answer.ptr = ptr + 1; // valid nan(n-char-seq-opt)
             break;
           }
-          else if(!((static_cast<CharT>('a') <= *ptr && *ptr <= static_cast<CharT>('z')) || 
-              (static_cast<CharT>('A') <= *ptr && *ptr <= static_cast<CharT>('Z')) || 
-              (static_cast<CharT>('0') <= *ptr && *ptr <= static_cast<CharT>('9')) || *ptr == static_cast<CharT>('_')))
+          else if(!((CharT('a') <= *ptr && *ptr <= CharT('z')) || 
+              (CharT('A') <= *ptr && *ptr <= CharT('Z')) || 
+              (CharT('0') <= *ptr && *ptr <= CharT('9')) || *ptr == CharT('_')))
             break; // forbidden char, not nan(n-char-seq-opt)
         }
       }
@@ -159,7 +159,7 @@ from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, c
     }
   }
   if (pns.too_many_digits)
-    truncate_exponent_mantissa(pns);
+    parse_truncated_number_string(pns);
 
   answer.ec = std::errc(); // be optimistic
   answer.ptr = pns.lastmatch;

From 091458d192d129336116df4b330983740b2c32b1 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Sun, 30 Apr 2023 02:20:24 -0400
Subject: [PATCH 15/27] Add basic support for char32_t (unoptimized)

---
 include/fast_float/ascii_number.h     | 160 +++++++++++++-------------
 include/fast_float/digit_comparison.h |   8 +-
 2 files changed, 81 insertions(+), 87 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index c72e210..cc9619c 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -34,49 +34,47 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
     | (val & 0x00000000000000FF) << 56;
 }
 
+
+#ifdef FASTFLOAT_SSE2
+
 fastfloat_really_inline
-uint64_t fast_read_u64(const char* chars) {
-  uint64_t val;
-  ::memcpy(&val, chars, sizeof(uint64_t));
-  return val;
+__m128i load_packus_masks_c16(void) noexcept {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  static const char16_t masks[] = { 0xff, 0xff, 0xff, 0xff };
+  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(masks));
+FASTFLOAT_SIMD_RESTORE_WARNINGS
 }
 
-// https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg
-// todo: add support for char32_t
+// packus_masks is an argument only so its value may be preloaded.
+// it should always come from load_packus_masks_c16().
 fastfloat_really_inline
-uint64_t fast_read_u64(const char16_t* chars) {
-#ifdef FASTFLOAT_SSE2
+uint64_t simd_read8_to_u64(const char16_t* chars, const __m128i packus_masks) {
 FASTFLOAT_SIMD_DISABLE_WARNINGS
-  static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff};
-  const __m128i m_masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(masks));
-
-  // mask hi bytes and pack
-  const char* const p = reinterpret_cast<const char*>(chars);
-  __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), m_masks);
-  __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), m_masks);
+  // process 4 and 4 chars simultaneously (loadu_si64 has high latency)
+  // with AVX512BW + AVX512VL, masking is not required as we have cvtepi16_epi8
+  const char* const p = reinterpret_cast<const char*>(chars); 
+  __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), packus_masks);
+  __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), packus_masks);
   __m128i packed = _mm_packus_epi16(i1, i2);
 
-  // extract
   uint64_t val;
   _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8));
   return val;
 FASTFLOAT_SIMD_RESTORE_WARNINGS
-#else
-  unsigned char bytes[8];
-  for (int i = 0; i < 8; ++i)
-      bytes[i] = (unsigned char)chars[i];
-
-  // bit-cast
-  uint64_t val;
-  ::memcpy(&val, bytes, sizeof(uint64_t));
-  return val;
-#endif
 }
 
+// https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const char16_t* chars) {
+  return simd_read8_to_u64(chars, load_packus_masks_c16());
+}
+#endif
+
+// Read 8 CharT into a u64. Truncates CharT if != char.
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint64_t read_u64(const CharT *chars) {
-  if (cpp20_and_in_constexpr()) {
+uint64_t read8_to_u64(const CharT *chars) {
+  if (cpp20_and_in_constexpr() || !std::is_same<CharT, char>::value) {
     uint64_t val = 0;
     for(int i = 0; i < 8; ++i) {
       val |= uint64_t(char(*chars)) << (i*8);
@@ -84,7 +82,8 @@ uint64_t read_u64(const CharT *chars) {
     }
     return val;
   }
-  uint64_t val = fast_read_u64(chars);
+  uint64_t val;
+  ::memcpy(&val, chars, sizeof(uint64_t));
 #if FASTFLOAT_IS_BIG_ENDIAN == 1
   // Need to read as-if the number was in little-endian order.
   val = byteswap(val);
@@ -121,92 +120,87 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
   return uint32_t(val);
 }
 
-// http://0x80.pl/articles/simd-parsing-int-sequences.html
-#ifdef FASTFLOAT_SSE2
-fastfloat_really_inline
-uint32_t parse_eight_digits_unrolled_c16(const __m128i val) {
-  // x - '0'
-  const __m128i s1digits16 = _mm_sub_epi16(val, _mm_set1_epi16('0'));
-  // 10 * x(b) + x(b-1) -> 2 digit numbers
-  const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1));
-  const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32);
-  // 100 * x(b) + x(b-1) -> 4 digit numbers
-  const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1));
-  const __m128i s4digits16 = _mm_packus_epi16(s4digits32, s4digits32);
-  // 10000 * x(b) + x(b-1) -> 8 digit number
-  const __m128i s8digits32 = _mm_madd_epi16(s4digits16, _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1));
-
-  uint32_t value;
-  _mm_storeu_si32(&value, s8digits32);
-  return value;
-}
-#endif
-
-// credit @aqrit
-fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
-  return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
-     0x8080808080808080));
-}
 
+// Call this if chars are definitely 8 digits.
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint32_t parse_eight_digits_unrolled(const char* chars)  noexcept {
-    return parse_eight_digits_unrolled(read_u64(chars));
+    return parse_eight_digits_unrolled(read8_to_u64(chars));
 }
 
-// Call this if you know chars are only digits
-//todo: add support for char32_t
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint32_t parse_eight_digits_unrolled(const char16_t* chars)  noexcept {
   if (cpp20_and_in_constexpr() || !has_simd()) {
-    return parse_eight_digits_unrolled(read_u64(chars));
+    return parse_eight_digits_unrolled(read8_to_u64(chars));
   }
-#ifndef FASTFLOAT_HAS_SIMD
-  return 0; // never reaches here, remove warning
+#ifdef FASTFLOAT_HAS_SIMD
+  return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
 #else
-FASTFLOAT_SIMD_DISABLE_WARNINGS
-  return parse_eight_digits_unrolled_c16(_mm_loadu_si128(reinterpret_cast<const __m128i*>(chars)));
-FASTFLOAT_SIMD_RESTORE_WARNINGS
+  // never reaches here, remove warning
+  return 0;
 #endif
 }
 
+// todo, no simd optimization yet
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcept {
-    const bool all = is_made_of_eight_digits_fast(read_u64(chars));
-    if (all) i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars));
-    return all;
+uint32_t parse_eight_digits_unrolled(const char32_t* chars)  noexcept {
+  return parse_eight_digits_unrolled(read8_to_u64(chars));
+}
+
+
+// credit @aqrit
+fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val)  noexcept {
+  return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
+    0x8080808080808080));
 }
 
-// Call this if you don't know whether chars are only digits
-// http://0x80.pl/articles/simd-parsing-int-sequences.html
-//todo: add support for char32_t
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noexcept {
-  if (cpp20_and_in_constexpr() || !has_simd()) {
-    for (int i = 0; i < 8; ++i) {
-      if (chars[i] < u'0' || chars[i] > u'9')
-        return false;
-    }
-    i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars));
-    return true;
+bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept {
+  const bool is_digits = is_made_of_eight_digits_fast(read8_to_u64(chars));
+  if (is_digits) {
+    i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(chars));
   }
-#ifndef FASTFLOAT_HAS_SIMD
-  return false; // never reaches here, remove warning
-#else
+  return is_digits;
+}
+
+// Call this if chars might not be 8 digits.
+// Using this (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
+// ensures we don't load SIMD registers twice.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
+#ifdef FASTFLOAT_SSE2
+  if (cpp20_and_in_constexpr()) {
+    return false;
+  }    
 FASTFLOAT_SIMD_DISABLE_WARNINGS
   const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
+  const __m128i packus_masks = load_packus_masks_c16(); // be optimistic, preload
+
   // (x - '0') <= 9
+  // http://0x80.pl/articles/simd-parsing-int-sequences.html
   const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80));
   const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119));
 
   if (_mm_movemask_epi8(t1) == 0) {
-    i = i * 100000000 + parse_eight_digits_unrolled_c16(data);
+    uint64_t digits = simd_read8_to_u64(chars, packus_masks);
+    i = i * 100000000 + parse_eight_digits_unrolled(digits);
     return true;
   }
   else return false;
 FASTFLOAT_SIMD_RESTORE_WARNINGS
+
+#else // No SIMD available
+  return false;
 #endif
 }
 
+// todo, no simd optimization yet
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool parse_if_eight_digits_unrolled(const char32_t*, uint64_t&) noexcept {
+  return false;
+}
+
+
+
 typedef span<const char> byte_span;
 
 template <typename CharT>
diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h
index 73d6732..b9601c3 100644
--- a/include/fast_float/digit_comparison.h
+++ b/include/fast_float/digit_comparison.h
@@ -158,10 +158,10 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 void skip_zeros(const CharT*& first, const CharT* last) noexcept {
-  if (std::is_same<CharT, char>::value || has_simd()) {
+  if (std::is_same<CharT, char>::value) {
     uint64_t val;
     while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) {
-      val = fast_read_u64(first);
+      ::memcpy(&val, first, sizeof(uint64_t));
       if (val != 0x3030303030303030) {
         break;
       }
@@ -181,11 +181,11 @@ void skip_zeros(const CharT*& first, const CharT* last) noexcept {
 template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 bool is_truncated(const CharT* first, const CharT* last) noexcept {
-  if (std::is_same<CharT, char>::value || has_simd()) {
+  if (std::is_same<CharT, char>::value) {
     // do 8-bit optimizations, can just compare to 8 literal 0s.
     uint64_t val;
     while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) {
-      val = fast_read_u64(first);
+      ::memcpy(&val, first, sizeof(uint64_t));
       if (val != 0x3030303030303030) {
         return true;
       }

From e08c55c38051883e5e9cad82957cc749a1fbda35 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Mon, 1 May 2023 19:45:50 -0400
Subject: [PATCH 16/27] Remove JSON parse rules and the allow_inf_nan option

---
 .gitignore                        |  2 +-
 include/fast_float/ascii_number.h | 16 ++++-------
 include/fast_float/fast_float.h   | 37 ++----------------------
 include/fast_float/parse_number.h | 47 +++++++++++--------------------
 4 files changed, 25 insertions(+), 77 deletions(-)

diff --git a/.gitignore b/.gitignore
index a2601ad..5cc2dd6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,7 +7,7 @@ compile_commands.json
 .vs/
 Debug/
 Release/
-/out/build/
+/out/
 *.sln
 *.vcxproj
 *.vcxproj.filters
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index cc9619c..f583c2b 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -51,7 +51,7 @@ fastfloat_really_inline
 uint64_t simd_read8_to_u64(const char16_t* chars, const __m128i packus_masks) {
 FASTFLOAT_SIMD_DISABLE_WARNINGS
   // process 4 and 4 chars simultaneously (loadu_si64 has high latency)
-  // with AVX512BW + AVX512VL, masking is not required as we have cvtepi16_epi8
+  // with AVX512BW + AVX512VL, masking is not required as we can use cvtepi16_epi8
   const char* const p = reinterpret_cast<const char*>(chars); 
   __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), packus_masks);
   __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), packus_masks);
@@ -223,8 +223,7 @@ template <typename CharT>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept {
   const chars_format fmt = options.format;
-  const parse_rules rules = options.rules;
-  const CharT decimal_point = CharT(options.decimal_point);
+  const CharT decimal_point = options.decimal_point;
 
   parsed_number_string<CharT> answer;
   answer.valid = false;
@@ -240,7 +239,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
       return answer;
     }
     // a sign must be followed by an integer or the dot
-    if (!is_integer(*p) && (rules == parse_rules::json_rules || *p != decimal_point))
+    if (!is_integer(*p) && *p != decimal_point)
         return answer;
   }
   const CharT *const start_digits = p;
@@ -275,8 +274,8 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
     answer.fraction = span<const CharT>(before, size_t(p - before));
     digit_count -= exponent;
   }
-  // we must have encountered at least one integer (or two if a decimal point exists, with json rules).
-  if (digit_count == 0 || (rules == parse_rules::json_rules && has_decimal_point && digit_count == 1)) {
+  // we must have encountered at least one integer
+  if (digit_count == 0) {
     return answer;
   }
   int64_t exp_number = 0;            // explicit exponential part
@@ -312,11 +311,6 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
     // If it scientific and not fixed, we have to bail out.
     if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
   }
-  
-  // disallow leading zeros before the decimal point
-  if (rules == parse_rules::json_rules && start_digits[0] == CharT('0') && digit_count >= 2 && is_integer(start_digits[1]))
-      return answer;
-
   answer.lastmatch = p;
   answer.valid = true;
   answer.exp_number = exp_number;
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index 470d05a..57d39b3 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -13,11 +13,6 @@ enum chars_format {
     general = fixed | scientific
 };
 
-enum parse_rules {
-    std_rules,
-    json_rules,
-};
-
 template <typename CharT>
 struct from_chars_result {
   const CharT *ptr;
@@ -26,34 +21,15 @@ struct from_chars_result {
 
 struct parse_options {
   constexpr explicit parse_options(
-    chars_format fmt = chars_format::general,
-    parse_rules rules = parse_rules::std_rules, 
-    char dot = '.', bool allow_inf_nan = true)
-    : format(fmt), rules(rules), allow_inf_nan(allow_inf_nan), decimal_point(dot) {}
+    chars_format fmt = chars_format::general, char dot = '.')
+    : format(fmt), decimal_point(dot) {}
 
   /** Which number formats are accepted */
   chars_format format;
-  /** Which parsing rules to use */
-  parse_rules rules;
-  /** Whether to allow inf and nan */
-  bool allow_inf_nan;
   /** The character used as decimal point */
   char decimal_point;
 };
 
-struct preparsed_parse_options {
-  constexpr explicit preparsed_parse_options(
-    bool allow_inf_nan = true)
-    : allow_inf_nan(allow_inf_nan) {}
-
-  constexpr preparsed_parse_options(
-    const parse_options& options)
-    : allow_inf_nan(options.allow_inf_nan) {}
-
-  /** Whether to allow inf and nan */
-  bool allow_inf_nan;
-};
-
 /**
  * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting
  * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale.
@@ -86,15 +62,6 @@ FASTFLOAT_CONSTEXPR20
 from_chars_result<CharT> from_chars_advanced(const CharT *first, const CharT *last,
                                       T &value, parse_options options)  noexcept;
 
-} 
-
-#include "ascii_number.h" // parsed_number_string
-
-namespace fast_float {
-template <typename T, typename CharT>
-FASTFLOAT_CONSTEXPR20
-from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> parsed, 
-    const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept;
 }
 
 // namespace fast_float
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index 1cc220b..99ccd49 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -143,23 +143,30 @@ from_chars_result<CharT> from_chars(const CharT *first, const CharT *last,
 
 template<typename T, typename CharT>
 FASTFLOAT_CONSTEXPR20
-from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept
+from_chars_result<CharT> from_chars_advanced(const CharT *first, const CharT *last,
+                                      T &value, parse_options options)  noexcept  {
 {
   static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
 
 
   from_chars_result<CharT> answer;
-  if (!pns.valid) {
-    if (options.allow_inf_nan)
-      return detail::parse_infnan(first, last, value);
-    else {
-      answer.ec = std::errc::invalid_argument;
-      answer.ptr = first;
-      return answer;
-    }
+#ifdef FASTFLOAT_SKIP_WHITE_SPACE  // disabled by default
+  while ((first != last) && fast_float::is_space(uint8_t(*first))) {
+    first++;
   }
-  if (pns.too_many_digits)
+#endif
+  if (first == last) {
+    answer.ec = std::errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+  parsed_number_string<CharT> pns = parse_number_string(first, last, options);
+  if (!pns.valid) {
+    return detail::parse_infnan(first, last, value);
+  }
+  if (pns.too_many_digits) {
     parse_truncated_number_string(pns);
+  }
 
   answer.ec = std::errc(); // be optimistic
   answer.ptr = pns.lastmatch;
@@ -220,26 +227,6 @@ from_chars_result<CharT> from_chars_preparsed(parsed_number_string<CharT> pns, c
   return answer;
 }
 
-template<typename T, typename CharT>
-FASTFLOAT_CONSTEXPR20
-from_chars_result<CharT> from_chars_advanced(const CharT *first, const CharT *last,
-                                      T &value, parse_options options)  noexcept  {
-
-  from_chars_result<CharT> answer;
-#ifdef FASTFLOAT_SKIP_WHITE_SPACE  // disabled by default
-  while ((first != last) && fast_float::is_space(uint8_t(*first))) {
-    first++;
-  }
-#endif
-  if (first == last) {
-    answer.ec = std::errc::invalid_argument;
-    answer.ptr = first;
-    return answer;
-  }
-  answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value, options);
-  return answer;
-}
-
 } // namespace fast_float
 
 #endif

From 5136b181bab4f9268c70a4c3a61ab00a897381bf Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Tue, 2 May 2023 01:41:49 -0400
Subject: [PATCH 17/27] Fixes and cleanup, mainly in ascii_number.h

---
 include/fast_float/ascii_number.h | 129 +++++++++++++++---------------
 include/fast_float/float_common.h |   3 +
 2 files changed, 66 insertions(+), 66 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 82a06b1..a15c2ef 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -9,6 +9,8 @@
 
 #include "float_common.h"
 
+#define FASTFLOAT_SSE2
+
 #ifdef FASTFLOAT_SSE2
 #include <emmintrin.h>
 #endif
@@ -16,6 +18,15 @@
 
 namespace fast_float {
 
+template <typename UC>
+fastfloat_really_inline constexpr bool has_simd_opts() {
+#ifdef FASTFLOAT_HAS_SIMD
+  return std::is_same<UC, char16_t>::value;
+#else
+  return false;
+#endif
+}
+
 // Next function can be micro-optimized, but compilers are entirely
 // able to optimize it well.
 template <typename UC>
@@ -34,42 +45,6 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
     | (val & 0x00000000000000FF) << 56;
 }
 
-
-#ifdef FASTFLOAT_SSE2
-
-fastfloat_really_inline
-__m128i load_packus_masks_c16(void) noexcept {
-FASTFLOAT_SIMD_DISABLE_WARNINGS
-  static const char16_t masks[] = { 0xff, 0xff, 0xff, 0xff };
-  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(masks));
-FASTFLOAT_SIMD_RESTORE_WARNINGS
-}
-
-// packus_masks is an argument only so its value may be preloaded.
-// it should always come from load_packus_masks_c16().
-fastfloat_really_inline
-uint64_t simd_read8_to_u64(const char16_t* chars, const __m128i packus_masks) {
-FASTFLOAT_SIMD_DISABLE_WARNINGS
-  // process 4 and 4 chars simultaneously (loadu_si64 has high latency)
-  // with AVX512BW + AVX512VL, masking is not required as we can use cvtepi16_epi8
-  const char* const p = reinterpret_cast<const char*>(chars); 
-  __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), packus_masks);
-  __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), packus_masks);
-  __m128i packed = _mm_packus_epi16(i1, i2);
-
-  uint64_t val;
-  _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8));
-  return val;
-FASTFLOAT_SIMD_RESTORE_WARNINGS
-}
-
-// https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg
-fastfloat_really_inline
-uint64_t simd_read8_to_u64(const char16_t* chars) {
-  return simd_read8_to_u64(chars, load_packus_masks_c16());
-}
-#endif
-
 // Read 8 UC into a u64. Truncates UC if not char.
 template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
@@ -77,7 +52,7 @@ uint64_t read8_to_u64(const UC *chars) {
   if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
     uint64_t val = 0;
     for(int i = 0; i < 8; ++i) {
-      val |= uint64_t(char(*chars)) << (i*8);
+      val |= uint64_t(uint8_t(*chars)) << (i*8);
       ++chars;
     }
     return val;
@@ -91,6 +66,35 @@ uint64_t read8_to_u64(const UC *chars) {
   return val;
 }
 
+#ifdef FASTFLOAT_SSE2
+
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const char16_t* chars) {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  static const char16_t kmasks[] = { 0xff, 0xff, 0xff, 0xff };
+  const __m128i masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(kmasks));
+
+  // pipeline 4 and 4 chars at the same time (since loadu_si64 has high latency)
+  // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead
+  const char* const p = reinterpret_cast<const char*>(chars); 
+  __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), masks);
+  __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), masks);
+  __m128i packed = _mm_packus_epi16(i1, i2);
+
+  uint64_t val;
+  _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8));
+  return val;
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+#endif
+
+// dummy for compile
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opts<UC>())>
+uint64_t simd_read8_to_u64(UC const*) {
+  return 0;
+}
+
+
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 void write_u64(uint8_t *chars, uint64_t val) {
   if (cpp20_and_in_constexpr()) {
@@ -122,28 +126,13 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
 
 
 // Call this if chars are definitely 8 digits.
+template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint32_t parse_eight_digits_unrolled(const char* chars)  noexcept {
-    return parse_eight_digits_unrolled(read8_to_u64(chars));
-}
-
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint32_t parse_eight_digits_unrolled(const char16_t* chars)  noexcept {
-  if (cpp20_and_in_constexpr() || !has_simd()) {
-    return parse_eight_digits_unrolled(read8_to_u64(chars));
+uint32_t parse_eight_digits_unrolled(UC const * chars)  noexcept {
+  if (cpp20_and_in_constexpr() || !has_simd_opts<UC>()) {
+    return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
   }
-#ifdef FASTFLOAT_HAS_SIMD
   return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
-#else
-  // never reaches here, removes warning
-  return 0;
-#endif
-}
-
-// todo, no simd optimization yet
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint32_t parse_eight_digits_unrolled(const char32_t* chars)  noexcept {
-  return parse_eight_digits_unrolled(read8_to_u64(chars));
 }
 
 
@@ -163,8 +152,12 @@ bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept {
 }
 
 // Call this if chars might not be 8 digits.
-// Using this (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
+// Using this (instead of is_made_of_eight_digits_fast() and parse_eight_digits_unrolled())
 // ensures we don't load SIMD registers twice.
+//
+// Benchmark:
+// https://quick-bench.com/q/Bbn0B4WmZsdgS3qDZWpggAY-jgs
+//
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 bool parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
 #ifdef FASTFLOAT_SSE2
@@ -173,7 +166,6 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept
   }    
 FASTFLOAT_SIMD_DISABLE_WARNINGS
   const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
-  const __m128i packus_masks = load_packus_masks_c16(); // be optimistic, preload
 
   // (x - '0') <= 9
   // http://0x80.pl/articles/simd-parsing-int-sequences.html
@@ -181,7 +173,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
   const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119));
 
   if (_mm_movemask_epi8(t1) == 0) {
-    uint64_t digits = simd_read8_to_u64(chars, packus_masks);
+    uint64_t digits = simd_read8_to_u64(chars);
     i = i * 100000000 + parse_eight_digits_unrolled(digits);
     return true;
   }
@@ -189,6 +181,8 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
 FASTFLOAT_SIMD_RESTORE_WARNINGS
 
 #else // No SIMD available
+
+  (void)chars; (void)i; // unused
   return false;
 #endif
 }
@@ -212,8 +206,10 @@ struct parsed_number_string_t {
   span<const UC> integer{};  // non-nullable
   span<const UC> fraction{}; // nullable
 };
+
 using byte_span = span<const char>;
 using parsed_number_string = parsed_number_string_t<char>;
+
 // Assuming that you use no more than 19 digits, this will
 // parse an ASCII string.
 template <typename UC>
@@ -265,6 +261,7 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
     while ((p != pend) && is_integer(*p)) {
       uint8_t digit = uint8_t(*p - UC('0'));
       ++p;
+      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
     }
     exponent = before - p;
     answer.fraction = span<const UC>(before, size_t(p - before));
@@ -336,20 +333,20 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
   return answer;
 }
 
-template <typename CharT>
+template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-void parse_truncated_number_string(parsed_number_string<CharT>& ps)
+void parse_truncated_number_string(parsed_number_string_t<UC>& ps)
 {
   // Let us start again, this time, avoiding overflows.
   // We don't need to check if is_integer, since we use the
   // pre-tokenized spans.
   uint64_t i = 0;
   int64_t exponent = 0;
-  const CharT* p = ps.integer.ptr;
-  const CharT* const int_end = p + ps.integer.len();
+  const UC* p = ps.integer.ptr;
+  const UC* const int_end = p + ps.integer.len();
   const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
   while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-    i = i * 10 + uint64_t(*p - CharT('0'));
+    i = i * 10 + uint64_t(*p - UC('0'));
     ++p;
   }
   if (i >= minimal_nineteen_digit_integer) { // We have a big integers
@@ -357,9 +354,9 @@ void parse_truncated_number_string(parsed_number_string<CharT>& ps)
   }
   else { // We have a value with a fractional component.
     p = ps.fraction.ptr;
-    const CharT* const frac_end = p + ps.fraction.len();
+    const UC* const frac_end = p + ps.fraction.len();
     while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
-      i = i * 10 + uint64_t(*p - CharT('0'));
+      i = i * 10 + uint64_t(*p - UC('0'));
       ++p;
     }
     exponent = ps.fraction.ptr - p + ps.exp_number;
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index f63a090..175389f 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -117,6 +117,9 @@
 // rust style `try!()` macro, or `?` operator
 #define FASTFLOAT_TRY(x) { if (!(x)) return false; }
 
+#define FASTFLOAT_ENABLE_IF(test) typename std::enable_if<(test), int>::type = 0
+
+
 namespace fast_float {
 
 fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {

From c811b027eaf481b97d5c139e8efb6915fbef44aa Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Tue, 2 May 2023 01:52:00 -0400
Subject: [PATCH 18/27] Remove testing macro

---
 include/fast_float/ascii_number.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index a15c2ef..59318f2 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -9,8 +9,6 @@
 
 #include "float_common.h"
 
-#define FASTFLOAT_SSE2
-
 #ifdef FASTFLOAT_SSE2
 #include <emmintrin.h>
 #endif

From 4cb09b5f5939a6b7d12a73e692e5dba9edde4651 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Tue, 2 May 2023 13:05:57 -0400
Subject: [PATCH 19/27] Automatically detect SSE2

---
 include/fast_float/ascii_number.h | 6 +++---
 include/fast_float/float_common.h | 8 +++++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 59318f2..15fc1cf 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -73,7 +73,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
   const __m128i masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(kmasks));
 
   // pipeline 4 and 4 chars at the same time (since loadu_si64 has high latency)
-  // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead
+  // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead of masking + pack
   const char* const p = reinterpret_cast<const char*>(chars); 
   __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), masks);
   __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), masks);
@@ -150,8 +150,8 @@ bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept {
 }
 
 // Call this if chars might not be 8 digits.
-// Using this (instead of is_made_of_eight_digits_fast() and parse_eight_digits_unrolled())
-// ensures we don't load SIMD registers twice.
+// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
+// ensures we don't load SIMD registers twice if we don't have to.
 //
 // Benchmark:
 // https://quick-bench.com/q/Bbn0B4WmZsdgS3qDZWpggAY-jgs
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 175389f..5a5942d 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -78,8 +78,14 @@
 #endif
 #endif
 
+#if defined(__SSE2__) || \
+  (defined(FASTFLOAT_VISUAL_STUDIO) && \
+    (defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)))
+#define FASTFLOAT_SSE2 1
+#endif
+
 #ifdef FASTFLOAT_SSE2
-#define FASTFLOAT_HAS_SIMD (1)
+#define FASTFLOAT_HAS_SIMD 1
 #endif
 
 #if defined(__GNUC__)

From 53b065f38dcd5fdfdcf92139c68344e235b12475 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Sun, 7 May 2023 17:38:32 -0400
Subject: [PATCH 20/27] Avoid redundant load in SSE2 code

---
 include/fast_float/ascii_number.h | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 15fc1cf..f17aea6 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -67,23 +67,28 @@ uint64_t read8_to_u64(const UC *chars) {
 #ifdef FASTFLOAT_SSE2
 
 fastfloat_really_inline
-uint64_t simd_read8_to_u64(const char16_t* chars) {
+uint64_t simd_read8_to_u64(const __m128i data) {
 FASTFLOAT_SIMD_DISABLE_WARNINGS
-  static const char16_t kmasks[] = { 0xff, 0xff, 0xff, 0xff };
+  static const char16_t kmasks[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
   const __m128i masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(kmasks));
 
-  // pipeline 4 and 4 chars at the same time (since loadu_si64 has high latency)
   // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead of masking + pack
-  const char* const p = reinterpret_cast<const char*>(chars); 
-  __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), masks);
-  __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), masks);
-  __m128i packed = _mm_packus_epi16(i1, i2);
+  __m128i masked = _mm_and_si128(data, masks);
+  __m128i packed = _mm_packus_epi16(masked, masked);
 
   uint64_t val;
-  _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8));
+  _mm_storeu_si64(&val, packed);
   return val;
 FASTFLOAT_SIMD_RESTORE_WARNINGS
 }
+
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const char16_t* chars) {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(chars)));
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
 #endif
 
 // dummy for compile
@@ -142,11 +147,11 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val
 
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept {
-  const bool is_digits = is_made_of_eight_digits_fast(read8_to_u64(chars));
-  if (is_digits) {
+  if (is_made_of_eight_digits_fast(read8_to_u64(chars))) {
     i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(chars));
+    return true;
   }
-  return is_digits;
+  return false;
 }
 
 // Call this if chars might not be 8 digits.
@@ -171,8 +176,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
   const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119));
 
   if (_mm_movemask_epi8(t1) == 0) {
-    uint64_t digits = simd_read8_to_u64(chars);
-    i = i * 100000000 + parse_eight_digits_unrolled(digits);
+    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
     return true;
   }
   else return false;

From 6ede03878977d0a3469d3fc239168333dcf8a3f2 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Tue, 9 May 2023 22:19:23 -0400
Subject: [PATCH 21/27] Apply changes from benchmarked version - Move
 parse_truncated_number_string back inside parse_number_string

---
 include/fast_float/ascii_number.h | 68 +++++++++++++------------------
 include/fast_float/parse_number.h |  3 --
 2 files changed, 28 insertions(+), 43 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index f17aea6..39f2a07 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -151,7 +151,7 @@ bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept {
     i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(chars));
     return true;
   }
-  return false;
+  else return false;
 }
 
 // Call this if chars might not be 8 digits.
@@ -199,7 +199,6 @@ template <typename UC>
 struct parsed_number_string_t {
   int64_t exponent{0};
   uint64_t mantissa{0};
-  int64_t exp_number{0};
   UC const * lastmatch{nullptr};
   bool negative{false};
   bool valid{false};
@@ -308,7 +307,6 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
   }
   answer.lastmatch = p;
   answer.valid = true;
-  answer.exp_number = exp_number;
 
   // If we frequently had to deal with long strings of digits,
   // we could extend our code by using a 128-bit integer instead
@@ -326,49 +324,39 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
       start++;
     }
 
-    // exponent/mantissa must be truncated later!
-    // this is unlikely, so don't inline truncation code with the rest of parse_number_string()
-    answer.too_many_digits = digit_count > 19;
+    if (digit_count > 19) {
+      answer.too_many_digits = true;
+      // Let us start again, this time, avoiding overflows.
+      // We don't need to check if is_integer, since we use the
+      // pre-tokenized spans from above.
+      i = 0;
+      p = answer.integer.ptr;
+      UC const* int_end = p + answer.integer.len();
+      const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 };
+      while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+        i = i * 10 + uint64_t(*p - UC('0'));
+        ++p;
+      }
+      if (i >= minimal_nineteen_digit_integer) { // We have a big integers
+        exponent = end_of_integer_part - p + exp_number;
+      }
+      else { // We have a value with a fractional component.
+        p = answer.fraction.ptr;
+        UC const* frac_end = p + answer.fraction.len();
+        while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+          i = i * 10 + uint64_t(*p - UC('0'));
+          ++p;
+        }
+        exponent = answer.fraction.ptr - p + exp_number;
+      }
+      // We have now corrected both exponent and i, to a truncated value
+    }
   }
   answer.exponent = exponent;
   answer.mantissa = i;
   return answer;
 }
 
-template <typename UC>
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-void parse_truncated_number_string(parsed_number_string_t<UC>& ps)
-{
-  // Let us start again, this time, avoiding overflows.
-  // We don't need to check if is_integer, since we use the
-  // pre-tokenized spans.
-  uint64_t i = 0;
-  int64_t exponent = 0;
-  const UC* p = ps.integer.ptr;
-  const UC* const int_end = p + ps.integer.len();
-  const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
-  while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-    i = i * 10 + uint64_t(*p - UC('0'));
-    ++p;
-  }
-  if (i >= minimal_nineteen_digit_integer) { // We have a big integers
-    exponent = int_end - p + ps.exp_number;
-  }
-  else { // We have a value with a fractional component.
-    p = ps.fraction.ptr;
-    const UC* const frac_end = p + ps.fraction.len();
-    while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
-      i = i * 10 + uint64_t(*p - UC('0'));
-      ++p;
-    }
-    exponent = ps.fraction.ptr - p + ps.exp_number;
-  }
-  // We have now corrected both exponent and i, to a truncated value
-
-  ps.exponent = exponent;
-  ps.mantissa = i;
-}
-
 } // namespace fast_float
 
 #endif
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index cea24ea..e077b9d 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -166,9 +166,6 @@ from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last,
   if (!pns.valid) {
     return detail::parse_infnan(first, last, value);
   }
-  if (pns.too_many_digits) {
-    parse_truncated_number_string(pns);
-  }
 
   answer.ec = std::errc(); // be optimistic
   answer.ptr = pns.lastmatch;

From 38613a39f9eb3763a68fe56f8acf22474ed9c5ed Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Wed, 17 May 2023 01:34:33 -0400
Subject: [PATCH 22/27] Fix perf decrease when UC = char

---
 include/fast_float/ascii_number.h | 80 +++++++++++++++----------------
 include/fast_float/float_common.h |  2 +-
 2 files changed, 39 insertions(+), 43 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 39f2a07..cc0af11 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -17,7 +17,7 @@
 namespace fast_float {
 
 template <typename UC>
-fastfloat_really_inline constexpr bool has_simd_opts() {
+fastfloat_really_inline constexpr bool has_simd_opt() {
 #ifdef FASTFLOAT_HAS_SIMD
   return std::is_same<UC, char16_t>::value;
 #else
@@ -68,18 +68,7 @@ uint64_t read8_to_u64(const UC *chars) {
 
 fastfloat_really_inline
 uint64_t simd_read8_to_u64(const __m128i data) {
-FASTFLOAT_SIMD_DISABLE_WARNINGS
-  static const char16_t kmasks[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-  const __m128i masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(kmasks));
-
-  // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead of masking + pack
-  __m128i masked = _mm_and_si128(data, masks);
-  __m128i packed = _mm_packus_epi16(masked, masked);
-
-  uint64_t val;
-  _mm_storeu_si64(&val, packed);
-  return val;
-FASTFLOAT_SIMD_RESTORE_WARNINGS
+  return _mm_cvtsi128_si64x(_mm_packus_epi16(data, data));
 }
 
 fastfloat_really_inline
@@ -92,7 +81,7 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS
 #endif
 
 // dummy for compile
-template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opts<UC>())>
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
 uint64_t simd_read8_to_u64(UC const*) {
   return 0;
 }
@@ -132,7 +121,7 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
 template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint32_t parse_eight_digits_unrolled(UC const * chars)  noexcept {
-  if (cpp20_and_in_constexpr() || !has_simd_opts<UC>()) {
+  if (cpp20_and_in_constexpr() || !has_simd_opt<UC>()) {
     return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
   }
   return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
@@ -145,28 +134,18 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val
      0x8080808080808080));
 }
 
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept {
-  if (is_made_of_eight_digits_fast(read8_to_u64(chars))) {
-    i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(chars));
-    return true;
-  }
-  else return false;
-}
+
+#ifdef FASTFLOAT_HAS_SIMD
 
 // Call this if chars might not be 8 digits.
 // Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
-// ensures we don't load SIMD registers twice if we don't have to.
-//
-// Benchmark:
-// https://quick-bench.com/q/Bbn0B4WmZsdgS3qDZWpggAY-jgs
-//
+// ensures we don't load SIMD registers twice.
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
-#ifdef FASTFLOAT_SSE2
+bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
   if (cpp20_and_in_constexpr()) {
     return false;
-  }    
+  }   
+#ifdef FASTFLOAT_SSE2
 FASTFLOAT_SIMD_DISABLE_WARNINGS
   const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
 
@@ -181,18 +160,36 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
   }
   else return false;
 FASTFLOAT_SIMD_RESTORE_WARNINGS
-
-#else // No SIMD available
-
-  (void)chars; (void)i; // unused
-  return false;
 #endif
 }
 
-// todo, no simd optimization yet
+#endif
+
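+// dummy overload so the call in loop_parse_if_eight_digits() compiles for character types without a SIMD path; never reached at runtime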
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
+uint64_t simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) {
+  return 0;
+}
+
+
+template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value)>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool parse_if_eight_digits_unrolled(const char32_t*, uint64_t&) noexcept {
-  return false;
+void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) {
+  if (!has_simd_opt<UC>()) {
+    return;
+  }
+  while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok
+    p += 8;
+  }
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) {
+  // optimizes better than a separate check-then-parse helper (the former parse_if_eight_digits_unrolled()) for UC = char.
+  while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) {
+    i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok
+    p += 8;
+  }
 }
 
 template <typename UC>
@@ -256,9 +253,8 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
     UC const * before = p;
     // can occur at most twice without overflowing, but let it occur more, since
     // for integers with many digits, digit parsing is the primary bottleneck.
-    while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p, i)) {  // in rare cases, this will overflow, but that's ok
-      p += 8;
-    }
+    loop_parse_if_eight_digits(p, pend, i);
+
     while ((p != pend) && is_integer(*p)) {
       uint8_t digit = uint8_t(*p - UC('0'));
       ++p;
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 201e72f..80b022e 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -157,7 +157,7 @@ using parse_options = parse_options_t<char>;
 // rust style `try!()` macro, or `?` operator
 #define FASTFLOAT_TRY(x) { if (!(x)) return false; }
 
-#define FASTFLOAT_ENABLE_IF(test) typename std::enable_if<(test), int>::type = 0
+#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0
 
 
 namespace fast_float {

From 064d2b832d705fdb7b81c8066a512bb65ed4362a Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Wed, 17 May 2023 02:03:07 -0400
Subject: [PATCH 23/27] Fix Werrors

---
 include/fast_float/ascii_number.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index cc0af11..e39fafc 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -68,7 +68,9 @@ uint64_t read8_to_u64(const UC *chars) {
 
 fastfloat_really_inline
 uint64_t simd_read8_to_u64(const __m128i data) {
-  return _mm_cvtsi128_si64x(_mm_packus_epi16(data, data));
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return uint64_t(_mm_cvtsi128_si64x(_mm_packus_epi16(data, data)));
+FASTFLOAT_SIMD_RESTORE_WARNINGS
 }
 
 fastfloat_really_inline

From 1c9a3088bf9e3af9b838437c9efb00b5d877eec9 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Wed, 17 May 2023 02:35:16 -0400
Subject: [PATCH 24/27] Fix for VS 32-bit

---
 include/fast_float/ascii_number.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index e39fafc..ba6b0ef 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -69,7 +69,15 @@ uint64_t read8_to_u64(const UC *chars) {
 fastfloat_really_inline
 uint64_t simd_read8_to_u64(const __m128i data) {
 FASTFLOAT_SIMD_DISABLE_WARNINGS
-  return uint64_t(_mm_cvtsi128_si64x(_mm_packus_epi16(data, data)));
+  const __m128i packed = _mm_packus_epi16(data, data);
+#ifdef FASTFLOAT_64BIT
+  return uint64_t(_mm_cvtsi128_si64x(packed));
+#else
+  uint64_t value;
+  // Visual Studio + older versions of GCC don't support _mm_storeu_si64
+  _mm_storel_epi64(reinterpret_cast<__m128i*>(&value), packed);
+  return value;
+#endif
 FASTFLOAT_SIMD_RESTORE_WARNINGS
 }
 

From cb397ef446c3b59c73f24147386e0851ca2b93c1 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Wed, 17 May 2023 05:40:29 -0400
Subject: [PATCH 25/27] Fix for clang (missing _mm_cvtsi128_si64x)

---
 include/fast_float/ascii_number.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index ba6b0ef..5327a79 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -71,7 +71,7 @@ uint64_t simd_read8_to_u64(const __m128i data) {
 FASTFLOAT_SIMD_DISABLE_WARNINGS
   const __m128i packed = _mm_packus_epi16(data, data);
 #ifdef FASTFLOAT_64BIT
-  return uint64_t(_mm_cvtsi128_si64x(packed));
+  return uint64_t(_mm_cvtsi128_si64(packed));
 #else
   uint64_t value;
   // Visual Studio + older versions of GCC don't support _mm_storeu_si64

From a5632d5b5748e4673b00e1b6d2a89f049031d3b1 Mon Sep 17 00:00:00 2001
From: Maya Warrier <maya.warrierm@gmail.com>
Date: Wed, 17 May 2023 22:54:46 -0400
Subject: [PATCH 26/27] Fix digit comparison, cleanup

---
 include/fast_float/ascii_number.h | 4 ++--
 include/fast_float/float_common.h | 8 --------
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 5327a79..481b91d 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -161,8 +161,8 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
 
   // (x - '0') <= 9
   // http://0x80.pl/articles/simd-parsing-int-sequences.html
-  const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80));
-  const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119));
+  const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
+  const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));
 
   if (_mm_movemask_epi8(t1) == 0) {
     i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 80b022e..0418149 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -170,14 +170,6 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
 #endif
 }
 
-fastfloat_really_inline constexpr bool has_simd() {
-#if FASTFLOAT_HAS_SIMD
-  return true;
-#else
-  return false;
-#endif
-}
-
 // Compares two ASCII strings in a case insensitive manner.
 template <typename UC>
 inline FASTFLOAT_CONSTEXPR14 bool

From b7119471b161ff4ddcae383d55e3c4f060e3eea6 Mon Sep 17 00:00:00 2001
From: Maya Warrier <34803055+mayawarrier@users.noreply.github.com>
Date: Mon, 22 May 2023 13:22:46 -0400
Subject: [PATCH 27/27] Update CONTRIBUTORS

---
 CONTRIBUTORS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 03e02b4..e339869 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -5,4 +5,5 @@ Neal Richardson
 Tim Paine
 Fabio Pellacini
 Lénárd Szolnoki
-Jan Pharago
\ No newline at end of file
+Jan Pharago
+Maya Warrier
\ No newline at end of file