From 4f0615b4b423683db0456ff9024e3f09b34b8436 Mon Sep 17 00:00:00 2001
From: IRainman <a.rainman@gmail.com>
Date: Mon, 7 Apr 2025 23:21:29 +0300
Subject: [PATCH] Reduce register pressure and clean up interface for standard.

---
 include/fast_float/ascii_number.h     | 39 +++++++++++++--------------
 include/fast_float/digit_comparison.h | 32 +++++++++++-----------
 include/fast_float/float_common.h     | 38 ++++++++++++++------------
 3 files changed, 56 insertions(+), 53 deletions(-)
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 1edfc42..9236eca 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -260,7 +260,7 @@ enum class parse_error {
 };
 
 template <typename UC> struct parsed_number_string_t {
-  int64_t exponent{0};
+  int16_t exponent{0};
   uint64_t mantissa{0};
   UC const *lastmatch{nullptr};
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
@@ -293,11 +293,11 @@ template <bool basic_json_fmt, typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
 parse_number_string(UC const *p, UC const *pend,
                     parse_options_t<UC> const &options) noexcept {
-  // V2008 Cyclomatic complexity:    59.
+  // High cyclomatic complexity (https://en.wikipedia.org/wiki/Cyclomatic_complexity).
   // Consider refactoring the 'parse_number_string' function.
   // FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN fix this.
   parsed_number_string_t<UC> answer;
-  FASTFLOAT_ASSUME(p < pend); // assume p < pend, so dereference without checks;
+  FASTFLOAT_ASSUME(p < pend); // so dereference without checks;
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
   answer.negative = (*p == UC('-'));
   // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
@@ -338,7 +338,7 @@ parse_number_string(UC const *p, UC const *pend,
     ++p;
   }
   UC const *const end_of_integer_part = p;
-  int64_t digit_count = int64_t(end_of_integer_part - start_digits);
+  int16_t digit_count = static_cast<int16_t>(end_of_integer_part - start_digits);
   answer.integer = span<UC const>(start_digits, size_t(digit_count));
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
   FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
@@ -353,7 +353,7 @@ parse_number_string(UC const *p, UC const *pend,
   }
 #endif
 
-  int64_t exponent = 0;
+  int16_t exponent = 0;
   bool const has_decimal_point = (p != pend) && (*p == options.decimal_point);
   if (has_decimal_point) {
     ++p;
@@ -363,11 +363,11 @@ parse_number_string(UC const *p, UC const *pend,
     loop_parse_if_eight_digits(p, pend, i);
 
     while ((p != pend) && is_integer(*p)) {
-      uint8_t digit = uint8_t(*p - UC('0'));
-      ++p;
+      uint8_t const digit = uint8_t(*p - UC('0'));
       i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
+      ++p;
     }
-    exponent = before - p;
+    exponent = static_cast<int16_t>(before - p);
     answer.fraction = span<UC const>(before, size_t(p - before));
     digit_count -= exponent;
   }
@@ -383,19 +383,20 @@ parse_number_string(UC const *p, UC const *pend,
   else if (digit_count == 0) { // we must have encountered at least one integer!
     return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
   }
-  int64_t exp_number = 0; // explicit exponential part
+  int16_t exp_number = 0; // explicit exponential part
   if ((uint64_t(options.format & chars_format::scientific) && (p != pend) &&
        ((UC('e') == *p) || (UC('E') == *p)))
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
-      || (uint64_t(options.format & detail::basic_fortran_fmt) &&
-          ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) ||
-           (UC('D') == *p)))
+      || (uint64_t(options.format & detail::basic_fortran_fmt) &&
+          (p != pend) && ((UC('+') == *p) || (UC('-') == *p) ||
+                          (UC('d') == *p) || (UC('D') == *p)))
 #endif
   ) {
     UC const *location_of_e = p;
     if (((UC('e') == *p) || (UC('E') == *p))
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
-        || (UC('d') == *p) || (UC('D') == *p)
+        || (uint64_t(options.format & detail::basic_fortran_fmt) &&
+           ((UC('d') == *p) || (UC('D') == *p)))
 #endif
     ) {
       ++p;
@@ -421,10 +422,8 @@ parse_number_string(UC const *p, UC const *pend,
       p = location_of_e;
     } else {
       while ((p != pend) && is_integer(*p)) {
-        uint8_t digit = uint8_t(*p - UC('0'));
-        if (exp_number < 0x10000000) {
-          exp_number = 10 * exp_number + digit;
-        }
+        if (exp_number < 3276) // saturate: 10 * 3275 + 9 still fits in int16_t
+          exp_number = int16_t(10 * exp_number + uint8_t(*p - UC('0')));
         ++p;
       }
       if (neg_exp) {
@@ -475,7 +474,7 @@ parse_number_string(UC const *p, UC const *pend,
         ++p;
       }
       if (i >= minimal_nineteen_digit_integer) { // We have a big integers
-        exponent = end_of_integer_part - p + exp_number;
+        exponent = static_cast<int16_t>(end_of_integer_part - p + exp_number);
       } else { // We have a value with a fractional component.
         p = answer.fraction.ptr;
         UC const *frac_end = p + answer.fraction.len();
@@ -483,7 +482,7 @@ parse_number_string(UC const *p, UC const *pend,
           i = i * 10 + uint64_t(*p - UC('0'));
           ++p;
         }
-        exponent = answer.fraction.ptr - p + exp_number;
+        exponent = static_cast<int16_t>(answer.fraction.ptr - p + exp_number);
       }
       // We have now corrected both exponent and i, to a truncated value
     }
@@ -548,7 +547,7 @@ parse_int_string(UC const *p, UC const *pend, T &value,
     p++;
   }
 
-  uint8_t const digit_count = size_t(p - start_digits);
+  uint8_t const digit_count = static_cast<uint8_t>(p - start_digits);
 
   if (digit_count == 0) {
     if (has_leading_zeros) {
diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h
index a2869f8..ffac7fd 100644
--- a/include/fast_float/digit_comparison.h
+++ b/include/fast_float/digit_comparison.h
@@ -39,10 +39,10 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
 // effect on performance: in order to have a faster algorithm, we'd need
 // to slow down performance for faster algorithms, and this is still fast.
 template <typename UC>
-fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int16_t
 scientific_exponent(const parsed_number_string_t<UC> &num) noexcept {
   uint64_t mantissa = num.mantissa;
-  int32_t exponent = int32_t(num.exponent);
+  int16_t exponent = num.exponent;
   while (mantissa >= 10000) {
     mantissa /= 10000;
     exponent += 4;
@@ -223,8 +223,8 @@ is_truncated(span<UC const> s) noexcept {
 
 template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
-parse_eight_digits(UC const *&p, limb &value, unsigned int &counter,
-                   unsigned int &count) noexcept {
+parse_eight_digits(UC const *&p, limb &value, uint16_t &counter,
+                   uint16_t &count) noexcept {
   value = value * 100000000 + parse_eight_digits_unrolled(p);
   p += 8;
   counter += 8;
@@ -233,8 +233,8 @@ parse_eight_digits(UC const *&p, limb &value, unsigned int &counter,
 
 template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
-parse_one_digit(UC const *&p, limb &value, unsigned int &counter,
-                unsigned int &count) noexcept {
+parse_one_digit(UC const *&p, limb &value, uint16_t &counter,
+                uint16_t &count) noexcept {
   value = value * 10 + limb(*p - UC('0'));
   p++;
   counter++;
@@ -248,7 +248,7 @@ add_native(bigint &big, limb power, limb value) noexcept {
 }
 
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
-round_up_bigint(bigint &big, unsigned int &count) noexcept {
+round_up_bigint(bigint &big, uint16_t &count) noexcept {
   // need to round-up the digits, but need to avoid rounding
   // ....9999 to ...10000, which could cause a false halfway point.
   add_native(big, 10, 1);
@@ -259,17 +259,17 @@ round_up_bigint(bigint &big, unsigned int &count) noexcept {
 template <typename UC>
 inline FASTFLOAT_CONSTEXPR20 void
 parse_mantissa(bigint &result, const parsed_number_string_t<UC> &num,
-               unsigned int max_digits, unsigned int &digits) noexcept {
+               uint16_t const max_digits, uint16_t &digits) noexcept {
   // try to minimize the number of big integer and scalar multiplication.
   // therefore, try to parse 8 digits at a time, and multiply by the largest
   // scalar value (9 or 19 digits) for each step.
-  unsigned int counter = 0;
+  uint16_t counter = 0;
   digits = 0;
   limb value = 0;
 #ifdef FASTFLOAT_64BIT_LIMB
-  unsigned int step = 19;
+  uint16_t const step = 19;
 #else
-  unsigned int step = 9;
+  uint16_t const step = 9;
 #endif
 
   // process all integer digits.
@@ -370,7 +370,7 @@ positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
 // are of the same magnitude.
 template <typename T>
 inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-negative_digit_comp(bigint &bigmant, const adjusted_mantissa &am,
+negative_digit_comp(bigint &bigmant, const adjusted_mantissa am,
                     const int32_t exponent) noexcept {
   bigint &real_digits = bigmant;
   const int32_t &real_exp = exponent;
@@ -443,13 +443,13 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa digit_comp(
   // remove the invalid exponent bias
   am.power2 -= invalid_am_bias;
 
-  int32_t sci_exp = scientific_exponent(num);
-  unsigned int max_digits = binary_format<T>::max_digits();
-  unsigned int digits = 0;
+  int16_t sci_exp = scientific_exponent(num);
+  uint16_t const max_digits = static_cast<uint16_t>(binary_format<T>::max_digits());
+  uint16_t digits = 0;
   bigint bigmant;
   parse_mantissa(bigmant, num, max_digits, digits);
   // can't underflow, since digits is at most max_digits.
-  int32_t exponent = sci_exp + 1 - int32_t(digits);
+  int16_t exponent = sci_exp + 1 - digits;
   if (exponent >= 0) {
     return positive_digit_comp<T>(bigmant, exponent);
   } else {
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 3776118..adb56a3 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -69,7 +69,7 @@ using from_chars_result = from_chars_result_t<char>;
 template <typename UC> struct parse_options_t {
   FASTFLOAT_CONSTEXPR20 explicit parse_options_t(
       chars_format fmt = chars_format::general, UC dot = UC('.'),
-      const int b = 10) noexcept
+      int const b = 10) noexcept
       : format(fmt), decimal_point(dot), base(static_cast<uint8_t>(b)) {}
 
   /** Which number formats are accepted */
@@ -427,12 +427,10 @@ struct adjusted_mantissa {
   int32_t power2; // a negative value indicates an invalid result
   adjusted_mantissa() noexcept = default;
 
-  fastfloat_really_inline
   constexpr bool operator==(adjusted_mantissa const &o) const noexcept {
     return mantissa == o.mantissa && power2 == o.power2;
   }
 
-  fastfloat_really_inline
   constexpr bool operator!=(adjusted_mantissa const &o) const noexcept {
     return mantissa != o.mantissa || power2 != o.power2;
   }
@@ -449,10 +447,10 @@ template <typename T, typename U = void> struct binary_format_lookup_tables;
 template <typename T> struct binary_format : binary_format_lookup_tables<T> {
   using equiv_uint = equiv_uint_t<T>;
 
-  static constexpr unsigned int mantissa_explicit_bits();
+  static constexpr int mantissa_explicit_bits();
   static constexpr int minimum_exponent();
   static constexpr int infinite_power();
-  static constexpr unsigned int sign_index();
+  static constexpr int sign_index();
   static constexpr int
   min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST
   static constexpr int max_exponent_fast_path();
@@ -464,7 +462,7 @@ template <typename T> struct binary_format : binary_format_lookup_tables<T> {
   static constexpr int largest_power_of_ten();
   static constexpr int smallest_power_of_ten();
   static constexpr T exact_power_of_ten(int64_t power);
-  static constexpr unsigned int max_digits();
+  static constexpr size_t max_digits();
   static constexpr equiv_uint exponent_mask();
   static constexpr equiv_uint mantissa_mask();
   static constexpr equiv_uint hidden_bit_mask();
@@ -572,12 +570,12 @@ inline constexpr int binary_format<float>::min_exponent_fast_path() {
 }
 
 template <>
-inline constexpr unsigned int binary_format<double>::mantissa_explicit_bits() {
+inline constexpr int binary_format<double>::mantissa_explicit_bits() {
   return 52;
 }
 
 template <>
-inline constexpr unsigned int binary_format<float>::mantissa_explicit_bits() {
+inline constexpr int binary_format<float>::mantissa_explicit_bits() {
   return 23;
 }
 
@@ -619,11 +617,11 @@ template <> inline constexpr int binary_format<float>::infinite_power() {
 
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
 
-template <> inline constexpr unsigned int binary_format<double>::sign_index() {
+template <> inline constexpr int binary_format<double>::sign_index() {
   return 63;
 }
 
-template <> inline constexpr unsigned int binary_format<float>::sign_index() {
+template <> inline constexpr int binary_format<float>::sign_index() {
   return 31;
 }
 
@@ -708,7 +706,7 @@ inline constexpr int binary_format<std::float16_t>::max_exponent_fast_path() {
 }
 
 template <>
-inline constexpr unsigned int binary_format<std::float16_t>::mantissa_explicit_bits() {
+inline constexpr int binary_format<std::float16_t>::mantissa_explicit_bits() {
   return 10;
 }
 
@@ -835,7 +833,7 @@ binary_format<std::bfloat16_t>::hidden_bit_mask() {
 }
 
 template <>
-inline constexpr unsigned int binary_format<std::bfloat16_t>::mantissa_explicit_bits() {
+inline constexpr int binary_format<std::bfloat16_t>::mantissa_explicit_bits() {
   return 7;
 }
 
@@ -910,7 +908,7 @@ template <>
 inline constexpr uint64_t
 binary_format<double>::max_mantissa_fast_path(int64_t power) {
   // caller is responsible to ensure that
-  // power >= 0 && power <= 22
+  FASTFLOAT_ASSUME(power >= 0 && power <= 22);
   //
   // Work around clang bug https://godbolt.org/z/zedh7rrhc
   return (void)max_mantissa[0], max_mantissa[power];
@@ -920,7 +918,7 @@ template <>
 inline constexpr uint64_t
 binary_format<float>::max_mantissa_fast_path(int64_t power) {
   // caller is responsible to ensure that
-  // power >= 0 && power <= 10
+  FASTFLOAT_ASSUME(power >= 0 && power <= 10);
   //
   // Work around clang bug https://godbolt.org/z/zedh7rrhc
   return (void)max_mantissa[0], max_mantissa[power];
@@ -929,12 +927,18 @@ binary_format<float>::max_mantissa_fast_path(int64_t power) {
 template <>
 inline constexpr double
 binary_format<double>::exact_power_of_ten(int64_t power) {
+  // caller is responsible to ensure that
+  FASTFLOAT_ASSUME(power >= 0 && power <= 22);
+  //
   // Work around clang bug https://godbolt.org/z/zedh7rrhc
   return (void)powers_of_ten[0], powers_of_ten[power];
 }
 
 template <>
 inline constexpr float binary_format<float>::exact_power_of_ten(int64_t power) {
+  // caller is responsible to ensure that
+  FASTFLOAT_ASSUME(power >= 0 && power <= 10);
+  //
   // Work around clang bug https://godbolt.org/z/zedh7rrhc
   return (void)powers_of_ten[0], powers_of_ten[power];
 }
@@ -956,11 +960,11 @@ template <> inline constexpr int binary_format<float>::smallest_power_of_ten() {
   return -64;
 }
 
-template <> inline constexpr unsigned int binary_format<double>::max_digits() {
+template <> inline constexpr size_t binary_format<double>::max_digits() {
   return 769;
 }
 
-template <> inline constexpr unsigned int binary_format<float>::max_digits() {
+template <> inline constexpr size_t binary_format<float>::max_digits() {
   return 114;
 }
 
@@ -1005,7 +1009,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void to_float(
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
     bool const negative,
 #endif
-    adjusted_mantissa const &am, T &value) noexcept {
+    adjusted_mantissa const am, T &value) noexcept {
   using equiv_uint = equiv_uint_t<T>;
   equiv_uint word = equiv_uint(am.mantissa);
   word = equiv_uint(word | equiv_uint(am.power2)