From d07362c459512cda1e53c82f6b1553d98ec692c7 Mon Sep 17 00:00:00 2001
From: IRainman <a.rainman@gmail.com>
Date: Fri, 26 Dec 2025 00:13:41 +0300
Subject: [PATCH] Final review for a simpler merge. Feature
 FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED is removed because hex and bin
 parsing does not work properly with it. Properly use
 FASTFLOAT_SIMD_DISABLE_WARNINGS and FASTFLOAT_SIMD_RESTORE_WARNINGS only for
 instructions that allow unaligned loads.

---
 include/fast_float/ascii_number.h      | 54 +++++++++++++-------------
 include/fast_float/bigint.h            |  8 ++--
 include/fast_float/decimal_to_binary.h |  4 +-
 include/fast_float/digit_comparison.h  | 11 +++---
 include/fast_float/fast_float.h        |  2 +-
 include/fast_float/fast_table.h        |  2 -
 include/fast_float/float_common.h      | 27 +++++++++----
 include/fast_float/parse_number.h      | 16 ++++----
 8 files changed, 67 insertions(+), 57 deletions(-)
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index a2a4212..889968a 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -84,10 +84,10 @@ fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const &data) {
 }
 
 fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
-  return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast<__m128i const *>(
-      chars))); // TODO: V1032 https://pvs-studio.com/en/docs/warnings/v1032/
-                // The pointer 'chars' is cast to a more strictly aligned
-                // pointer type.
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64(
+      _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars)));
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
 }
 
 #elif defined(FASTFLOAT_NEON)
@@ -98,8 +98,10 @@ fastfloat_really_inline uint64_t simd_read8_to_u64(uint16x8_t const &data) {
 }
 
 fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
   return simd_read8_to_u64(
       vld1q_u16(reinterpret_cast<uint16_t const *>(chars)));
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
 }
 
 #endif
@@ -118,9 +120,9 @@ uint64_t simd_read8_to_u64(UC const *) {
 // credit  @aqrit
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
 parse_eight_digits_unrolled(uint64_t val) noexcept {
-  constexpr uint64_t mask = 0x000000FF000000FF;
-  constexpr uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
-  constexpr uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+  uint64_t const mask = 0x000000FF000000FF;
+  uint64_t const mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+  uint64_t const mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
   val -= 0x3030303030303030;
   val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
   val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
@@ -156,11 +158,11 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
     return false;
   }
 #ifdef FASTFLOAT_SSE2
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
   // Load 8 UTF-16 characters (16 bytes)
-  __m128i const data = _mm_loadu_si128(reinterpret_cast<__m128i const *>(
-      chars)); // TODO: V1032 https://pvs-studio.com/en/docs/warnings/v1032/ The
-               // pointer 'chars' is cast to a more strictly aligned pointer
-               // type.
+  __m128i const data =
+      _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars));
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
 
   // Branchless "are all digits?" trick from Lemire:
   // (x - '0') <= 9  <=> (x + 32720) <= 32729
@@ -175,7 +177,9 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
     return true;
   }
 #elif defined(FASTFLOAT_NEON)
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
   uint16x8_t const data = vld1q_u16(reinterpret_cast<uint16_t const *>(chars));
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
 
   // (x - '0') <= 9
   // http://0x80.pl/articles/simd-parsing-int-sequences.html
@@ -286,7 +290,7 @@ report_parse_error(UC const *p, parse_error error) noexcept {
 template <bool basic_json_fmt, typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
 parse_number_string(UC const *p, UC const *pend,
-                    parse_options_t<UC> const &options) noexcept {
+                    parse_options_t<UC> const options) noexcept {
   // Cyclomatic complexity https://en.wikipedia.org/wiki/Cyclomatic_complexity
   // Consider refactoring the 'parse_number_string' function.
   // FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN fix this.
@@ -295,8 +299,8 @@ parse_number_string(UC const *p, UC const *pend,
   FASTFLOAT_ASSUME(p < pend);
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
   answer.negative = (*p == UC('-'));
+  // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
   if (answer.negative ||
-      // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
       ((chars_format_t(options.format & chars_format::allow_leading_plus)) &&
        (!basic_json_fmt && *p == UC('+')))) {
     ++p;
@@ -400,8 +404,11 @@ parse_number_string(UC const *p, UC const *pend,
 #ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
     ++p;
 #else
-    if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) ||
-        (UC('D') == *p)) {
+    if ((UC('e') == *p) || (UC('E') == *p)
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+    || (UC('d') == *p) || (UC('D') == *p)
+#endif
+    ) {
       ++p;
     }
 #endif
@@ -483,8 +490,8 @@ parse_number_string(UC const *p, UC const *pend,
       p = answer.integer.ptr;
       UC const *int_end = p + answer.integer.len();
       constexpr am_mant_t minimal_nineteen_digit_integer{1000000000000000000};
-      while ((answer.mantissa < minimal_nineteen_digit_integer) &&
-             (p != int_end)) {
+      while ((p != int_end) &&
+             (answer.mantissa < minimal_nineteen_digit_integer)) {
         answer.mantissa = static_cast<am_mant_t>(
             answer.mantissa * 10 + static_cast<am_mant_t>(*p - UC('0')));
         ++p;
@@ -496,8 +503,8 @@ parse_number_string(UC const *p, UC const *pend,
         // We have a value with a significant fractional component.
         p = answer.fraction.ptr;
         UC const *const frac_end = p + answer.fraction.len();
-        while ((answer.mantissa < minimal_nineteen_digit_integer) &&
-               (p != frac_end)) {
+        while ((p != frac_end) &&
+               (answer.mantissa < minimal_nineteen_digit_integer)) {
           answer.mantissa = static_cast<am_mant_t>(
               answer.mantissa * 10 + static_cast<am_mant_t>(*p - UC('0')));
           ++p;
@@ -514,7 +521,7 @@ parse_number_string(UC const *p, UC const *pend,
 template <typename T, typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 parse_int_string(UC const *p, UC const *pend, T &value,
-                 parse_options_t<UC> const &options) noexcept {
+                 parse_options_t<UC> const options) noexcept {
 
   from_chars_result_t<UC> answer;
 
@@ -645,14 +652,7 @@ parse_int_string(UC const *p, UC const *pend, T &value,
     loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
   }
   while (p != pend) {
-#ifdef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
-    const auto digit = *p;
-    if (!is_integer(digit)) {
-      break;
-    }
-#else
     auto const digit = ch_to_digit(*p);
-#endif
     if (digit >= options.base) {
       break;
     }
diff --git a/include/fast_float/bigint.h b/include/fast_float/bigint.h
index f3f61ec..5aa3faf 100644
--- a/include/fast_float/bigint.h
+++ b/include/fast_float/bigint.h
@@ -281,7 +281,7 @@ template <limb_t size>
 inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,
                                             limb y) noexcept {
   limb carry = 0;
-  for (limb_t index = 0; index++ != vec.len();) {
+  for (limb_t index = 0; index != vec.len(); ++index) {
     vec[index] = scalar_mul(vec[index], y, carry);
   }
   if (carry != 0) {
@@ -302,7 +302,7 @@ FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
   }
 
   bool carry = false;
-  for (limb_t index = 0; index++ != y.len();) {
+  for (limb_t index = 0; index != y.len(); ++index) {
     limb xi = x[index + start];
     limb yi = y[index];
     bool c1 = false;
@@ -487,7 +487,7 @@ struct bigint : pow5_tables<> {
     } else if (vec.len() < other.vec.len()) {
       return -1;
     } else {
-      for (limb_t index = vec.len(); index-- != 0;) {
+      for (limb_t index = vec.len(); index != 0; --index) {
         limb xi = vec[index - 1];
         limb yi = other.vec[index - 1];
         if (xi > yi) {
@@ -514,7 +514,7 @@ struct bigint : pow5_tables<> {
     bigint_bits_t const shl = n;
     bigint_bits_t const shr = limb_bits - shl;
     limb prev = 0;
-    for (limb_t index = 0; index++ != vec.len();) {
+    for (limb_t index = 0; index != vec.len(); ++index) {
       limb xi = vec[index];
       vec[index] = (xi << shl) | (prev >> shr);
       prev = xi;
diff --git a/include/fast_float/decimal_to_binary.h b/include/fast_float/decimal_to_binary.h
index 5732f0d..8d4b6a2 100644
--- a/include/fast_float/decimal_to_binary.h
+++ b/include/fast_float/decimal_to_binary.h
@@ -139,8 +139,8 @@ compute_float(int64_t q, uint64_t w) noexcept {
   // branchless approach: value128 product = compute_product(q, w); but in
   // practice, we can win big with the compute_product_approximation if its
   // additional branch is easily predicted. Which is best is data specific.
-  limb_t const upperbit = limb_t(product.high >> 63);
-  limb_t const shift =
+  auto const upperbit = limb_t(product.high >> 63);
+  auto const shift =
       limb_t(upperbit + 64 - binary::mantissa_explicit_bits() - 3);
 
   answer.mantissa = product.high >> shift;
diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h
index 084b36d..22c8dd3 100644
--- a/include/fast_float/digit_comparison.h
+++ b/include/fast_float/digit_comparison.h
@@ -58,18 +58,18 @@ scientific_exponent(am_mant_t mantissa, am_pow_t exponent) noexcept {
 // this converts a native floating-point number to an extended-precision float.
 template <typename T>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-to_extended(T const &value) noexcept {
+to_extended(T const value) noexcept {
   using equiv_uint = equiv_uint_t<T>;
   constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
   constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
   constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
 
+  adjusted_mantissa am;
   constexpr am_pow_t bias = binary_format<T>::mantissa_explicit_bits() -
                             binary_format<T>::minimum_exponent();
 
   equiv_uint const bits = bit_cast<equiv_uint, T>(value);
 
-  adjusted_mantissa am;
   if ((bits & exponent_mask) == 0) {
     // denormal
     am.power2 = 1 - bias;
@@ -90,7 +90,7 @@ to_extended(T const &value) noexcept {
 // halfway between b and b+u.
 template <typename T>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-to_extended_halfway(T const &value) noexcept {
+to_extended_halfway(T const value) noexcept {
   adjusted_mantissa am = to_extended(value);
   am.mantissa <<= 1;
   am.mantissa += 1;
@@ -253,7 +253,7 @@ round_up_bigint(bigint &big, am_digits &count) noexcept {
 
 // parse the significant digits into a big integer
 template <typename T, typename UC>
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20 am_digits
+inline FASTFLOAT_CONSTEXPR20 am_digits
 parse_mantissa(bigint &result, const parsed_number_string_t<UC> &num) noexcept {
   // try to minimize the number of big integer and scalar multiplication.
   // therefore, try to parse 8 digits at a time, and multiply by the largest
@@ -369,8 +369,7 @@ template <typename T>
 inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
 negative_digit_comp(bigint &real_digits, adjusted_mantissa am,
                     am_pow_t const real_exp) noexcept {
-  // get the value of `b`, rounded down, and get a bigint representation of
-  // b+h
+  // get the value of `b`, rounded down, and get a bigint representation of b+h
   adjusted_mantissa am_b = am;
   // gcc7 bug: use a lambda to remove the noexcept qualifier bug with
   // -Wnoexcept-type.
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index 0d70a8d..8540e78 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -43,7 +43,7 @@ from_chars(UC const *first, UC const *last, T &value,
 template <typename T, typename UC = char>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_advanced(UC const *first, UC const *last, T &value,
-                    parse_options_t<UC> const &options) noexcept;
+                    parse_options_t<UC> const options) noexcept;
 
 /**
  * This function multiplies an integer number by a power of 10 and returns
diff --git a/include/fast_float/fast_table.h b/include/fast_float/fast_table.h
index 536e149..6a6f901 100644
--- a/include/fast_float/fast_table.h
+++ b/include/fast_float/fast_table.h
@@ -1,8 +1,6 @@
 #ifndef FASTFLOAT_FAST_TABLE_H
 #define FASTFLOAT_FAST_TABLE_H
 
-#include <cstdint>
-
 namespace fast_float {
 
 /**
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 4b59652..51ee29d 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -205,6 +205,21 @@ FASTFLOAT_CONSTEXPR20 To bit_cast(const From &from) {
 #define FASTFLOAT_HAS_SIMD 1
 #endif
 
+#if defined(__GNUC__)
+// disable -Wcast-align=strict (GCC only)
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS                                        \
+  _Pragma("GCC diagnostic push")                                               \
+      _Pragma("GCC diagnostic ignored \"-Wcast-align\"")
+#else
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS
+#endif
+
+#if defined(__GNUC__)
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS _Pragma("GCC diagnostic pop")
+#else
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS
+#endif
+
 #ifdef FASTFLOAT_VISUAL_STUDIO
 #define fastfloat_really_inline __forceinline
 #else
@@ -288,7 +303,7 @@ template <typename UC>
 inline FASTFLOAT_CONSTEXPR14 bool
 fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase,
                       uint_fast8_t const length) noexcept {
-  for (uint_fast8_t i = 0; i++ != length;) {
+  for (uint_fast8_t i = 0; i != length; ++i) {
     UC const actual = actual_mixedcase[i];
     if ((actual < 256 ? actual | 32 : actual) != expected_lowercase[i]) {
       return false;
@@ -450,8 +465,10 @@ umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) noexcept {
 
 // slow emulation routine for 32-bit
 #if !defined(__MINGW64__)
-fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t
-_umul128(uint64_t ab, uint64_t cd, uint64_t *hi) noexcept {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t _umul128(uint64_t ab,
+                                                                uint64_t cd,
+                                                                uint64_t *hi)
+                                                                noexcept {
   return umul128_generic(ab, cd, hi);
 }
 #endif // !__MINGW64__
@@ -1186,7 +1203,6 @@ template <> constexpr char8_t const *str_const_inf<char8_t>() {
 #endif
 
 template <typename = void> struct int_luts {
-#ifndef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
   static constexpr uint8_t chdigit[] = {
       255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
       255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
@@ -1206,7 +1222,6 @@ template <typename = void> struct int_luts {
       255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
       255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
       255};
-#endif
 
   static constexpr uint_fast8_t maxdigits_u64[] = {
       64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16,
@@ -1237,7 +1252,6 @@ template <typename T> constexpr uint64_t int_luts<T>::min_safe_u64[];
 
 #endif
 
-#ifndef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
 template <typename UC>
 fastfloat_really_inline constexpr uint_fast8_t ch_to_digit(UC c) noexcept {
   // wchar_t and char can be signed, so we need to be careful.
@@ -1247,7 +1261,6 @@ fastfloat_really_inline constexpr uint_fast8_t ch_to_digit(UC c) noexcept {
       static_cast<UnsignedUC>(
           -((static_cast<UnsignedUC>(c) & ~0xFFull) == 0)))];
 }
-#endif
 
 fastfloat_really_inline constexpr uint_fast8_t
 max_digits_u64(uint_fast8_t base) noexcept {
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index 0a324f6..eb30818 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -148,7 +148,7 @@ template <typename T> struct from_chars_caller {
   template <typename UC>
   FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
   call(UC const *first, UC const *last, T &value,
-       parse_options_t<UC> const &options) noexcept {
+       parse_options_t<UC> const options) noexcept {
     return from_chars_advanced(first, last, value, options);
   }
 };
@@ -322,7 +322,7 @@ from_chars_advanced(parsed_number_string_t<UC> const &pns, T &value) noexcept {
 template <typename T, typename UC>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_float_advanced(UC const *first, UC const *last, T &value,
-                          parse_options_t<UC> const &options) noexcept {
+                          parse_options_t<UC> const options) noexcept {
 
   static_assert(is_supported_float_type<T>::value,
                 "only some floating-point types are supported");
@@ -400,7 +400,7 @@ FASTFLOAT_CONSTEXPR20
     return value;
 
   adjusted_mantissa am =
-      compute_float<binary_format<double>>(decimal_exponent, mantissa);
+      compute_float<binary_format<T>>(decimal_exponent, mantissa);
   to_float(
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
       false,
@@ -415,7 +415,7 @@ FASTFLOAT_CONSTEXPR20
     integer_times_pow10(int64_t const mantissa,
                         int_fast16_t const decimal_exponent) noexcept {
 #ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
-  FASTFLOAT_ASSUME(mantissa > 0);
+  FASTFLOAT_ASSUME(mantissa >= 0);
   const am_mant_t m = static_cast<am_mant_t>(mantissa);
 #else
   const bool is_negative = mantissa < 0;
@@ -493,7 +493,7 @@ integer_times_pow10(Int mantissa, int_fast16_t decimal_exponent) noexcept {
 template <typename T, typename UC>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_int_advanced(UC const *first, UC const *last, T &value,
-                        parse_options_t<UC> const &options) noexcept {
+                        parse_options_t<UC> const options) noexcept {
 
   static_assert(is_supported_integer_type<T>::value,
                 "only integer types are supported");
@@ -533,7 +533,7 @@ template <> struct from_chars_advanced_caller<1> {
   template <typename T, typename UC>
   FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
   call(UC const *first, UC const *last, T &value,
-       parse_options_t<UC> const &options) noexcept {
+       parse_options_t<UC> const options) noexcept {
     return from_chars_float_advanced(first, last, value, options);
   }
 };
@@ -542,7 +542,7 @@ template <> struct from_chars_advanced_caller<2> {
   template <typename T, typename UC>
   FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
   call(UC const *first, UC const *last, T &value,
-       parse_options_t<UC> const &options) noexcept {
+       parse_options_t<UC> const options) noexcept {
     return from_chars_int_advanced(first, last, value, options);
   }
 };
@@ -550,7 +550,7 @@ template <> struct from_chars_advanced_caller<2> {
 template <typename T, typename UC>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_advanced(UC const *first, UC const *last, T &value,
-                    parse_options_t<UC> const &options) noexcept {
+                    parse_options_t<UC> const options) noexcept {
   return from_chars_advanced_caller<
       size_t(is_supported_float_type<T>::value) +
       2 * size_t(is_supported_integer_type<T>::value)>::call(first, last, value,