simpler optimizations

Signed-off-by: Shikhar <shikharish05@gmail.com>
2026-02-05 17:29:57 +08:00 · 2026-01-02 02:45:11 +05:30 · 2026-01-02 02:45:11 +05:30 · b14e6a466a
commit b14e6a466a
parent 13d4b94183
1 changed files with 34 additions and 69 deletions
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@ -32,7 +32,7 @@ template <typename UC> fastfloat_really_inline constexpr bool has_simd_opt() {
 // able to optimize it well.
 template <typename UC>
 fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
-  return !(c > UC('9') || c < UC('0'));
+  return (unsigned)(c - UC('0')) <= 9u;
 }

 fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
@ -83,11 +83,10 @@ read4_to_u32(UC const *chars) {
  uint32_t val;
  ::memcpy(&val, chars, sizeof(uint32_t));
 #if FASTFLOAT_IS_BIG_ENDIAN == 1
-  val = byteswap(val);
+  val = byteswap_32(val);
 #endif
  return val;
 }
-
 #ifdef FASTFLOAT_SSE2

 fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const data) {
@ -169,6 +168,11 @@ is_made_of_eight_digits_fast(uint64_t val) noexcept {
            0x8080808080808080));
 }

+fastfloat_really_inline constexpr bool
+is_made_of_four_digits_fast(uint32_t val) noexcept {
+  return !((((val + 0x46464646) | (val - 0x30303030)) & 0x80808080));
+}
+
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
 parse_four_digits_unrolled(uint32_t val) noexcept {
  val -= 0x30303030;
@ -648,77 +652,38 @@ parse_int_string(UC const *p, UC const *pend, T &value,
        return answer;
      }

-      uint32_t digits;
      if (len >= 4) {
-        digits = read4_to_u32(p);
-      } else {
-        uint32_t b0 = uint32_t(uint8_t(p[0]));
-        uint32_t b1 = (len > 1) ? uint32_t(uint8_t(p[1])) : 0xFFu;
-        uint32_t b2 = (len > 2) ? uint32_t(uint8_t(p[2])) : 0xFFu;
-        digits = b0 | (b1 << 8) | (b2 << 16) | (0xFFu << 24);
-      }
-
-      uint32_t magic =
-          ((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u;
-      uint32_t nd = (magic == 0) ? 4u : (uint32_t(countr_zero_32(magic)) >> 3);
-
-      if (nd == 0) {
-        if (has_leading_zeros) {
-          value = 0;
+        uint32_t digits = read4_to_u32(p);
+        if (is_made_of_four_digits_fast(digits)) {
+          uint32_t v = parse_four_digits_unrolled(digits);
+          if (len >= 5 && is_integer(p[4])) {
+            v = v * 10 + uint32_t(p[4] - '0');
+            if (len >= 6 && is_integer(p[5])) {
+              answer.ec = std::errc::result_out_of_range;
+              const UC *q = p + 5;
+              while (q != pend && is_integer(*q)) {
+                q++;
+              }
+              answer.ptr = q;
+              return answer;
+            }
+            if (v > 65535) {
+              answer.ec = std::errc::result_out_of_range;
+              answer.ptr = p + 5;
+              return answer;
+            }
+            value = uint16_t(v);
+            answer.ec = std::errc();
+            answer.ptr = p + 5;
+            return answer;
+          }
+          // 4 digits
+          value = uint16_t(v);
          answer.ec = std::errc();
-          answer.ptr = p;
-          return answer;
-        }
-        answer.ec = std::errc::invalid_argument;
-        answer.ptr = first;
-        return answer;
-      }
-
-      if (nd < 4) {
-        // mask out non-digit bytes and replace with '0' (0x30)
-        uint32_t mask = 0xFFFFFFFFu >> ((4u - nd) * 8u);
-        uint32_t padded = (digits & mask) | (~mask & 0x30303030u);
-        uint32_t v = parse_four_digits_unrolled(padded);
-        constexpr uint32_t divs[] = {0, 1000, 100, 10};
-        value = (uint16_t)(v / divs[nd]);
-        answer.ec = std::errc();
-        answer.ptr = p + nd;
-        return answer;
-      }
-
-      uint32_t v = parse_four_digits_unrolled(digits);
-
-      uint32_t d4 = (len > 4) ? uint32_t(p[4] - '0') : 10u;
-      if (d4 > 9u) {
-        value = (uint16_t)v;
-        answer.ec = std::errc();
-        answer.ptr = p + 4;
-        return answer;
-      }
-
-      if (len > 5) {
-        uint32_t d5 = uint32_t(p[5]) - uint32_t('0');
-        if (d5 <= 9u) {
-          const UC *q = p + 6;
-          while (q < pend && uint32_t(*q) - uint32_t('0') <= 9u)
-            ++q;
-          answer.ec = std::errc::result_out_of_range;
-          answer.ptr = q;
+          answer.ptr = p + 4;
          return answer;
        }
      }
-
-      // overflow check
-      if (v > 6553u || (v == 6553u && d4 > 5u)) {
-        answer.ec = std::errc::result_out_of_range;
-        answer.ptr = p + 5;
-        return answer;
-      }
-
-      value = (uint16_t)(v * 10u + d4);
-      answer.ec = std::errc();
-      answer.ptr = p + 5;
-      return answer;
    }
  }