diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 481b91d..1b8effc 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -13,6 +13,9 @@
 #include <emmintrin.h>
 #endif
 
+#ifdef FASTFLOAT_NEON
+#include <arm_neon.h>
+#endif
 
 namespace fast_float {
 
@@ -88,7 +91,27 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
 FASTFLOAT_SIMD_RESTORE_WARNINGS
 }
 
-#endif
+#elif defined(FASTFLOAT_NEON)
+
+// Narrows each of the eight 16-bit lanes of `data` to its low byte and
+// returns the eight resulting bytes packed into a single 64-bit integer.
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const uint16x8_t data) {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  uint8x8_t utf8_packed = vmovn_u16(data);
+  return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0);
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+// Loads eight char16_t values starting at `chars` and packs their low bytes into a uint64_t.
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const char16_t* chars) {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64(vld1q_u16(reinterpret_cast<const uint16_t*>(chars)));
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+#endif // FASTFLOAT_SSE2
 
 // dummy for compile
 template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
@@ -170,10 +193,27 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
   }
   else return false;
 FASTFLOAT_SIMD_RESTORE_WARNINGS
-#endif
+#elif defined(FASTFLOAT_NEON)
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  const uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t*>(chars));
+
+  // (x - '0') <= 9
+  // http://0x80.pl/articles/simd-parsing-int-sequences.html
+  const uint16x8_t t0 = vsubq_u16(data, vmovq_n_u16('0'));
+  const uint16x8_t mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1));
+
+  // The minimum lane is 0xFFFF only when every lane passed the range check.
+  if (vminvq_u16(mask) == 0xFFFF) {
+    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
+    return true;
+  }
+  else return false;
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+
+#endif // FASTFLOAT_SSE2
 }
 
-#endif
+#endif // FASTFLOAT_HAS_SIMD
 
 // dummy for compile
 template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index b1622b0..6aab7aa 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -121,7 +121,11 @@ using parse_options = parse_options_t<char>;
 #define FASTFLOAT_SSE2 1
 #endif
 
-#ifdef FASTFLOAT_SSE2
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define FASTFLOAT_NEON 1
+#endif
+
+#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON)
 #define FASTFLOAT_HAS_SIMD 1
 #endif
 