Merge pull request #209 from fastfloat/arm_neon_opti

ARM NEON optimization (UTF-16 inputs)
This commit is contained in:
Daniel Lemire 2023-06-09 14:41:38 -04:00 committed by GitHub
commit 91edc90464
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 48 additions and 5 deletions

View File

@@ -13,6 +13,9 @@
#include <emmintrin.h>
#endif
#ifdef FASTFLOAT_NEON
#include <arm_neon.h>
#endif
namespace fast_float {
@@ -88,7 +91,25 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
FASTFLOAT_SIMD_RESTORE_WARNINGS
}
#endif
#elif defined(FASTFLOAT_NEON)
// Packs eight 16-bit lanes into a single 64-bit integer.
// Each lane is narrowed to its low 8 bits (vmovn_u16), and the resulting
// eight bytes are reinterpreted as one 64-bit lane and returned.
fastfloat_really_inline
uint64_t simd_read8_to_u64(const uint16x8_t data) {
FASTFLOAT_SIMD_DISABLE_WARNINGS
  const uint8x8_t narrowed_bytes = vmovn_u16(data);
  const uint64x1_t as_single_lane = vreinterpret_u64_u8(narrowed_bytes);
  return vget_lane_u64(as_single_lane, 0);
FASTFLOAT_SIMD_RESTORE_WARNINGS
}
// Loads eight consecutive char16_t code units starting at `chars` into a
// NEON register and packs them into a 64-bit integer via the uint16x8_t
// overload. The caller must guarantee that eight code units are readable.
fastfloat_really_inline
uint64_t simd_read8_to_u64(const char16_t* chars) {
FASTFLOAT_SIMD_DISABLE_WARNINGS
  const uint16_t* code_units = reinterpret_cast<const uint16_t*>(chars);
  const uint16x8_t loaded = vld1q_u16(code_units);
  return simd_read8_to_u64(loaded);
FASTFLOAT_SIMD_RESTORE_WARNINGS
}
#endif // FASTFLOAT_SSE2
// dummy for compile
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
@@ -170,14 +191,32 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
}
else return false;
FASTFLOAT_SIMD_RESTORE_WARNINGS
#endif
#elif defined(FASTFLOAT_NEON)
FASTFLOAT_SIMD_DISABLE_WARNINGS
const uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t*>(chars));
// (x - '0') <= 9
// http://0x80.pl/articles/simd-parsing-int-sequences.html
const uint16x8_t t0 = vsubq_u16(data, vmovq_n_u16('0'));
const uint16x8_t mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1));
if (vminvq_u16(mask) == 0xFFFF) {
i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
return true;
}
else return false;
FASTFLOAT_SIMD_RESTORE_WARNINGS
#else
(void)chars; (void)i;
return false;
#endif // FASTFLOAT_SSE2
}
#endif
#endif // FASTFLOAT_HAS_SIMD
// Dummy overload so that generic callers still compile: it is enabled only
// when has_simd_opt<UC>() is false, ignores both arguments and never parses
// anything.
// NOTE(review): the two signature lines below appear to be the before/after
// of this diff hunk (return type changed from uint64_t to bool) — confirm
// against the actual file before editing.
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
uint64_t simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) {
bool simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) {
return 0; // with the bool return type this converts to false
}

View File

@@ -121,7 +121,11 @@ using parse_options = parse_options_t<char>;
#define FASTFLOAT_SSE2 1
#endif
#ifdef FASTFLOAT_SSE2
// Enable the ARM NEON code paths on 64-bit ARM targets
// (__aarch64__ for GCC/Clang, _M_ARM64 for MSVC).
#if defined(__aarch64__) || defined(_M_ARM64)
#define FASTFLOAT_NEON 1
#endif
// FASTFLOAT_HAS_SIMD is the umbrella switch for every SIMD backend. It must
// test the macro that is actually defined above (FASTFLOAT_NEON); testing an
// undefined name such as FASTFLOAT_ARM64 leaves SIMD disabled on ARM64.
#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON)
#define FASTFLOAT_HAS_SIMD 1
#endif