Adding ARM NEON optimizations.

This commit is contained in:
Daniel Lemire 2023-06-09 11:19:34 -04:00
parent bc98c5dfdf
commit e8c0378d6d
2 changed files with 45 additions and 4 deletions

View File

@ -13,6 +13,9 @@
#include <emmintrin.h>
#endif
#ifdef FASTFLOAT_NEON
#include <arm_neon.h>
#endif
namespace fast_float {
@ -88,7 +91,25 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
FASTFLOAT_SIMD_RESTORE_WARNINGS
}
#endif
#elif defined(FASTFLOAT_ARM64)
// Packs eight 16-bit lanes into one 64-bit integer by keeping only the low
// byte of each lane.
//
// data: vector of eight 16-bit code units.
// Returns the eight narrowed bytes reinterpreted as a single uint64_t.
fastfloat_really_inline
uint64_t simd_read8_to_u64(const uint16x8_t data) {
FASTFLOAT_SIMD_DISABLE_WARNINGS
  // vmovn_u16 narrows each 16-bit lane to its low 8 bits.
  uint8x8_t utf8_packed = vmovn_u16(data);
  // The result must be returned: falling off the end of a non-void function
  // is undefined behaviour.
  return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0);
FASTFLOAT_SIMD_RESTORE_WARNINGS
}
// Loads eight UTF-16 code units starting at `chars` and packs their low bytes
// into one 64-bit integer via the vector overload of simd_read8_to_u64.
//
// chars: pointer to the first of eight code units; vld1q_u16 reads eight
//        16-bit elements, so all eight must be readable.
// Returns the packed 64-bit value produced by the vector overload.
fastfloat_really_inline
uint64_t simd_read8_to_u64(const char16_t* chars) {
FASTFLOAT_SIMD_DISABLE_WARNINGS
  // Load from the `chars` parameter (no `values` identifier exists in this scope).
  return simd_read8_to_u64(vld1q_u16(reinterpret_cast<const uint16_t*>(chars)));
FASTFLOAT_SIMD_RESTORE_WARNINGS
}
#endif // FASTFLOAT_SSE2
// dummy for compile
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
@ -170,10 +191,26 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
}
else return false;
FASTFLOAT_SIMD_RESTORE_WARNINGS
#endif
#elif defined(FASTFLOAT_ARM64)
FASTFLOAT_SIMD_DISABLE_WARNINGS
// NEON path: test eight 16-bit code units at once and, when every one is an
// ASCII digit, append them to the running value `i`.
// NOTE(review): `chars` and `i` come from the enclosing function, whose
// signature is outside this view - confirm their types against it.
const uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t*>(chars));
// (x - '0') <= 9
// http://0x80.pl/articles/simd-parsing-int-sequences.html
// Unsigned subtraction wraps anything below '0' to a large value, so a single
// "less than 10" comparison covers both ends of the digit range.
const uint16x8_t t0 = vsubq_u16(data, vmovq_n_u16('0'));
const uint16x8_t mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1));
// Each lane of mask is 0xFFFF when the comparison held; the minimum across
// lanes is 0xFFFF only if all eight lanes passed.
if (vminvq_u16(mask) == 0xFFFF) {
i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
return true;
}
else return false;
FASTFLOAT_SIMD_RESTORE_WARNINGS
#endif // FASTFLOAT_SSE2
}
#endif
#endif // FASTFLOAT_HAS_SIMD
// dummy for compile
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>

View File

@ -121,7 +121,11 @@ using parse_options = parse_options_t<char>;
#define FASTFLOAT_SSE2 1
#endif
#ifdef FASTFLOAT_SSE2
// Define FASTFLOAT_NEON when either 64-bit ARM target macro is present.
#if defined(__aarch64__) || defined(_M_ARM64)
#define FASTFLOAT_NEON 1
#endif
// FASTFLOAT_HAS_SIMD marks that a SIMD code path is available.
// NOTE(review): this condition tests FASTFLOAT_ARM64, but the visible source
// only defines FASTFLOAT_NEON (just above). If FASTFLOAT_ARM64 is not defined
// elsewhere, the ARM branch of this check never fires - TODO confirm which
// macro name is intended and use it consistently.
#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_ARM64)
#define FASTFLOAT_HAS_SIMD 1
#endif