Fix performance regression when UC = char

This commit is contained in:
Maya Warrier 2023-05-17 01:34:33 -04:00
parent 6ede038789
commit 38613a39f9
2 changed files with 39 additions and 43 deletions

View File

@ -17,7 +17,7 @@
namespace fast_float { namespace fast_float {
template <typename UC> template <typename UC>
fastfloat_really_inline constexpr bool has_simd_opts() { fastfloat_really_inline constexpr bool has_simd_opt() {
#ifdef FASTFLOAT_HAS_SIMD #ifdef FASTFLOAT_HAS_SIMD
return std::is_same<UC, char16_t>::value; return std::is_same<UC, char16_t>::value;
#else #else
@ -68,18 +68,7 @@ uint64_t read8_to_u64(const UC *chars) {
fastfloat_really_inline fastfloat_really_inline
uint64_t simd_read8_to_u64(const __m128i data) { uint64_t simd_read8_to_u64(const __m128i data) {
FASTFLOAT_SIMD_DISABLE_WARNINGS return _mm_cvtsi128_si64x(_mm_packus_epi16(data, data));
static const char16_t kmasks[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
const __m128i masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(kmasks));
// todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead of masking + pack
__m128i masked = _mm_and_si128(data, masks);
__m128i packed = _mm_packus_epi16(masked, masked);
uint64_t val;
_mm_storeu_si64(&val, packed);
return val;
FASTFLOAT_SIMD_RESTORE_WARNINGS
} }
fastfloat_really_inline fastfloat_really_inline
@ -92,7 +81,7 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS
#endif #endif
// dummy for compile // dummy for compile
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opts<UC>())> template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
uint64_t simd_read8_to_u64(UC const*) { uint64_t simd_read8_to_u64(UC const*) {
return 0; return 0;
} }
@ -132,7 +121,7 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
template <typename UC> template <typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept { uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept {
if (cpp20_and_in_constexpr() || !has_simd_opts<UC>()) { if (cpp20_and_in_constexpr() || !has_simd_opt<UC>()) {
return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
} }
return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
@ -145,28 +134,18 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val
0x8080808080808080)); 0x8080808080808080));
} }
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept { #ifdef FASTFLOAT_HAS_SIMD
if (is_made_of_eight_digits_fast(read8_to_u64(chars))) {
i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(chars));
return true;
}
else return false;
}
// Call this if chars might not be 8 digits. // Call this if chars might not be 8 digits.
// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled()) // Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
// ensures we don't load SIMD registers twice if we don't have to. // ensures we don't load SIMD registers twice.
//
// Benchmark:
// https://quick-bench.com/q/Bbn0B4WmZsdgS3qDZWpggAY-jgs
//
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
bool parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept { bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
#ifdef FASTFLOAT_SSE2
if (cpp20_and_in_constexpr()) { if (cpp20_and_in_constexpr()) {
return false; return false;
} }
#ifdef FASTFLOAT_SSE2
FASTFLOAT_SIMD_DISABLE_WARNINGS FASTFLOAT_SIMD_DISABLE_WARNINGS
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars)); const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
@ -181,18 +160,36 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
} }
else return false; else return false;
FASTFLOAT_SIMD_RESTORE_WARNINGS FASTFLOAT_SIMD_RESTORE_WARNINGS
#else // No SIMD available
(void)chars; (void)i; // unused
return false;
#endif #endif
} }
// todo, no simd optimization yet #endif
// dummy for compile
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
uint64_t simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) {
return 0;
}
template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value)>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
bool parse_if_eight_digits_unrolled(const char32_t*, uint64_t&) noexcept { void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) {
return false; if (!has_simd_opt<UC>()) {
return;
}
while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok
p += 8;
}
}
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) {
// optimizes better than parse_if_eight_digits_unrolled() for UC = char.
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) {
i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok
p += 8;
}
} }
template <typename UC> template <typename UC>
@ -256,9 +253,8 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
UC const * before = p; UC const * before = p;
// can occur at most twice without overflowing, but let it occur more, since // can occur at most twice without overflowing, but let it occur more, since
// for integers with many digits, digit parsing is the primary bottleneck. // for integers with many digits, digit parsing is the primary bottleneck.
while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok loop_parse_if_eight_digits(p, pend, i);
p += 8;
}
while ((p != pend) && is_integer(*p)) { while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - UC('0')); uint8_t digit = uint8_t(*p - UC('0'));
++p; ++p;

View File

@ -157,7 +157,7 @@ using parse_options = parse_options_t<char>;
// rust style `try!()` macro, or `?` operator // rust style `try!()` macro, or `?` operator
#define FASTFLOAT_TRY(x) { if (!(x)) return false; } #define FASTFLOAT_TRY(x) { if (!(x)) return false; }
#define FASTFLOAT_ENABLE_IF(test) typename std::enable_if<(test), int>::type = 0 #define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0
namespace fast_float { namespace fast_float {