mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-06 16:56:57 +08:00
Fix perf decrease when UC = char
This commit is contained in:
parent
6ede038789
commit
38613a39f9
@ -17,7 +17,7 @@
|
|||||||
namespace fast_float {
|
namespace fast_float {
|
||||||
|
|
||||||
template <typename UC>
|
template <typename UC>
|
||||||
fastfloat_really_inline constexpr bool has_simd_opts() {
|
fastfloat_really_inline constexpr bool has_simd_opt() {
|
||||||
#ifdef FASTFLOAT_HAS_SIMD
|
#ifdef FASTFLOAT_HAS_SIMD
|
||||||
return std::is_same<UC, char16_t>::value;
|
return std::is_same<UC, char16_t>::value;
|
||||||
#else
|
#else
|
||||||
@ -68,18 +68,7 @@ uint64_t read8_to_u64(const UC *chars) {
|
|||||||
|
|
||||||
fastfloat_really_inline
|
fastfloat_really_inline
|
||||||
uint64_t simd_read8_to_u64(const __m128i data) {
|
uint64_t simd_read8_to_u64(const __m128i data) {
|
||||||
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
return _mm_cvtsi128_si64x(_mm_packus_epi16(data, data));
|
||||||
static const char16_t kmasks[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
|
|
||||||
const __m128i masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(kmasks));
|
|
||||||
|
|
||||||
// todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead of masking + pack
|
|
||||||
__m128i masked = _mm_and_si128(data, masks);
|
|
||||||
__m128i packed = _mm_packus_epi16(masked, masked);
|
|
||||||
|
|
||||||
uint64_t val;
|
|
||||||
_mm_storeu_si64(&val, packed);
|
|
||||||
return val;
|
|
||||||
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fastfloat_really_inline
|
fastfloat_really_inline
|
||||||
@ -92,7 +81,7 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// dummy for compile
|
// dummy for compile
|
||||||
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opts<UC>())>
|
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
|
||||||
uint64_t simd_read8_to_u64(UC const*) {
|
uint64_t simd_read8_to_u64(UC const*) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -132,7 +121,7 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
|
|||||||
template <typename UC>
|
template <typename UC>
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept {
|
uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept {
|
||||||
if (cpp20_and_in_constexpr() || !has_simd_opts<UC>()) {
|
if (cpp20_and_in_constexpr() || !has_simd_opt<UC>()) {
|
||||||
return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
|
return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
|
||||||
}
|
}
|
||||||
return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
|
return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
|
||||||
@ -145,28 +134,18 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val
|
|||||||
0x8080808080808080));
|
0x8080808080808080));
|
||||||
}
|
}
|
||||||
|
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
|
||||||
bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept {
|
#ifdef FASTFLOAT_HAS_SIMD
|
||||||
if (is_made_of_eight_digits_fast(read8_to_u64(chars))) {
|
|
||||||
i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(chars));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Call this if chars might not be 8 digits.
|
// Call this if chars might not be 8 digits.
|
||||||
// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
|
// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
|
||||||
// ensures we don't load SIMD registers twice if we don't have to.
|
// ensures we don't load SIMD registers twice.
|
||||||
//
|
|
||||||
// Benchmark:
|
|
||||||
// https://quick-bench.com/q/Bbn0B4WmZsdgS3qDZWpggAY-jgs
|
|
||||||
//
|
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
bool parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
|
bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
|
||||||
#ifdef FASTFLOAT_SSE2
|
|
||||||
if (cpp20_and_in_constexpr()) {
|
if (cpp20_and_in_constexpr()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
#ifdef FASTFLOAT_SSE2
|
||||||
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||||
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
|
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
|
||||||
|
|
||||||
@ -181,18 +160,36 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
|
|||||||
}
|
}
|
||||||
else return false;
|
else return false;
|
||||||
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||||
|
|
||||||
#else // No SIMD available
|
|
||||||
|
|
||||||
(void)chars; (void)i; // unused
|
|
||||||
return false;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo, no simd optimization yet
|
#endif
|
||||||
|
|
||||||
|
// dummy for compile
|
||||||
|
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
|
||||||
|
uint64_t simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value)>
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
bool parse_if_eight_digits_unrolled(const char32_t*, uint64_t&) noexcept {
|
void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) {
|
||||||
return false;
|
if (!has_simd_opt<UC>()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok
|
||||||
|
p += 8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
|
void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) {
|
||||||
|
// optimizes better than parse_if_eight_digits_unrolled() for UC = char.
|
||||||
|
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) {
|
||||||
|
i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok
|
||||||
|
p += 8;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename UC>
|
template <typename UC>
|
||||||
@ -256,9 +253,8 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
|
|||||||
UC const * before = p;
|
UC const * before = p;
|
||||||
// can occur at most twice without overflowing, but let it occur more, since
|
// can occur at most twice without overflowing, but let it occur more, since
|
||||||
// for integers with many digits, digit parsing is the primary bottleneck.
|
// for integers with many digits, digit parsing is the primary bottleneck.
|
||||||
while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok
|
loop_parse_if_eight_digits(p, pend, i);
|
||||||
p += 8;
|
|
||||||
}
|
|
||||||
while ((p != pend) && is_integer(*p)) {
|
while ((p != pend) && is_integer(*p)) {
|
||||||
uint8_t digit = uint8_t(*p - UC('0'));
|
uint8_t digit = uint8_t(*p - UC('0'));
|
||||||
++p;
|
++p;
|
||||||
|
|||||||
@ -157,7 +157,7 @@ using parse_options = parse_options_t<char>;
|
|||||||
// rust style `try!()` macro, or `?` operator
|
// rust style `try!()` macro, or `?` operator
|
||||||
#define FASTFLOAT_TRY(x) { if (!(x)) return false; }
|
#define FASTFLOAT_TRY(x) { if (!(x)) return false; }
|
||||||
|
|
||||||
#define FASTFLOAT_ENABLE_IF(test) typename std::enable_if<(test), int>::type = 0
|
#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0
|
||||||
|
|
||||||
|
|
||||||
namespace fast_float {
|
namespace fast_float {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user