mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-06 16:56:57 +08:00
SIMD optimization to parse 8 char16_t at a time
This commit is contained in:
parent
f59f73c4da
commit
8a9a9d538a
@ -35,8 +35,7 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
|
||||
}
|
||||
|
||||
fastfloat_really_inline
|
||||
uint64_t fast_read_u64(const char* chars)
|
||||
{
|
||||
uint64_t fast_read_u64(const char* chars) {
|
||||
uint64_t val;
|
||||
::memcpy(&val, chars, sizeof(uint64_t));
|
||||
return val;
|
||||
@ -44,8 +43,7 @@ uint64_t fast_read_u64(const char* chars)
|
||||
|
||||
// https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg
|
||||
fastfloat_really_inline
|
||||
uint64_t fast_read_u64(const char16_t* chars)
|
||||
{
|
||||
uint64_t fast_read_u64(const char16_t* chars) {
|
||||
#if FASTFLOAT_SSE2
|
||||
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||
static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff};
|
||||
@ -63,7 +61,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||
return val;
|
||||
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||
#else
|
||||
alignas(8) unsigned char bytes[8];
|
||||
unsigned char bytes[8];
|
||||
for (int i = 0; i < 8; ++i)
|
||||
bytes[i] = (unsigned char)chars[i];
|
||||
|
||||
@ -122,24 +120,74 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
|
||||
return uint32_t(val);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||
uint32_t parse_eight_digits_unrolled(const CharT *chars) noexcept {
|
||||
return parse_eight_digits_unrolled(read_u64(chars));
|
||||
}
|
||||
|
||||
// credit @aqrit
|
||||
fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
|
||||
return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
|
||||
0x8080808080808080));
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||
bool is_made_of_eight_digits_fast(const CharT *chars) noexcept {
|
||||
uint32_t parse_eight_digits_unrolled(const char* chars) noexcept {
|
||||
return parse_eight_digits_unrolled(read_u64(chars));
|
||||
}
|
||||
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||
bool is_made_of_eight_digits_fast(const char *chars) noexcept {
|
||||
return is_made_of_eight_digits_fast(read_u64(chars));
|
||||
}
|
||||
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||
bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcept {
|
||||
const bool all = is_made_of_eight_digits_fast(chars);
|
||||
if (all) i = i * 100000000 * parse_eight_digits_unrolled(chars);
|
||||
return all;
|
||||
}
|
||||
|
||||
// http://0x80.pl/articles/simd-parsing-int-sequences.html
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||
bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noexcept {
|
||||
if (cpp20_and_in_constexpr() || !FASTFLOAT_SSE2) {
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
if (chars[i] < u'0' || chars[i] > u'9')
|
||||
return false;
|
||||
}
|
||||
i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars));
|
||||
return true;
|
||||
}
|
||||
#if FASTFLOAT_SSE2
|
||||
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||
const __m128i data = _mm_loadu_si128(reinterpret_cast<const char*>(chars));
|
||||
|
||||
// (x - '0') <= 9
|
||||
const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80));
|
||||
const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119));
|
||||
const bool is_digits = _mm_movemask_epi8(t1) == 0;
|
||||
|
||||
if (is_digits) {
|
||||
// x - '0'
|
||||
const __m128i s1digits16 = _mm_sub_epi16(data, _mm_set1_epi16('0'));
|
||||
// 10 * x(b) + x(b-1) -> 2 digit numbers
|
||||
const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1);
|
||||
const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32);
|
||||
// 100 * x(b) + x(b-1) -> 4 digit numbers
|
||||
const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1));
|
||||
const __m128i s4digits16 = _mm_packus_epi16(s4digits32, s4digits32);
|
||||
// 10000 * x(b) + x(b-1) -> 8 digit number
|
||||
const __m128i s8digits32 = _mm_madd_epi16(s4digits16, _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1));
|
||||
|
||||
uint32_t value;
|
||||
_mm_storeu_si32(&value, s8digits32);
|
||||
|
||||
i = i * 100000000 + value;
|
||||
return true;
|
||||
}
|
||||
else return false;
|
||||
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
typedef span<const char> byte_span;
|
||||
|
||||
template <typename CharT>
|
||||
@ -203,8 +251,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen
|
||||
const CharT* before = p;
|
||||
// can occur at most twice without overflowing, but let it occur more, since
|
||||
// for integers with many digits, digit parsing is the primary bottleneck.
|
||||
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
|
||||
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
|
||||
while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p)) { // in rare cases, this will overflow, but that's ok
|
||||
p += 8;
|
||||
}
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user