* Upgrade manually vectorized code to support SSE4.2, final cleanup and documentation for SSE2 code.

This commit is contained in:
IRainman 2025-10-22 01:13:25 +03:00
parent f757eb45a2
commit f396c23809

View File

@ -74,7 +74,7 @@ read8_to_u64(UC const *chars) {
fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const &data) {
FASTFLOAT_SIMD_DISABLE_WARNINGS
// _mm_packus_epi16 is SSE4.1+, converts 8×u16 → 8×u8
// _mm_packus_epi16 is SSE2+, converts 8×u16 → 8×u8
__m128i const packed = _mm_packus_epi16(data, data);
#ifdef FASTFLOAT_SSE42
return static_cast<uint64_t>(_mm_cvtsi128_si64(packed));
@ -171,13 +171,10 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
#ifdef FASTFLOAT_SSE42
// --- Digit range check using SSE4.2 comparisons ---
// Validate: '0' (0x30) ≤ x ≤ '9' (0x39)
const __m128i ascii0 = _mm_set1_epi16(u'0');
const __m128i ascii9 = _mm_set1_epi16(u'9');
__m128i below0 = _mm_cmplt_epi16(data, ascii0); // x < '0'
__m128i above9 = _mm_cmpgt_epi16(data, ascii9); // x > '9'
__m128i invalid = _mm_or_si128(below0, above9);
// Validate: '0' (0x0030) ≤ x ≤ '9' (0x0039)
__m128i const below0 = _mm_cmplt_epi16(data, _mm_set1_epi16(u'0')); // x < '0'
__m128i const above9 = _mm_cmpgt_epi16(data, _mm_set1_epi16(u'9')); // x > '9'
__m128i const invalid = _mm_or_si128(below0, above9);
// Proceed only if no 16-bit lane was flagged as a non-digit
if (_mm_testz_si128(invalid, invalid)) { // SSE4.1/4.2: zero flag test
@ -186,18 +183,16 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
// (x - '0') <= 9 (unsigned) <=> (x + 32720) wraps into [-32768, -32759] as int16
// encoded as signed comparison: (x + 32720) > -32759 ? not digit : digit
// http://0x80.pl/articles/simd-parsing-int-sequences.html
__m128i const adjust = _mm_set1_epi16(32720);
__m128i const cutoff = _mm_set1_epi16(-32759);
__m128i const t0 = _mm_add_epi16(data, adjust);
__m128i const mask = _mm_cmpgt_epi16(t0, cutoff);
__m128i const t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
__m128i const mask = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));
// If mask == 0 → all digits valid.
if (_mm_movemask_epi8(mask) == 0) {
#endif
i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
return true;
} else
return false;
#endif
FASTFLOAT_SIMD_RESTORE_WARNINGS
#elif defined(FASTFLOAT_NEON)
FASTFLOAT_SIMD_DISABLE_WARNINGS
@ -218,10 +213,10 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
(void)chars;
(void)i;
return false;
#endif // FASTFLOAT_SSE2
#endif
}
#endif // FASTFLOAT_HAS_SIMD
#endif
// MSVC SFINAE is broken pre-VS2017
#if defined(_MSC_VER) && _MSC_VER <= 1900