From 4cb09b5f5939a6b7d12a73e692e5dba9edde4651 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Tue, 2 May 2023 13:05:57 -0400 Subject: [PATCH] Automatically detect SSE2 --- include/fast_float/ascii_number.h | 6 +++--- include/fast_float/float_common.h | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 59318f2..15fc1cf 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -73,7 +73,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS const __m128i masks = _mm_loadu_si128(reinterpret_cast<const __m128i*>(kmasks)); // pipeline 4 and 4 chars at the same time (since loadu_si64 has high latency) - // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead + // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead of masking + pack const char* const p = reinterpret_cast<const char*>(chars); __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), masks); __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), masks); @@ -150,8 +150,8 @@ bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept { } // Call this if chars might not be 8 digits. -// Using this (instead of is_made_of_eight_digits_fast() and parse_eight_digits_unrolled()) -// ensures we don't load SIMD registers twice. +// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled()) +// ensures we don't load SIMD registers twice if we don't have to. 
// // Benchmark: // https://quick-bench.com/q/Bbn0B4WmZsdgS3qDZWpggAY-jgs diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 175389f..5a5942d 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -78,8 +78,14 @@ #endif #endif +#if defined(__SSE2__) || \ + (defined(FASTFLOAT_VISUAL_STUDIO) && \ + (defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))) +#define FASTFLOAT_SSE2 1 +#endif + #ifdef FASTFLOAT_SSE2 -#define FASTFLOAT_HAS_SIMD (1) +#define FASTFLOAT_HAS_SIMD 1 #endif #if defined(__GNUC__)