From ba656ace7887df1176fdb9e58c33acc89bb9b62b Mon Sep 17 00:00:00 2001
From: IRainman
Date: Tue, 30 Dec 2025 20:15:17 +0300
Subject: [PATCH] improvements in the code generation for byteswap and leading_zero to all compilers.

---
Review notes (ignored by `git am`; not part of the commit message):
- byteswap(): one definition now selects std::byteswap, the GCC/Clang
  builtin, or the portable shift/mask expression. The `#endif` that closed
  the removed `#if`/`#elif` pair is deleted too, since it would otherwise be
  unmatched (assumes no enclosing conditional outside the visible context;
  confirm against the file).
- leading_zeroes(): adds an lzcnt path for x64 AVX2 builds on MSVC and a
  portable loop for compilers that have neither the MSVC intrinsics nor the
  GCC/Clang builtin.
- Compiler detection uses __GNUC__/__clang__ instead of __has_builtin, which
  is a preprocessor error on toolchains that do not provide that operator.
- NOTE(review): the target type of every `static_cast(...)` below and the
  author's e-mail address are missing in this text (angle-bracket content
  appears to have been stripped). Restore them from the repository before
  applying. The `index` hashes describe the pre-review content.

 include/fast_float/ascii_number.h | 14 ++++++++------
 include/fast_float/float_common.h | 16 ++++++++++++++--
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 41cf977..80d4565 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -34,16 +34,18 @@ fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
   return !(c > UC('9') || c < UC('0'));
 }
 
-#if FASTFLOAT_HAS_BYTESWAP == 0
 fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) noexcept {
+#if FASTFLOAT_HAS_BYTESWAP == 1
+  return std::byteswap(val);
+#elif defined(__GNUC__) || defined(__clang__)
+  return __builtin_bswap64(val);
+#else
+  // Portable fallback; also used on MSVC, whose _byteswap_uint64 intrinsic is
+  // not constexpr and so cannot be called from this constexpr function.
   return (val & 0xFF00000000000000) >> 56 | (val & 0x00FF000000000000) >> 40 |
          (val & 0x0000FF0000000000) >> 24 | (val & 0x000000FF00000000) >> 8 |
          (val & 0x00000000FF000000) << 8 | (val & 0x0000000000FF0000) << 24 |
          (val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56;
-}
-#elif FASTFLOAT_HAS_BYTESWAP == 1
-fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) noexcept {
-  return std::byteswap(val);
+#endif
 }
-#endif
 
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 7191b04..079eb6f 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -411,7 +411,11 @@ leading_zeroes(uint64_t input_num) noexcept {
     return leading_zeroes_generic(input_num);
   }
 #ifdef FASTFLOAT_VISUAL_STUDIO
-#if defined(_M_X64) || defined(_M_ARM64)
+#if defined(__AVX2__) && defined(_M_X64)
+  // Use lzcnt only in x64 AVX2 builds: __lzcnt64 is x64-only, and CPUs with
+  // AVX2 are expected to implement the LZCNT instruction as well.
+  return __lzcnt64(input_num);
+#elif defined(_M_X64) || defined(_M_ARM64)
   unsigned long leading_zero;
   // Search the mask data from most significant bit (MSB)
   // to least significant bit (LSB) for a set bit (1).
@@ -420,8 +424,16 @@ leading_zeroes(uint64_t input_num) noexcept {
 #else
   return static_cast(leading_zeroes_generic(input_num));
 #endif
+#elif defined(__GNUC__) || defined(__clang__)
+  return static_cast(__builtin_clzll(input_num));
 #else
-  return static_cast(__builtin_clzll(input_num));
+  // Portable fallback for compilers with neither the MSVC intrinsics nor the
+  // GCC/Clang builtin: shift the by-value argument right until it is zero,
+  // decrementing n once per significant bit, so n ends as the zero count.
+  int n = 64;
+  for (; input_num > 0; input_num >>= 1)
+    --n;
+  return static_cast(n);
 #endif
 }
 