Improve code generation for byteswap and leading_zeroes on all compilers.

This commit is contained in:
IRainman 2025-12-30 20:15:17 +03:00
parent 49106981a3
commit ba656ace78
2 changed files with 22 additions and 7 deletions

View File

@ -34,16 +34,19 @@ fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
return !(c > UC('9') || c < UC('0'));
}
#if FASTFLOAT_HAS_BYTESWAP == 0
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) noexcept {
#if FASTFLOAT_HAS_BYTESWAP == 1
return std::byteswap(val);
#elif defined(__has_builtin) && __has_builtin(__builtin_bswap64)
return __builtin_bswap64(val);
#elif defined(_MSC_VER)
return _byteswap_uint64(val);
#else
return (val & 0xFF00000000000000) >> 56 | (val & 0x00FF000000000000) >> 40 |
(val & 0x0000FF0000000000) >> 24 | (val & 0x000000FF00000000) >> 8 |
(val & 0x00000000FF000000) << 8 | (val & 0x0000000000FF0000) << 24 |
(val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56;
}
#elif FASTFLOAT_HAS_BYTESWAP == 1
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) noexcept {
return std::byteswap(val);
#endif
}
#endif

View File

@ -411,7 +411,11 @@ leading_zeroes(uint64_t input_num) noexcept {
return leading_zeroes_generic(input_num);
}
#ifdef FASTFLOAT_VISUAL_STUDIO
#if defined(_M_X64) || defined(_M_ARM64)
#if defined(__AVX2__)
// Use lzcnt on MSVC only when building for AVX2-capable CPUs, all of which
// have this BMI instruction.
return __lzcnt64(x);
#elif defined(_M_X64) || defined(_M_ARM64)
unsigned long leading_zero;
// Search the mask data from most significant bit (MSB)
// to least significant bit (LSB) for a set bit (1).
@ -420,8 +424,16 @@ leading_zeroes(uint64_t input_num) noexcept {
#else
return static_cast<limb_t>(leading_zeroes_generic(input_num));
#endif
#elif __has_builtin(__builtin_clzll)
return static_cast<limb_t>(__builtin_clzll(x));
#else
return static_cast<limb_t>(__builtin_clzll(input_num));
// Unlike MSVC, clang and gcc recognize this implementation and replace
// it with the assembly instructions which are appropriate for the
// target (lzcnt or bsr + zero handling).
int n = 64;
for (; leading_zero > 0; leading_zero >>= 1)
--n;
return static_cast<limb_t>(n);
#endif
}