mirror of
https://github.com/fastfloat/fast_float.git
synced 2026-02-07 18:26:45 +08:00
Improvements in the code generation for byteswap and leading_zeroes for all compilers.
This commit is contained in:
parent
49106981a3
commit
ba656ace78
@ -34,16 +34,19 @@ fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
|
|||||||
return !(c > UC('9') || c < UC('0'));
|
return !(c > UC('9') || c < UC('0'));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if FASTFLOAT_HAS_BYTESWAP == 0
|
|
||||||
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) noexcept {
|
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) noexcept {
|
||||||
|
#if FASTFLOAT_HAS_BYTESWAP == 1
|
||||||
|
return std::byteswap(val);
|
||||||
|
#elif defined(__has_builtin) && __has_builtin(__builtin_bswap64)
|
||||||
|
return __builtin_bswap64(val);
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
return _byteswap_uint64(val);
|
||||||
|
#else
|
||||||
return (val & 0xFF00000000000000) >> 56 | (val & 0x00FF000000000000) >> 40 |
|
return (val & 0xFF00000000000000) >> 56 | (val & 0x00FF000000000000) >> 40 |
|
||||||
(val & 0x0000FF0000000000) >> 24 | (val & 0x000000FF00000000) >> 8 |
|
(val & 0x0000FF0000000000) >> 24 | (val & 0x000000FF00000000) >> 8 |
|
||||||
(val & 0x00000000FF000000) << 8 | (val & 0x0000000000FF0000) << 24 |
|
(val & 0x00000000FF000000) << 8 | (val & 0x0000000000FF0000) << 24 |
|
||||||
(val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56;
|
(val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56;
|
||||||
}
|
#endif
|
||||||
#elif FASTFLOAT_HAS_BYTESWAP == 1
|
|
||||||
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) noexcept {
|
|
||||||
return std::byteswap(val);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -411,7 +411,11 @@ leading_zeroes(uint64_t input_num) noexcept {
|
|||||||
return leading_zeroes_generic(input_num);
|
return leading_zeroes_generic(input_num);
|
||||||
}
|
}
|
||||||
#ifdef FASTFLOAT_VISUAL_STUDIO
|
#ifdef FASTFLOAT_VISUAL_STUDIO
|
||||||
#if defined(_M_X64) || defined(_M_ARM64)
|
#if defined(__AVX2__)
|
||||||
|
// use lzcnt on MSVC only on AVX2-capable CPUs, all of which support this BMI
|
||||||
|
// instruction
|
||||||
|
return __lzcnt64(x);
|
||||||
|
#elif defined(_M_X64) || defined(_M_ARM64)
|
||||||
unsigned long leading_zero;
|
unsigned long leading_zero;
|
||||||
// Search the mask data from most significant bit (MSB)
|
// Search the mask data from most significant bit (MSB)
|
||||||
// to least significant bit (LSB) for a set bit (1).
|
// to least significant bit (LSB) for a set bit (1).
|
||||||
@ -420,8 +424,16 @@ leading_zeroes(uint64_t input_num) noexcept {
|
|||||||
#else
|
#else
|
||||||
return static_cast<limb_t>(leading_zeroes_generic(input_num));
|
return static_cast<limb_t>(leading_zeroes_generic(input_num));
|
||||||
#endif
|
#endif
|
||||||
|
#elif __has_builtin(__builtin_clzll)
|
||||||
|
return static_cast<limb_t>(__builtin_clzll(x));
|
||||||
#else
|
#else
|
||||||
return static_cast<limb_t>(__builtin_clzll(input_num));
|
// Unlike MSVC, clang and gcc recognize this implementation and replace
|
||||||
|
// it with the assembly instructions which are appropriate for the
|
||||||
|
// target (lzcnt or bsr + zero handling).
|
||||||
|
int n = 64;
|
||||||
|
for (; leading_zero > 0; leading_zero >>= 1)
|
||||||
|
--n;
|
||||||
|
return static_cast<limb_t>(n);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user