Speed up do_format_decimal (#4630)

This commit is contained in:
user202729 2025-12-26 00:21:20 +08:00 committed by GitHub
parent 91d1aced0a
commit 7ad8004d57
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1041,6 +1041,22 @@ inline auto digits2(size_t value) noexcept -> const char* {
return &data[value * 2];
}
// Given i in [0, 100), let x be the first 7 digits after
// the decimal point of i / 100 in base 2, the first 2 bytes
// after digits2_i(x) is the string representation of i.
inline auto digits2_i(size_t value) -> const char* {
alignas(2) static const char data[] =
"00010203 0405060707080910 1112"
"131414151617 18192021 222324 "
"25262728 2930313232333435 3637"
"383939404142 43444546 474849 "
"50515253 5455565757585960 6162"
"636464656667 68697071 727374 "
"75767778 7980818282838485 8687"
"888989909192 93949596 979899 ";
return &data[value * 2];
}
template <typename Char> constexpr auto getsign(sign s) -> Char {
return static_cast<char>(((' ' << 24) | ('+' << 16) | ('-' << 8)) >>
(static_cast<int>(s) * 8));
@ -1209,6 +1225,16 @@ FMT_CONSTEXPR20 FMT_INLINE void write2digits(Char* out, size_t value) {
*out = static_cast<Char>('0' + value % 10);
}
template <typename Char>
FMT_INLINE void write2digits_i(Char* out, size_t value) {
if (std::is_same<Char, char>::value && !FMT_OPTIMIZE_SIZE) {
memcpy(out, digits2_i(value), 2);
return;
}
*out++ = static_cast<Char>(digits2_i(value)[0]);
*out = static_cast<Char>(digits2_i(value)[1]);
}
// Formats a decimal unsigned integer value writing to out pointing to a buffer
// of specified size. The caller must ensure that the buffer is large enough.
template <typename Char, typename UInt>
@ -1217,12 +1243,19 @@ FMT_CONSTEXPR20 auto do_format_decimal(Char* out, UInt value, int size)
FMT_ASSERT(size >= count_digits(value), "invalid digit count");
unsigned n = to_unsigned(size);
while (value >= 100) {
// Integer division is slow so do it for a group of two digits instead
// of for every digit. The idea comes from the talk by Alexandrescu
// "Three Optimization Tips for C++". See speed-test for a comparison.
n -= 2;
write2digits(out + n, static_cast<unsigned>(value % 100));
value /= 100;
if (!is_constant_evaluated() && sizeof(UInt) == 4) {
auto p = value * static_cast<uint64_t>((1ull << 39) / 100 + 1);
write2digits_i(out + n, p >> (39 - 7) & ((1 << 7) - 1));
value = static_cast<UInt>(p >> 39) +
(static_cast<UInt>(value >= (100u << 25)) << 25);
} else {
// Integer division is slow so do it for a group of two digits instead
// of for every digit. The idea comes from the talk by Alexandrescu
// "Three Optimization Tips for C++". See speed-test for a comparison.
write2digits(out + n, static_cast<unsigned>(value % 100));
value /= 100;
}
}
if (value >= 10) {
n -= 2;