From 7ad8004d575a7eb5aed7b5911c1ae298f7258052 Mon Sep 17 00:00:00 2001 From: user202729 <25191436+user202729@users.noreply.github.com> Date: Fri, 26 Dec 2025 00:21:20 +0800 Subject: [PATCH] Speed up do_format_decimal (#4630) --- include/fmt/format.h | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/include/fmt/format.h b/include/fmt/format.h index 7c486691..b9da317a 100644 --- a/include/fmt/format.h +++ b/include/fmt/format.h @@ -1041,6 +1041,22 @@ inline auto digits2(size_t value) noexcept -> const char* { return &data[value * 2]; } +// Given i in [0, 100), let x be the first 7 digits after +// the decimal point of i / 100 in base 2, the first 2 bytes +// after digits2_i(x) is the string representation of i. +inline auto digits2_i(size_t value) -> const char* { + alignas(2) static const char data[] = + "00010203 0405060707080910 1112" + "131414151617 18192021 222324 " + "25262728 2930313232333435 3637" + "383939404142 43444546 474849 " + "50515253 5455565757585960 6162" + "636464656667 68697071 727374 " + "75767778 7980818282838485 8687" + "888989909192 93949596 979899 "; + return &data[value * 2]; +} + template constexpr auto getsign(sign s) -> Char { return static_cast(((' ' << 24) | ('+' << 16) | ('-' << 8)) >> (static_cast(s) * 8)); @@ -1209,6 +1225,16 @@ FMT_CONSTEXPR20 FMT_INLINE void write2digits(Char* out, size_t value) { *out = static_cast('0' + value % 10); } +template +FMT_INLINE void write2digits_i(Char* out, size_t value) { + if (std::is_same::value && !FMT_OPTIMIZE_SIZE) { + memcpy(out, digits2_i(value), 2); + return; + } + *out++ = static_cast(digits2_i(value)[0]); + *out = static_cast(digits2_i(value)[1]); +} + // Formats a decimal unsigned integer value writing to out pointing to a buffer // of specified size. The caller must ensure that the buffer is large enough. template @@ -1217,12 +1243,19 @@ FMT_CONSTEXPR20 auto do_format_decimal(Char* out, UInt value, int size) FMT_ASSERT(size >= count_digits(value), "invalid digit count"); unsigned n = to_unsigned(size); while (value >= 100) { - // Integer division is slow so do it for a group of two digits instead - // of for every digit. The idea comes from the talk by Alexandrescu - // "Three Optimization Tips for C++". See speed-test for a comparison. n -= 2; - write2digits(out + n, static_cast(value % 100)); - value /= 100; + if (!is_constant_evaluated() && sizeof(UInt) == 4) { + auto p = value * static_cast((1ull << 39) / 100 + 1); + write2digits_i(out + n, p >> (39 - 7) & ((1 << 7) - 1)); + value = static_cast(p >> 39) + + (static_cast(value >= (100u << 25)) << 25); + } else { + // Integer division is slow so do it for a group of two digits instead + // of for every digit. The idea comes from the talk by Alexandrescu + // "Three Optimization Tips for C++". See speed-test for a comparison. + write2digits(out + n, static_cast(value % 100)); + value /= 100; + } } if (value >= 10) { n -= 2;