Let us use 'unlikely' hints.

This commit is contained in:
Daniel Lemire 2026-06-05 22:01:27 -04:00
parent 3067491f41
commit b72e07132c
2 changed files with 28 additions and 16 deletions

View File

@ -197,13 +197,25 @@ using parse_options = parse_options_t<char>;
#define fastfloat_really_inline inline __attribute__((always_inline)) #define fastfloat_really_inline inline __attribute__((always_inline))
#endif #endif
// Force a function OUT of line and onto the cold path. Used for the rare // Branch-probability hint marking the rare slow-path branches as cold, so the
// slow-path re-parse so the force-inlined hot scanner is not duplicated into // optimizer keeps the out-of-line slow-path re-parse off the hot path (and does
// the caller (which bloated the hot frame and hurt ILP on some targets). // not duplicate the force-inlined hot scanner into the caller, which bloated
#ifdef FASTFLOAT_VISUAL_STUDIO // the hot frame and hurt ILP on some targets). Used at the call site as
#define fastfloat_noinline __declspec(noinline) // if fastfloat_unlikely(cond) { ... }
// (the macro supplies the parentheses). It expands to the standard [[unlikely]]
// attribute in C++20 or newer, otherwise to __builtin_expect on GCC/Clang, or
// to a no-op elsewhere (e.g. pre-C++20 MSVC, which has no equivalent hint).
// The [[unlikely]] branch is gated on the language version, not just on
// __has_cpp_attribute: GCC and Clang report the attribute as available even
// under -std=c++17, where using it would trip -Wc++20-extensions/-Werror.
#if (__cplusplus >= 202002L || \
(defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)) && \
defined(__has_cpp_attribute) && __has_cpp_attribute(unlikely) >= 201803L
#define fastfloat_unlikely(x) (x) [[unlikely]]
#elif defined(__GNUC__) || defined(__clang__)
#define fastfloat_unlikely(x) (__builtin_expect(!!(x), 0))
#else #else
#define fastfloat_noinline __attribute__((noinline, cold)) #define fastfloat_unlikely(x) (x)
#endif #endif
#ifndef FASTFLOAT_ASSERT #ifndef FASTFLOAT_ASSERT

View File

@ -289,15 +289,15 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
return answer; return answer;
} }
// Cold, out-of-line slow path: re-parse materializing the integer/fraction // Slow path: re-parse materializing the integer/fraction spans the hot no-span
// spans the hot no-span parse skipped, then run the full algorithm. Marked // parse skipped, then run the full algorithm. The two callers reach it only
// noinline+cold so the force-inlined spans scanner is emitted ONCE off the hot // through a fastfloat_unlikely branch, so the optimizer keeps this re-parse off
// path rather than duplicated into from_chars_float_advanced (which bloated the // the hot path on its own (no function-level noinline needed).
// hot frame). from_chars_advanced already handles both the too_many_digits // from_chars_advanced already handles both the too_many_digits disambiguation
// disambiguation and the am.power2<0 digit_comp recompute, so both slow // and the am.power2<0 digit_comp recompute, so both slow branches collapse to
// branches collapse to one helper call. // one helper call.
template <typename T, typename UC> template <typename T, typename UC>
fastfloat_noinline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC> FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
parse_number_slow_path(UC const *first, UC const *last, T &value, parse_number_slow_path(UC const *first, UC const *last, T &value,
parse_options_t<UC> options, bool bjf) noexcept { parse_options_t<UC> options, bool bjf) noexcept {
parsed_number_string_t<UC> pns = parsed_number_string_t<UC> pns =
@ -351,7 +351,7 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value,
// Slow path A (rare): > 19 significant digits. The no-span parse left the // Slow path A (rare): > 19 significant digits. The no-span parse left the
// mantissa un-truncated and skipped the span-based recompute; the cold helper // mantissa un-truncated and skipped the span-based recompute; the cold helper
// re-parses with spans and runs the full algorithm. // re-parses with spans and runs the full algorithm.
if (pns.too_many_digits) { if fastfloat_unlikely (pns.too_many_digits) {
return parse_number_slow_path<T, UC>(first, last, value, options, bjf); return parse_number_slow_path<T, UC>(first, last, value, options, bjf);
} }
@ -368,7 +368,7 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value,
// integer/fraction spans. Route to the cold helper (clinger there is a // integer/fraction spans. Route to the cold helper (clinger there is a
// dead-effect since it already failed here; the cold re-parse + digit_comp // dead-effect since it already failed here; the cold re-parse + digit_comp
// via from_chars_advanced reproduces this branch). // via from_chars_advanced reproduces this branch).
if (am.power2 < 0) { if fastfloat_unlikely (am.power2 < 0) {
return parse_number_slow_path<T, UC>(first, last, value, options, bjf); return parse_number_slow_path<T, UC>(first, last, value, options, bjf);
} }
to_float(pns.negative, am, value); to_float(pns.negative, am, value);