From b72e07132c1a36adf4c4c29665931e322ab704ae Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 5 Jun 2026 22:01:27 -0400 Subject: [PATCH] let us use 'unlikely' hints. --- include/fast_float/float_common.h | 24 ++++++++++++++++++------ include/fast_float/parse_number.h | 20 ++++++++++---------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index bd41bf1..ee7a6d0 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -197,13 +197,25 @@ using parse_options = parse_options_t; #define fastfloat_really_inline inline __attribute__((always_inline)) #endif -// Force a function OUT of line and onto the cold path. Used for the rare -// slow-path re-parse so the force-inlined hot scanner is not duplicated into -// the caller (which bloated the hot frame and hurt ILP on some targets). -#ifdef FASTFLOAT_VISUAL_STUDIO -#define fastfloat_noinline __declspec(noinline) +// Branch-probability hint marking the rare slow-path branches as cold, so the +// optimizer keeps the out-of-line slow-path re-parse off the hot path (and does +// not duplicate the force-inlined hot scanner into the caller, which bloated +// the hot frame and hurt ILP on some targets). Used at the call site as +// if fastfloat_unlikely(cond) { ... } +// (the macro supplies the parentheses). It expands to the standard [[unlikely]] +// attribute in C++20 or newer, otherwise to __builtin_expect on GCC/Clang, or +// to a no-op elsewhere (e.g. pre-C++20 MSVC, which has no equivalent hint). +// The [[unlikely]] branch is gated on the language version, not just on +// __has_cpp_attribute: GCC and Clang report the attribute as available even +// under -std=c++17, where using it would trip -Wc++20-extensions/-Werror.
+#if (__cplusplus >= 202002L || \ + (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)) && \ + defined(__has_cpp_attribute) && __has_cpp_attribute(unlikely) >= 201803L +#define fastfloat_unlikely(x) (x) [[unlikely]] +#elif defined(__GNUC__) || defined(__clang__) +#define fastfloat_unlikely(x) (__builtin_expect(!!(x), 0)) #else -#define fastfloat_noinline __attribute__((noinline, cold)) +#define fastfloat_unlikely(x) (x) #endif #ifndef FASTFLOAT_ASSERT diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index be38781..a844bc8 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -289,15 +289,15 @@ from_chars_advanced(parsed_number_string_t &pns, T &value) noexcept { return answer; } -// Cold, out-of-line slow path: re-parse materializing the integer/fraction -// spans the hot no-span parse skipped, then run the full algorithm. Marked -// noinline+cold so the force-inlined spans scanner is emitted ONCE off the hot -// path rather than duplicated into from_chars_float_advanced (which bloated the -// hot frame). from_chars_advanced already handles both the too_many_digits -// disambiguation and the am.power2<0 digit_comp recompute, so both slow -// branches collapse to one helper call. +// Slow path: re-parse materializing the integer/fraction spans the hot no-span +// parse skipped, then run the full algorithm. The two callers reach it only +// through a fastfloat_unlikely branch, so the optimizer keeps this re-parse off +// the hot path on its own (no function-level noinline needed). +// from_chars_advanced already handles both the too_many_digits disambiguation +// and the am.power2<0 digit_comp recompute, so both slow branches collapse to +// one helper call. 
template -fastfloat_noinline FASTFLOAT_CONSTEXPR20 from_chars_result_t +FASTFLOAT_CONSTEXPR20 from_chars_result_t parse_number_slow_path(UC const *first, UC const *last, T &value, parse_options_t options, bool bjf) noexcept { parsed_number_string_t pns = @@ -351,7 +351,7 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value, // Slow path A (rare): > 19 significant digits. The no-span parse left the // mantissa un-truncated and skipped the span-based recompute; the cold helper // re-parses with spans and runs the full algorithm. - if (pns.too_many_digits) { + if fastfloat_unlikely (pns.too_many_digits) { return parse_number_slow_path(first, last, value, options, bjf); } @@ -368,7 +368,7 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value, // integer/fraction spans. Route to the cold helper (clinger there is a // dead-effect since it already failed here; the cold re-parse + digit_comp // via from_chars_advanced reproduces this branch). - if (am.power2 < 0) { + if fastfloat_unlikely (am.power2 < 0) { return parse_number_slow_path(first, last, value, options, bjf); } to_float(pns.negative, am, value);