Merge pull request #387 from fastfloat/pr386

Using unlikely markers for PR386
This commit is contained in:
Daniel Lemire 2026-06-07 14:12:38 -04:00 committed by GitHub
commit 3044c9b182
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 128 additions and 28 deletions

View File

@ -330,10 +330,17 @@ report_parse_error(UC const *p, parse_error error) {
// Assuming that you use no more than 19 digits, this will
// parse an ASCII string.
//
// store_spans is a *runtime* flag (not a template parameter, deliberately: a
// template would create a second instantiation of this whole function and the
// extra icache pressure wipes out the gain). When false, the integer/fraction
// spans (read only by the rare digit_comp slow path) are not materialized,
// which keeps the fat parsed_number_string_t off the hot path. The caller
// re-parses with store_spans=true if the slow path is actually reached.
template <bool basic_json_fmt, typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
parse_number_string(UC const *p, UC const *pend,
parse_options_t<UC> options) noexcept {
parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
bool store_spans = true) noexcept {
chars_format const fmt = detail::adjust_for_feature_macros(options.format);
UC const decimal_point = options.decimal_point;
@ -402,7 +409,9 @@ parse_number_string(UC const *p, UC const *pend,
}
UC const *const end_of_integer_part = p;
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
answer.integer = span<UC const>(start_digits, size_t(digit_count));
if (store_spans) {
answer.integer = span<UC const>(start_digits, size_t(digit_count));
}
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
// at least 1 digit in integer part, without leading zeros
if (digit_count == 0) {
@ -429,7 +438,9 @@ parse_number_string(UC const *p, UC const *pend,
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
}
exponent = before - p;
answer.fraction = span<UC const>(before, size_t(p - before));
if (store_spans) {
answer.fraction = span<UC const>(before, size_t(p - before));
}
digit_count -= exponent;
}
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
@ -514,29 +525,35 @@ parse_number_string(UC const *p, UC const *pend,
if (digit_count > 19) {
answer.too_many_digits = true;
// Let us start again, this time, avoiding overflows.
// We don't need to call if is_integer, since we use the
// pre-tokenized spans from above.
i = 0;
p = answer.integer.ptr;
UC const *int_end = p + answer.integer.len();
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
exponent = end_of_integer_part - p + exp_number;
} else { // We have a value with a fractional component.
p = answer.fraction.ptr;
UC const *frac_end = p + answer.fraction.len();
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
// The truncation recompute below reads the integer/fraction spans. When
// store_spans is false we didn't materialize them, so just flag
// too_many_digits; the caller re-parses with store_spans=true to obtain
// the corrected mantissa/exponent before taking the slow path.
if (store_spans) {
// Let us start again, this time, avoiding overflows.
// We don't need to call if is_integer, since we use the
// pre-tokenized spans from above.
i = 0;
p = answer.integer.ptr;
UC const *int_end = p + answer.integer.len();
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
exponent = answer.fraction.ptr - p + exp_number;
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
exponent = end_of_integer_part - p + exp_number;
} else { // We have a value with a fractional component.
p = answer.fraction.ptr;
UC const *frac_end = p + answer.fraction.len();
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
exponent = answer.fraction.ptr - p + exp_number;
}
// We have now corrected both exponent and i, to a truncated value
}
// We have now corrected both exponent and i, to a truncated value
}
}
answer.exponent = exponent;

View File

@ -197,6 +197,28 @@ using parse_options = parse_options_t<char>;
#define fastfloat_really_inline inline __attribute__((always_inline))
#endif
// Branch-probability hint marking the rare slow-path branches as cold, so the
// optimizer keeps the out-of-line slow-path re-parse off the hot path (and does
// not duplicate the force-inlined hot scanner into the caller, which bloated
// the hot frame and hurt ILP on some targets). Used at the call site as
// if fastfloat_unlikely(cond) { ... }
// (the macro supplies the parentheses). It expands to the standard [[unlikely]]
// attribute when supported, otherwise to __builtin_expect on GCC/Clang, or
// to a no-op elsewhere (e.g. pre-C++20 MSVC, which has no equivalent hint).
#ifdef __has_cpp_attribute
#if __has_cpp_attribute(unlikely) >= 201803L
#define FASTFLOAT_USE_UNLIKELY_ATTR 1
#endif
#endif
#ifdef FASTFLOAT_USE_UNLIKELY_ATTR
#define fastfloat_unlikely(x) (x) [[unlikely]]
#elif defined(__GNUC__) || defined(__clang__)
#define fastfloat_unlikely(x) (__builtin_expect(!!(x), 0))
#else
#define fastfloat_unlikely(x) (x)
#endif
#ifndef FASTFLOAT_ASSERT
#define FASTFLOAT_ASSERT(x) \
{ ((void)(x)); }

View File

@ -289,6 +289,23 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
return answer;
}
// Slow path: re-parse materializing the integer/fraction spans the hot no-span
// parse skipped, then run the full algorithm. The two callers reach it only
// through a fastfloat_unlikely branch, so the optimizer keeps this re-parse off
// the hot path on its own (no function-level noinline needed).
// from_chars_advanced already handles both the too_many_digits disambiguation
// and the am.power2<0 digit_comp recompute, so both slow branches collapse to
// one helper call.
template <typename T, typename UC>
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
parse_number_slow_path(UC const *first, UC const *last, T &value,
parse_options_t<UC> options, bool bjf) noexcept {
parsed_number_string_t<UC> pns =
bjf ? parse_number_string<true, UC>(first, last, options, true)
: parse_number_string<false, UC>(first, last, options, true);
return from_chars_advanced(pns, value);
}
template <typename T, typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
from_chars_float_advanced(UC const *first, UC const *last, T &value,
@ -312,10 +329,15 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value,
answer.ptr = first;
return answer;
}
bool const bjf = uint64_t(fmt & detail::basic_json_fmt) != 0;
// Fast path: parse WITHOUT materializing the integer/fraction spans (read
// only by the rare slow paths). Skipping their stores keeps the fat
// parsed_number_string_t off the hot path. store_spans is a runtime argument,
// so this reuses the single parse_number_string instantiation.
parsed_number_string_t<UC> pns =
uint64_t(fmt & detail::basic_json_fmt)
? parse_number_string<true, UC>(first, last, options)
: parse_number_string<false, UC>(first, last, options);
bjf ? parse_number_string<true, UC>(first, last, options, false)
: parse_number_string<false, UC>(first, last, options, false);
if (!pns.valid) {
if (uint64_t(fmt & chars_format::no_infnan)) {
answer.ec = std::errc::invalid_argument;
@ -326,8 +348,47 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value,
}
}
// call overload that takes parsed_number_string_t directly.
return from_chars_advanced(pns, value);
// Slow path A (rare): > 19 significant digits. The no-span parse left the
// mantissa un-truncated and skipped the span-based recompute; the cold helper
// re-parses with spans and runs the full algorithm.
//
// We have to disable -Wc++20-extensions for the [[unlikely]] attribute
// See comment for @jwakely at
// https://github.com/fastfloat/fast_float/pull/387#discussion_r3366943539
// This is unfortunate.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wc++20-extensions"
#endif
if fastfloat_unlikely (pns.too_many_digits) {
return parse_number_slow_path<T, UC>(first, last, value, options, bjf);
}
answer.ec = std::errc(); // be optimistic
answer.ptr = pns.lastmatch;
if (clinger_fast_path_impl(pns.mantissa, pns.exponent, pns.negative, value)) {
return answer;
}
adjusted_mantissa am =
compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
// Slow path B (rare): Eisel-Lemire could not resolve; digit_comp needs the
// integer/fraction spans. Route to the cold helper (clinger there is a
// dead-effect since it already failed here; the cold re-parse + digit_comp
// via from_chars_advanced reproduces this branch).
if fastfloat_unlikely (am.power2 < 0) {
return parse_number_slow_path<T, UC>(first, last, value, options, bjf);
}
#ifdef __clang__
#pragma clang diagnostic pop
#endif
to_float(pns.negative, am, value);
// Test for over/underflow.
if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) ||
am.power2 == binary_format<T>::infinite_power()) {
answer.ec = std::errc::result_out_of_range;
}
return answer;
}
template <typename T, typename UC, typename>