mirror of
https://github.com/fastfloat/fast_float.git
synced 2026-06-15 08:26:08 +08:00
Unroll the integer-part digit scan (straight-line for the common 1-5 digit case)
parse_number_string scans the integer part one byte at a time in a while loop, whereas the fraction part already uses the 8-digit SWAR loop. Most integer parts are 1-5 digits, so the loop back-edge dominates. Peel the first five iterations into nested ifs, falling through to the original while loop for longer runs. Semantics are identical (i = 10*i + digit, advancing p); no behavior change. Benchmark setup: AWS m8g.metal-24xl (Graviton4), -O3 -march=native, simple_fastfloat_benchmark, from_chars->double; base vs. patch measured back-to-back, mean of 2 runs. Results: canada: gcc +3.1%, clang +2.8%; mesh: gcc +5.4%, clang +5.1%; random: ~flat (1-digit integer part). No regression; gcc and clang agree. Alternatives benchmarked and rejected: reusing loop_parse_if_eight_digits for the integer part regressed 5-8% (integer parts are too short to pay for the 8-digit SWAR setup); a counted for(k<5) loop matched on gcc, but clang optimized it worse (canada -0.9%). The explicit peel is the only form solidly positive on both compilers.
This commit is contained in:
parent
7790aa6231
commit
b64d014e2f
@ -354,14 +354,37 @@ parse_number_string(UC const *p, UC const *pend,
|
|||||||
|
|
||||||
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
|
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
|
||||||
|
|
||||||
|
// Straight-line unroll of the integer-part scan: most integer parts are
|
||||||
|
// 1-5 digits, so peeling the first iterations eliminates the loop back-edge
|
||||||
|
// for the common case. Semantics are identical to the original `while` loop:
|
||||||
|
// i = 10*i + digit, advancing p.
|
||||||
|
if ((p != pend) && is_integer(*p)) {
|
||||||
|
i = uint64_t(*p - UC('0'));
|
||||||
|
++p;
|
||||||
|
if ((p != pend) && is_integer(*p)) {
|
||||||
|
i = 10 * i + uint64_t(*p - UC('0'));
|
||||||
|
++p;
|
||||||
|
if ((p != pend) && is_integer(*p)) {
|
||||||
|
i = 10 * i + uint64_t(*p - UC('0'));
|
||||||
|
++p;
|
||||||
|
if ((p != pend) && is_integer(*p)) {
|
||||||
|
i = 10 * i + uint64_t(*p - UC('0'));
|
||||||
|
++p;
|
||||||
|
if ((p != pend) && is_integer(*p)) {
|
||||||
|
i = 10 * i + uint64_t(*p - UC('0'));
|
||||||
|
++p;
|
||||||
while ((p != pend) && is_integer(*p)) {
|
while ((p != pend) && is_integer(*p)) {
|
||||||
// a multiplication by 10 is cheaper than an arbitrary integer
|
// a multiplication by 10 is cheaper than an arbitrary integer
|
||||||
// multiplication
|
// multiplication
|
||||||
i = 10 * i +
|
i = 10 * i +
|
||||||
uint64_t(*p -
|
uint64_t(*p - UC('0')); // might overflow, handled later
|
||||||
UC('0')); // might overflow, we will handle the overflow later
|
|
||||||
++p;
|
++p;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
UC const *const end_of_integer_part = p;
|
UC const *const end_of_integer_part = p;
|
||||||
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
|
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
|
||||||
answer.integer = span<UC const>(start_digits, size_t(digit_count));
|
answer.integer = span<UC const>(start_digits, size_t(digit_count));
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user