mirror of
https://github.com/fastfloat/fast_float.git
synced 2026-02-07 18:26:45 +08:00
Merge pull request #28 from lemire/dlemire/aqrit_magic
Magical optimizations from @aqrit
This commit is contained in:
commit
caade69916
@ -15,22 +15,29 @@ namespace fast_float {
|
|||||||
fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; }
|
fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; }
|
||||||
|
|
||||||
|
|
||||||
// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
// credit @aqrit
|
||||||
|
// Converts eight ASCII decimal digits packed into one 64-bit word into their
// numeric value (0..99999999) with a fixed sequence of integer operations:
// no loop, no branch.
//
// Layout expected in `val`: the least significant byte holds the leading
// (most significant) digit and the most significant byte holds the last
// digit.
//
// Precondition: every byte of `val` is in '0'..'9'. Nothing is validated
// here; other byte values produce a meaningless result.
//
// How it works, writing b0..b7 for the digit values from the lowest byte up:
//   1. Subtract '0' from every byte, leaving the digit values b0..b7.
//   2. val * 10 + (val >> 8) puts 10*b(i) + b(i+1) in byte i. The even bytes
//      now hold the two-digit pairs p0 = b0b1, p1 = b2b3, p2 = b4b5,
//      p3 = b6b7 (each <= 99, so nothing carries into the next byte).
//   3. Masking keeps (p0, p2) from val and (p1, p3) from val >> 16, each pair
//      value sitting at bit 0 and bit 32. The two multiplications accumulate
//      p0*1000000 + p2*100 and p1*10000 + p3 in the upper 32 bits, so the sum
//      shifted right by 32 is the final eight-digit number.
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) noexcept {
  const uint64_t mask = 0x000000FF000000FF;
  const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
  const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
  val -= 0x3030303030303030;     // '0' is 0x30: turn each ASCII byte into its digit value
  val = (val * 10) + (val >> 8); // same as val = (val * 2561) >> 8;
  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
  return uint32_t(val);
}
|
||||||
|
|
||||||
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
|
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
|
||||||
uint64_t val;
|
uint64_t val;
|
||||||
::memcpy(&val, chars, sizeof(uint64_t));
|
::memcpy(&val, chars, sizeof(uint64_t));
|
||||||
val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
|
return parse_eight_digits_unrolled(val);
|
||||||
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
|
|
||||||
return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// credit @aqrit
|
||||||
fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
|
fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
|
||||||
return (((val & 0xF0F0F0F0F0F0F0F0) |
|
return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
|
||||||
(((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
|
0x8080808080808080));
|
||||||
0x3333333333333333);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept {
|
fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept {
|
||||||
uint64_t val;
|
uint64_t val;
|
||||||
::memcpy(&val, chars, 8);
|
::memcpy(&val, chars, 8);
|
||||||
|
|||||||
@ -91,7 +91,7 @@ from_chars_result from_chars(const char *first, const char *last,
|
|||||||
}
|
}
|
||||||
answer.ec = std::errc(); // be optimistic
|
answer.ec = std::errc(); // be optimistic
|
||||||
answer.ptr = pns.lastmatch;
|
answer.ptr = pns.lastmatch;
|
||||||
|
// Next is Clinger's fast path.
|
||||||
if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
|
if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
|
||||||
value = T(pns.mantissa);
|
value = T(pns.mantissa);
|
||||||
if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
|
if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
|
||||||
|
|||||||
@ -360,6 +360,8 @@ adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
|
|||||||
// credit: R. Oudompheng who first implemented this fast path (to my knowledge).
|
// credit: R. Oudompheng who first implemented this fast path (to my knowledge).
|
||||||
// It is rough, but it does the job of accelerating the slow path since most
|
// It is rough, but it does the job of accelerating the slow path since most
|
||||||
// long streams of digits are determined after 19 digits.
|
// long streams of digits are determined after 19 digits.
|
||||||
|
// Note that mantissa+1 cannot overflow since mantissa < 10**19 and so
|
||||||
|
// mantissa+1 <= 10**19 < 2**64.
|
||||||
adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
|
adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
|
||||||
adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
|
adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
|
||||||
// They must both agree and be both a successful result.
|
// They must both agree and be both a successful result.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user