mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-07 17:26:51 +08:00
Merge pull request #28 from lemire/dlemire/aqrit_magic
Magical optimizations from @aqrit
This commit is contained in:
commit
caade69916
@ -15,22 +15,29 @@ namespace fast_float {
|
||||
// Returns true when `c` is a decimal digit character, i.e. lies in the
// inclusive range '0'..'9'; returns false for every other character.
fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; }
|
||||
|
||||
|
||||
// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
||||
// credit @aqrit
|
||||
// Converts eight decimal digit characters, packed one per byte in `val`, into
// the number they spell. The lowest-order byte is the most significant digit:
// its two-digit pair is the one weighted by 1000000 below.
// NOTE(review): assumes every byte of `val` is in '0'..'9'; nothing in this
// function checks that — confirm callers validate the input first.
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) {
|
||||
// Selects byte 0 and byte 4 of a 64-bit word.
const uint64_t mask = 0x000000FF000000FF;
|
||||
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
||||
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
||||
// Subtract the character '0' (0x30) from each byte, leaving digit values.
val -= 0x3030303030303030;
|
||||
// Each byte becomes 10 * its own digit + the digit held in the next-higher
// byte, so bytes 0, 2, 4 and 6 now hold two-digit values (at most 99).
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
||||
// (val & mask) picks the pairs in bytes 0 and 4; ((val >> 16) & mask) picks
// the pairs in bytes 2 and 6. After the two multiplications the upper 32 bits
// of the sum hold
//   1000000 * pair(byte 0) + 10000 * pair(byte 2) + 100 * pair(byte 4) + pair(byte 6),
// which the shift by 32 moves down into the low half.
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
||||
return uint32_t(val);
|
||||
}
|
||||
|
||||
// Reads sizeof(uint64_t) bytes starting at `chars` and converts them, taken as
// eight decimal digit characters, into the number they spell.
// NOTE(review): as this text stands the body holds two `return` statements in
// sequence, so the final call to the uint64_t overload is never reached. This
// looks like a rendered diff showing the removed body and the added line
// together without +/- markers — confirm against the repository before
// relying on either form.
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
|
||||
uint64_t val;
|
||||
// Copy the bytes into an integer instead of dereferencing a cast pointer.
::memcpy(&val, chars, sizeof(uint64_t));
|
||||
// Keep the low nibble of every byte (the digit value of a digit character),
// then combine neighbouring bytes: 2561 == 10 * 256 + 1.
val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
|
||||
// Keep the two-digit values in bytes 0, 2, 4 and 6 and combine neighbouring
// 16-bit lanes: 6553601 == 100 * 65536 + 1.
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
|
||||
// Keep the four-digit values in the low 16 bits of each 32-bit half and
// combine them: 42949672960001 == 10000 * 2^32 + 1. The result is read from
// the upper 32 bits.
return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
|
||||
// Unreachable as written (see the note at the top of the function).
return parse_eight_digits_unrolled(val);
|
||||
}
|
||||
|
||||
// credit @aqrit
|
||||
// Tests, with word-wide arithmetic and no per-byte loop, whether all eight
// bytes of `val` are decimal digit characters ('0'..'9', i.e. 0x30..0x39).
// NOTE(review): as this text stands the body holds two `return` statements in
// sequence, so only the first expression is ever evaluated. This looks like a
// rendered diff showing the removed and the added expression together without
// +/- markers — confirm against the repository before relying on either form.
fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
|
||||
// First form: the high nibble of every byte must be 3, and the high nibble
// must still be 3 after 6 is added to the byte (so the low nibble is <= 9).
// The second set of high nibbles is shifted into the low-nibble positions so
// that a single comparison against 0x33 per byte checks both conditions.
return (((val & 0xF0F0F0F0F0F0F0F0) |
|
||||
(((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
|
||||
0x3333333333333333);
|
||||
// Second form (unreachable as written): read per byte, adding 0x46 sets the
// top bit when the byte exceeds 0x39 and subtracting 0x30 sets it when the
// byte is below 0x30; the result is true when no byte has its top bit set.
// NOTE(review): carries/borrows between bytes are not analysed here.
return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
|
||||
0x8080808080808080));
|
||||
}
|
||||
|
||||
|
||||
fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept {
|
||||
uint64_t val;
|
||||
::memcpy(&val, chars, 8);
|
||||
|
||||
@ -91,7 +91,7 @@ from_chars_result from_chars(const char *first, const char *last,
|
||||
}
|
||||
answer.ec = std::errc(); // be optimistic
|
||||
answer.ptr = pns.lastmatch;
|
||||
|
||||
// Next is Clinger's fast path.
|
||||
if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
|
||||
value = T(pns.mantissa);
|
||||
if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
|
||||
|
||||
@ -360,6 +360,8 @@ adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
|
||||
// credit: R. Oudompheng who first implemented this fast path (to my knowledge).
|
||||
// It is rough, but it does the job of accelerating the slow path since most
|
||||
// long streams of digits are determined after 19 digits.
|
||||
// Note that mantissa+1 cannot overflow since mantissa < 10**19 and so
|
||||
// mantissa+1 <= 10**19 < 2**64.
|
||||
adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
|
||||
adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
|
||||
// They must both agree and be both a successful result.
|
||||
|
||||
@ -18,8 +18,8 @@ for q in range(-342,0):
|
||||
# truncate
|
||||
while(c >= (1<<128)):
|
||||
c //= 2
|
||||
format(c)
|
||||
|
||||
format(c)
|
||||
|
||||
for q in range(0,308+1):
|
||||
power5 = 5 ** q
|
||||
# move the most significant bit in position
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user