Merge pull request #28 from lemire/dlemire/aqrit_magic

Magical optimizations from @aqrit
This commit is contained in:
Daniel Lemire 2020-11-23 18:23:28 -05:00 committed by GitHub
commit caade69916
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 20 additions and 11 deletions

View File

@ -15,22 +15,29 @@ namespace fast_float {
// Returns true when c is one of the ASCII decimal digit characters '0'..'9'.
fastfloat_really_inline bool is_integer(char c) noexcept {
  return !(c < '0' || c > '9');
}
// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
// credit @aqrit
// Converts eight ASCII decimal digits packed into a 64-bit word into their
// numeric value (0 to 99999999). The first, most significant, digit must sit
// in the least-significant byte of val. The result is only meaningful when
// every byte of val is in '0'..'9'.
// Only integer arithmetic is performed, hence noexcept like the other helpers.
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) noexcept {
  const uint64_t mask = 0x000000FF000000FF;
  const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
  const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
  // Turn each ASCII byte into its digit value (0..9).
  val -= 0x3030303030303030;
  // Fold neighbouring digits: bytes 0, 2, 4 and 6 now each hold a two-digit
  // number (10 * digit + next digit, at most 99, so nothing carries over).
  val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
  // Weight the four two-digit numbers by 1000000, 10000, 100 and 1; the sum
  // accumulates in the upper 32 bits, which the final shift extracts.
  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
  return uint32_t(val);
}
// Parses the eight ASCII digits stored at chars[0..7] and returns their value
// (0 to 99999999). The caller must guarantee that eight bytes are readable and
// that they are all decimal digits; no validation happens here.
// NOTE(review): the word-based overload expects the first character in the
// least-significant byte, which a plain memcpy only yields on little-endian
// hosts -- confirm the supported targets.
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
  uint64_t val;
  // memcpy instead of a pointer cast: chars need not be 8-byte aligned.
  ::memcpy(&val, chars, sizeof(uint64_t));
  // Single conversion path: defer to the word-based overload rather than
  // keeping a second, unreachable copy of the arithmetic.
  return parse_eight_digits_unrolled(val);
}
// credit @aqrit
// Returns true if and only if each of the eight bytes of val is an ASCII
// decimal digit ('0' = 0x30 through '9' = 0x39).
fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
  // Adding 0x46 pushes bytes above '9' into the range with the top bit set,
  // subtracting 0x30 does the same for bytes below '0'. Any byte outside
  // '0'..'9' therefore leaves a top bit set in at least one of the two terms,
  // while eight digits produce no carries or borrows and leave all top bits
  // clear.
  return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
            0x8080808080808080));
}
fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept {
uint64_t val;
::memcpy(&val, chars, 8);

View File

@ -91,7 +91,7 @@ from_chars_result from_chars(const char *first, const char *last,
}
answer.ec = std::errc(); // be optimistic
answer.ptr = pns.lastmatch;
// Next is Clinger's fast path.
if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
value = T(pns.mantissa);
if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }

View File

@ -360,6 +360,8 @@ adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
// credit: R. Oudompheng who first implemented this fast path (to my knowledge).
// It is rough, but it does the job of accelerating the slow path since most
// long streams of digits are determined after 19 digits.
// Note that mantissa+1 cannot overflow since mantissa < 10**19 and so
// mantissa+1 <= 10**19 < 2**64.
adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
// They must both agree and be both a successful result.

View File

@ -18,8 +18,8 @@ for q in range(-342,0):
# truncate
while(c >= (1<<128)):
c //= 2
format(c)
format(c)
for q in range(0,308+1):
power5 = 5 ** q
# move the most significant bit in position