This is an experimental branch that might lead to some faster performance.

It is currently unusable.
This commit is contained in:
Daniel Lemire 2020-11-04 20:38:43 -05:00
parent 9b102a95ab
commit 933d43b5ca

View File

@ -67,6 +67,7 @@ fastfloat_really_inline
parsed_number_string parse_number_string(const char *p, const char *pend, chars_format fmt) noexcept { parsed_number_string parse_number_string(const char *p, const char *pend, chars_format fmt) noexcept {
parsed_number_string answer; parsed_number_string answer;
answer.valid = false; answer.valid = false;
answer.too_many_digits = false;
answer.negative = (*p == '-'); answer.negative = (*p == '-');
if ((*p == '-') || (*p == '+')) { if ((*p == '-') || (*p == '+')) {
++p; ++p;
@ -78,43 +79,80 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
} }
} }
const char *const start_digits = p; const char *const start_digits = p;
// skip leading zeroes
while ((p != pend) && (*p == '0')) { p++; }
// We can go forward up to 19 characters without overflow for sure, we might even go 20 characters
// or more if we have a decimal separator. We will adjust accordingly.
const char *pend_overflow_free = p + 19 > pend ? pend : p + 19;
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
while ((p != pend) && is_integer(*p)) { while ((p != pend_overflow_free) && is_integer(*p)) {
// a multiplication by 10 is cheaper than an arbitrary integer // a multiplication by 10 is cheaper than an arbitrary integer
// multiplication // multiplication
i = 10 * i + i = 10 * i +
(*p - '0'); // might overflow, we will handle the overflow later (*p - '0');
++p; ++p;
} }
int64_t exponent = 0; int64_t exponent = 0;
if ((p != pend) && (*p == '.')) { if ((p != pend_overflow_free) && (*p == '.')) {
++p; ++p;
const char *first_after_period = p; const char *first_after_period = p;
if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) { if (i == 0) {
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok // Keep on skipping leading zeroes avec the decimal separator.
while ((p != pend) && (*p == '0')) { p++; }
// reset the ending point
pend_overflow_free = p + 19 > pend ? pend : p + 19;
} else if(pend_overflow_free < pend) {
pend_overflow_free++; // go one further thanks to '.'
}
if ((p + 8 <= pend_overflow_free) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p);
p += 8; p += 8;
if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) { if ((p + 8 <= pend_overflow_free) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok i = i * 100000000 + parse_eight_digits_unrolled(p);
p += 8; p += 8;
} }
} }
while ((p != pend) && is_integer(*p)) { while ((p != pend_overflow_free) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0'); uint8_t digit = uint8_t(*p - '0');
++p; ++p;
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok i = i * 10 + digit;
} }
exponent = first_after_period - p; exponent = first_after_period - p;
} }
// we must have encountered at least one integer! // we must have encountered at least one integer!
if ((start_digits == p) || ((start_digits == p - 1) && (*start_digits == '.') )) { // We only need this check if i == 0 which is preditably unlikely.
return answer; if(i == 0) {
if ((start_digits == p) || ((start_digits == p - 1) && (*start_digits == '.') )) {
return answer;
}
}
if((p == pend_overflow_free) && (pend_overflow_free < pend)) { // We possibly have an overflow!
bool found_non_zero{false};
if((exponent == 0) && (*(p-1) != '.')) {
// We have not yet encountered the '.'
// We do the pre-decimal part first.
while ((p != pend) && is_integer(*p)) {
found_non_zero |= (*p != '0');
p++;
exponent += 1;
}
if ((p != pend) && (*p == '.')) { p++; }
while ((p != pend) && is_integer(*p)) {
found_non_zero |= (*p != '0');
p++;
}
} else {
// This is the easy case, we just have to skip all of the digits!
while ((p != pend) && is_integer(*p)) {
found_non_zero |= (*p != '0');
p++;
}
}
answer.too_many_digits = found_non_zero;
} }
int32_t digit_count =
int32_t(p - start_digits - 1); // used later to guard against overflows
if ((p != pend) && (('e' == *p) || ('E' == *p))) { if ((p != pend) && (('e' == *p) || ('E' == *p))) {
if((fmt & chars_format::fixed) && !(fmt & chars_format::scientific)) { return answer; } if((fmt & chars_format::fixed) && !(fmt & chars_format::scientific)) { return answer; }
int64_t exp_number = 0; // exponential part int64_t exp_number = 0; // exponential part
@ -142,26 +180,6 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
} }
answer.lastmatch = p; answer.lastmatch = p;
answer.valid = true; answer.valid = true;
// If we frequently had to deal with long strings of digits,
// we could extend our code by using a 128-bit integer instead
// of a 64-bit integer. However, this is uncommon.
if (((digit_count >= 19))) { // this is uncommon
// It is possible that the integer had an overflow.
// We have to handle the case where we have 0.0000somenumber.
const char *start = start_digits;
while (*start == '0' || (*start == '.')) {
start++;
}
// we over-decrement by one when there is a decimal separator
digit_count -= int(start - start_digits);
if (digit_count >= 19) {
answer.mantissa = 0xFFFFFFFFFFFFFFFF; // important: we don't want the mantissa to be used in a fast path uninitialized.
answer.too_many_digits = true;
return answer;
}
}
answer.too_many_digits = false;
answer.exponent = exponent; answer.exponent = exponent;
answer.mantissa = i; answer.mantissa = i;
return answer; return answer;