mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-08 01:36:49 +08:00
Merge pull request #51 from fastfloat/dlemire/alt_long
Improves long-significand performance
This commit is contained in:
commit
b61ed01b65
@ -60,6 +60,7 @@ fastfloat_really_inline
|
||||
parsed_number_string parse_number_string(const char *p, const char *pend, chars_format fmt) noexcept {
|
||||
parsed_number_string answer;
|
||||
answer.valid = false;
|
||||
answer.too_many_digits = false;
|
||||
answer.negative = (*p == '-');
|
||||
if ((*p == '-') || (*p == '+')) {
|
||||
++p;
|
||||
@ -81,10 +82,11 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
|
||||
uint64_t(*p - '0'); // might overflow, we will handle the overflow later
|
||||
++p;
|
||||
}
|
||||
const char *const end_of_integer_part = p;
|
||||
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
|
||||
int64_t exponent = 0;
|
||||
if ((p != pend) && (*p == '.')) {
|
||||
++p;
|
||||
const char *first_after_period = p;
|
||||
#if FASTFLOAT_IS_BIG_ENDIAN == 0
|
||||
// Fast approach only tested under little endian systems
|
||||
if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) {
|
||||
@ -101,19 +103,16 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
|
||||
++p;
|
||||
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
|
||||
}
|
||||
exponent = first_after_period - p;
|
||||
exponent = end_of_integer_part + 1 - p;
|
||||
digit_count -= exponent;
|
||||
}
|
||||
// we must have encountered at least one integer!
|
||||
if ((start_digits == p) || ((start_digits == p - 1) && (*start_digits == '.') )) {
|
||||
if (digit_count == 0) {
|
||||
return answer;
|
||||
}
|
||||
// digit_count is the exact number of digits.
|
||||
int32_t digit_count =
|
||||
int32_t(p - start_digits); // used later to guard against overflows
|
||||
if(exponent > 0) {digit_count--;}
|
||||
int64_t exp_number = 0; // explicit exponential part
|
||||
if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
|
||||
const char * location_of_e = p;
|
||||
int64_t exp_number = 0; // exponential part
|
||||
++p;
|
||||
bool neg_exp = false;
|
||||
if ((p != pend) && ('-' == *p)) {
|
||||
@ -137,7 +136,8 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
|
||||
}
|
||||
++p;
|
||||
}
|
||||
exponent += (neg_exp ? -exp_number : exp_number);
|
||||
if(neg_exp) { exp_number = - exp_number; }
|
||||
exponent += exp_number;
|
||||
}
|
||||
} else {
|
||||
// If it is scientific and not fixed, we have to bail out.
|
||||
@ -151,25 +151,40 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
|
||||
// of a 64-bit integer. However, this is uncommon.
|
||||
//
|
||||
// We can deal with up to 19 digits.
|
||||
if (((digit_count > 19))) { // this is uncommon
|
||||
if (digit_count > 19) { // this is uncommon
|
||||
// It is possible that the integer had an overflow.
|
||||
// We have to handle the case where we have 0.0000somenumber.
|
||||
// We need to be mindful of the case where we only have zeroes...
|
||||
// E.g., 0.000000000...000.
|
||||
const char *start = start_digits;
|
||||
while ((start != pend) && (*start == '0' || *start == '.')) {
|
||||
if(*start == '.') { digit_count++; } // We will subtract it again later.
|
||||
if(*start == '0') { digit_count --; }
|
||||
start++;
|
||||
}
|
||||
// We over-decrement by one when there is a decimal separator
|
||||
digit_count -= int(start - start_digits);
|
||||
if (digit_count > 19) {
|
||||
answer.mantissa = 0xFFFFFFFFFFFFFFFF; // important: we don't want the mantissa to be used in a fast path uninitialized.
|
||||
answer.too_many_digits = true;
|
||||
return answer;
|
||||
// Let us start again, this time, avoiding overflows.
|
||||
i = 0;
|
||||
p = start_digits;
|
||||
const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
|
||||
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
|
||||
i = i * 10 + uint64_t(*p - '0');
|
||||
++p;
|
||||
}
|
||||
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
|
||||
exponent = end_of_integer_part - p + exp_number;
|
||||
} else { // We have a value with a fractional component.
|
||||
p++; // skip the '.'
|
||||
const char *first_after_period = p;
|
||||
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
|
||||
i = i * 10 + uint64_t(*p - '0');
|
||||
++p;
|
||||
}
|
||||
exponent = first_after_period - p + exp_number;
|
||||
}
|
||||
// We have now corrected both exponent and i, to a truncated value
|
||||
}
|
||||
}
|
||||
answer.too_many_digits = false;
|
||||
answer.exponent = exponent;
|
||||
answer.mantissa = i;
|
||||
return answer;
|
||||
|
||||
@ -184,6 +184,9 @@ struct adjusted_mantissa {
|
||||
bool operator==(const adjusted_mantissa &o) const {
|
||||
return mantissa == o.mantissa && power2 == o.power2;
|
||||
}
|
||||
bool operator!=(const adjusted_mantissa &o) const {
|
||||
return mantissa != o.mantissa || power2 != o.power2;
|
||||
}
|
||||
};
|
||||
|
||||
struct decimal {
|
||||
@ -200,44 +203,6 @@ struct decimal {
|
||||
// Moves are allowed:
|
||||
decimal(decimal &&) = default;
|
||||
decimal &operator=(decimal &&other) = default;
|
||||
// Generates a mantissa by truncating to 19 digits.
|
||||
// This function should be reasonably fast.
|
||||
// Note that the user is responsible to ensure that digits are
|
||||
// initialized to zero when there are fewer than 19.
|
||||
inline uint64_t to_truncated_mantissa() {
|
||||
#if FASTFLOAT_IS_BIG_ENDIAN == 1
|
||||
uint64_t mantissa = 0;
|
||||
for (uint32_t i = 0; i < max_digit_without_overflow;
|
||||
i++) {
|
||||
mantissa = mantissa * 10 + digits[i]; // can be accelerated
|
||||
}
|
||||
return mantissa;
|
||||
#else
|
||||
uint64_t val;
|
||||
// 8 first digits
|
||||
::memcpy(&val, digits, sizeof(uint64_t));
|
||||
val = val * 2561 >> 8;
|
||||
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
|
||||
uint64_t mantissa =
|
||||
uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
|
||||
// 8 more digits for a total of 16
|
||||
::memcpy(&val, digits + sizeof(uint64_t), sizeof(uint64_t));
|
||||
val = val * 2561 >> 8;
|
||||
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
|
||||
uint32_t eight_digits_value =
|
||||
uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
|
||||
mantissa = 100000000 * mantissa + eight_digits_value;
|
||||
for (uint32_t i = 2 * sizeof(uint64_t); i < max_digit_without_overflow;
|
||||
i++) {
|
||||
mantissa = mantissa * 10 + digits[i]; // can be accelerated
|
||||
}
|
||||
return mantissa;
|
||||
#endif
|
||||
}
|
||||
// Generate an exponent matching to_truncated_mantissa()
|
||||
inline int32_t to_truncated_exponent() {
|
||||
return decimal_point - int32_t(max_digit_without_overflow);
|
||||
}
|
||||
};
|
||||
|
||||
constexpr static double powers_of_ten_double[] = {
|
||||
@ -372,4 +337,4 @@ inline OStream& operator<<(OStream &out, const fast_float::decimal &d) {
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -66,6 +66,25 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n
|
||||
answer.ptr = first;
|
||||
return answer;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) {
|
||||
uint64_t word = am.mantissa;
|
||||
word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
|
||||
word = negative
|
||||
? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
|
||||
#if FASTFLOAT_IS_BIG_ENDIAN == 1
|
||||
if (std::is_same<T, float>::value) {
|
||||
::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian
|
||||
} else {
|
||||
::memcpy(&value, &word, sizeof(T));
|
||||
}
|
||||
#else
|
||||
// For little-endian systems:
|
||||
::memcpy(&value, &word, sizeof(T));
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
@ -92,31 +111,23 @@ from_chars_result from_chars(const char *first, const char *last,
|
||||
answer.ec = std::errc(); // be optimistic
|
||||
answer.ptr = pns.lastmatch;
|
||||
// Next is Clinger's fast path.
|
||||
if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
|
||||
if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path() && !pns.too_many_digits) {
|
||||
value = T(pns.mantissa);
|
||||
if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
|
||||
else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); }
|
||||
if (pns.negative) { value = -value; }
|
||||
return answer;
|
||||
}
|
||||
adjusted_mantissa am = pns.too_many_digits ? parse_long_mantissa<binary_format<T>>(first,last) : compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
|
||||
adjusted_mantissa am = compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
|
||||
if(pns.too_many_digits) {
|
||||
if(am != compute_float<binary_format<T>>(pns.exponent, pns.mantissa + 1)) {
|
||||
am.power2 = -1; // value is invalid.
|
||||
}
|
||||
}
|
||||
// If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
|
||||
// then we need to go the long way around again. This is very uncommon.
|
||||
if(am.power2 < 0) { am = parse_long_mantissa<binary_format<T>>(first,last); }
|
||||
uint64_t word = am.mantissa;
|
||||
word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
|
||||
word = pns.negative
|
||||
? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
|
||||
#if FASTFLOAT_IS_BIG_ENDIAN == 1
|
||||
if (std::is_same<T, float>::value) {
|
||||
::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian
|
||||
} else {
|
||||
::memcpy(&value, &word, sizeof(T));
|
||||
}
|
||||
#else
|
||||
// For little-endian systems:
|
||||
::memcpy(&value, &word, sizeof(T));
|
||||
#endif
|
||||
to_float(pns.negative, am, value);
|
||||
return answer;
|
||||
}
|
||||
|
||||
|
||||
@ -353,19 +353,6 @@ adjusted_mantissa compute_float(decimal &d) {
|
||||
template <typename binary>
|
||||
adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
|
||||
decimal d = parse_decimal(first, last);
|
||||
// In some cases we get lucky, and looking at only the first 19 digits is enough.
|
||||
// Let us try that.
|
||||
const uint64_t mantissa = d.to_truncated_mantissa();
|
||||
const int64_t exponent = d.to_truncated_exponent();
|
||||
// credit: R. Oudompheng who first implemented this fast path (to my knowledge).
|
||||
// It is rough, but it does the job of accelerating the slow path since most
|
||||
// long streams of digits are determined after 19 digits.
|
||||
// Note that mantissa+1 cannot overflow since mantissa < 10**19 and so
|
||||
// mantissa+1 <= 10**19 < 2**64.
|
||||
adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
|
||||
adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
|
||||
// They must both agree and be both a successful result.
|
||||
if(( am1 == am2 ) && (am1.power2 >= 0)) { return am1; }
|
||||
return compute_float<binary>(d);
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user