mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-06 16:56:57 +08:00
Merge pull request #15 from lemire/dlemire/long_path
Implement a fast "slow path"
This commit is contained in:
commit
5c50a4c5d5
@ -16,31 +16,36 @@ fastfloat_really_inline bool is_integer(char c) noexcept { return (c >= '0' &&
|
||||
// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
||||
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
|
||||
uint64_t val;
|
||||
memcpy(&val, chars, sizeof(uint64_t));
|
||||
::memcpy(&val, chars, sizeof(uint64_t));
|
||||
val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
|
||||
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
|
||||
return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
|
||||
}
|
||||
|
||||
// Returns true when every one of the eight bytes packed in `val` is an ASCII
// decimal digit ('0'..'9'), and false otherwise.
// For each byte, two conditions are checked at once:
//  - the high nibble is 3 (first operand of the OR), and
//  - the high nibble is still 3 after adding 6, i.e. the low nibble is at
//    most 9 (second operand, shifted down into the low nibble).
// Only when both hold for all bytes does the OR equal 0x33 in every byte.
fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
  return (((val & 0xF0F0F0F0F0F0F0F0) |
           (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
          0x3333333333333333);
}

// Overload working directly on a character buffer: loads eight bytes from
// `chars` and forwards them to the uint64_t overload. The caller must make
// sure that eight bytes are readable at `chars`.
fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept {
  uint64_t val;
  ::memcpy(&val, chars, 8);
  return is_made_of_eight_digits_fast(val);
}
|
||||
|
||||
|
||||
// Converts the four ASCII decimal digits stored at `chars` into their numeric
// value with two multiply/shift steps. Exactly four bytes are read from
// `chars` and no validation is performed: the caller must already know that
// the four bytes are digits.
// NOTE(review): like the eight-digit variant, this looks like it assumes a
// little-endian load -- confirm before porting.
fastfloat_really_inline uint32_t parse_four_digits_unrolled(const char *chars) noexcept {
  uint32_t val;
  ::memcpy(&val, chars, sizeof(uint32_t));
  // 2561 = 10 * 2^8 + 1: merges neighbouring digits into two-digit values.
  val = (val & 0x0F0F0F0F) * 2561 >> 8;
  // 6553601 = 100 * 2^16 + 1: merges the two two-digit values.
  return (val & 0x00FF00FF) * 6553601 >> 16;
}
|
||||
|
||||
fastfloat_really_inline bool is_made_of_four_digits_fast(const char *chars) noexcept {
|
||||
uint32_t val;
|
||||
memcpy(&val, chars, 4);
|
||||
::memcpy(&val, chars, 4);
|
||||
return (((val & 0xF0F0F0F0) |
|
||||
(((val + 0x06060606) & 0xF0F0F0F0) >> 4)) ==
|
||||
0x33333333);
|
||||
@ -162,95 +167,20 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
|
||||
return answer;
|
||||
}
|
||||
|
||||
// This should always succeed since it follows a call to parse_number_string.
|
||||
// It assumes that there are more than 19 mantissa digits to parse.
|
||||
parsed_number_string parse_truncated_decimal(const char *&p, const char *pend) noexcept {
|
||||
parsed_number_string answer;
|
||||
answer.valid = true;
|
||||
answer.negative = (*p == '-');
|
||||
if ((*p == '-') || (*p == '+')) {
|
||||
++p;
|
||||
}
|
||||
size_t number_of_digits{0};
|
||||
|
||||
|
||||
uint64_t i = 0;
|
||||
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
// a multiplication by 10 is cheaper than an arbitrary integer
|
||||
// multiplication
|
||||
if(number_of_digits < 19) {
|
||||
|
||||
uint8_t digit = uint8_t(*p - '0');
|
||||
i = 10 * i + digit;
|
||||
number_of_digits ++;
|
||||
}
|
||||
++p;
|
||||
}
|
||||
int64_t exponent = 0;
|
||||
if ((p != pend) && (*p == '.')) {
|
||||
++p;
|
||||
const char *first_after_period = p;
|
||||
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
if(number_of_digits < 19) {
|
||||
uint8_t digit = uint8_t(*p - '0');
|
||||
i = i * 10 + digit;
|
||||
number_of_digits ++;
|
||||
} else if (exponent == 0) {
|
||||
exponent = first_after_period - p;
|
||||
}
|
||||
++p;
|
||||
}
|
||||
}
|
||||
|
||||
if ((p != pend) && (('e' == *p) || ('E' == *p))) {
|
||||
int64_t exp_number = 0; // exponential part
|
||||
++p;
|
||||
bool neg_exp = false;
|
||||
if ((p != pend) && ('-' == *p)) {
|
||||
neg_exp = true;
|
||||
++p;
|
||||
} else if ((p != pend) && ('+' == *p)) {
|
||||
++p;
|
||||
}
|
||||
if ((p == pend) || !is_integer(*p)) {
|
||||
return answer;
|
||||
}
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
uint8_t digit = uint8_t(*p - '0');
|
||||
if (exp_number < 0x10000) {
|
||||
exp_number = 10 * exp_number + digit;
|
||||
}
|
||||
++p;
|
||||
}
|
||||
exponent += (neg_exp ? -exp_number : exp_number);
|
||||
}
|
||||
answer.lastmatch = p;
|
||||
answer.valid = true;
|
||||
answer.too_many_digits = true; // assumed
|
||||
answer.exponent = exponent;
|
||||
answer.mantissa = i;
|
||||
return answer;
|
||||
}
|
||||
|
||||
|
||||
// This should always succeed since it follows a call to parse_number_string.
|
||||
decimal parse_decimal(const char *&p, const char *pend) noexcept {
|
||||
decimal parse_decimal(const char *p, const char *pend) noexcept {
|
||||
decimal answer;
|
||||
answer.num_digits = 0;
|
||||
answer.decimal_point = 0;
|
||||
answer.negative = false;
|
||||
answer.truncated = false;
|
||||
// skip leading whitespace
|
||||
while (fast_float::is_space(*p)) {
|
||||
p++;
|
||||
}
|
||||
// any whitespace has been skipped.
|
||||
answer.negative = (*p == '-');
|
||||
if ((*p == '-') || (*p == '+')) {
|
||||
++p;
|
||||
}
|
||||
|
||||
// skip leading zeroes
|
||||
while ((p != pend) && (*p == '0')) {
|
||||
++p;
|
||||
}
|
||||
@ -273,8 +203,17 @@ decimal parse_decimal(const char *&p, const char *pend) noexcept {
|
||||
++p;
|
||||
}
|
||||
}
|
||||
while ((p + 8 <= pend) && (answer.num_digits + 8 < max_digits)) {
|
||||
uint64_t val;
|
||||
::memcpy(&val, p, sizeof(uint64_t));
|
||||
if(! is_made_of_eight_digits_fast(val)) break;
|
||||
val -= 0x3030303030303030;
|
||||
::memcpy(answer.digits + answer.num_digits, &val, sizeof(uint64_t));
|
||||
answer.num_digits += 8;
|
||||
p += 8;
|
||||
}
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
if (answer.num_digits + 1 < max_digits) {
|
||||
if (answer.num_digits < max_digits) {
|
||||
answer.digits[answer.num_digits] = uint8_t(*p - '0');
|
||||
} else {
|
||||
answer.truncated = true;
|
||||
@ -299,7 +238,7 @@ decimal parse_decimal(const char *&p, const char *pend) noexcept {
|
||||
uint8_t digit = uint8_t(*p - '0');
|
||||
if (exp_number < 0x10000) {
|
||||
exp_number = 10 * exp_number + digit;
|
||||
}
|
||||
}
|
||||
++p;
|
||||
}
|
||||
answer.decimal_point += (neg_exp ? -exp_number : exp_number);
|
||||
|
||||
@ -160,6 +160,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept {
|
||||
return answer;
|
||||
}
|
||||
|
||||
|
||||
} // namespace fast_float
|
||||
|
||||
#endif
|
||||
|
||||
@ -45,7 +45,7 @@ bool is_space(uint8_t c) {
|
||||
|
||||
namespace {
// Capacity of the decimal::digits buffer.
constexpr uint32_t max_digits = 768;
// Number of leading digits used by decimal::to_truncated_mantissa(); any
// 19-digit decimal number fits in a uint64_t (10^19 - 1 < 2^64).
constexpr uint32_t max_digit_without_overflow = 19;
// NOTE(review): not used in the visible code; presumably bounds
// decimal::decimal_point -- confirm against the users of this constant.
constexpr int32_t decimal_point_range = 2047;
} // namespace
|
||||
|
||||
@ -126,7 +126,11 @@ value128 full_multiplication(uint64_t value1, uint64_t value2) {
|
||||
// A (mantissa, power2) pair as produced by the compute_float functions.
// Both members default to zero, so a default-constructed value is always in a
// well-defined state even though the constructor itself is defaulted.
struct adjusted_mantissa {
  uint64_t mantissa{0};
  int power2{0};
  adjusted_mantissa() = default;
  // With C++20 this could simply be:
  //   bool operator==(const adjusted_mantissa &o) const = default;
  // Two values are equal when both members are equal.
  bool operator==(const adjusted_mantissa &o) const {
    return mantissa == o.mantissa && power2 == o.power2;
  }
};
|
||||
|
||||
struct decimal {
|
||||
@ -135,6 +139,40 @@ struct decimal {
|
||||
bool negative;
|
||||
bool truncated;
|
||||
uint8_t digits[max_digits];
|
||||
decimal() = default;
|
||||
// Copies are not allowed since this is a fat object.
|
||||
decimal(const decimal &) = delete;
|
||||
// Copies are not allowed since this is a fat object.
|
||||
decimal & operator=(const decimal &) = delete;
|
||||
// Moves are allowed:
|
||||
decimal(decimal &&) = default;
|
||||
decimal& operator=(decimal&& other) = default;
|
||||
// Generates a mantissa by truncating to 19 digits; this function assumes
|
||||
// that num_digits >= 19 (the caller is responsible for the check).
|
||||
// This function should be reasonably fast.
|
||||
inline uint64_t to_truncated_mantissa() {
|
||||
uint64_t val;
|
||||
// 8 first digits
|
||||
::memcpy(&val, digits, sizeof(uint64_t));
|
||||
val = val * 2561 >> 8;
|
||||
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
|
||||
uint64_t mantissa = uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
|
||||
// 8 more digits for a total of 16
|
||||
::memcpy(&val, digits + sizeof(uint64_t), sizeof(uint64_t));
|
||||
val = val * 2561 >> 8;
|
||||
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
|
||||
uint32_t eight_digits_value = uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
|
||||
mantissa = 100000000 * mantissa + eight_digits_value;
|
||||
for(uint32_t i = 2*sizeof(uint64_t); i < max_digit_without_overflow; i++) {
|
||||
mantissa = mantissa * 10 + digits[i]; // can be accelerated
|
||||
}
|
||||
return mantissa;
|
||||
}
|
||||
// Generate san exponent matching to_truncated_mantissa()
|
||||
inline int32_t to_truncated_exponent() {
|
||||
return decimal_point - max_digit_without_overflow;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
constexpr static double powers_of_ten_double[] = {
|
||||
@ -265,4 +303,13 @@ constexpr float binary_format<float>::exact_power_of_ten(int64_t power) {
|
||||
|
||||
} // namespace fast_float
|
||||
|
||||
// for convenience:
|
||||
#include <ostream>
|
||||
// Writes `d` as "0.<digits> * 10 ** <decimal_point>" to `out` and returns
// `out`. Declared inline because it is defined in a header: without it, every
// translation unit including this header would emit its own definition and
// linking would fail.
inline std::ostream& operator<<(std::ostream& out, const fast_float::decimal& d) {
  out << "0.";
  // Digits are stored as small integers; cast so they print as numbers rather
  // than as characters.
  for(size_t i = 0; i < d.num_digits; i++) { out << int32_t(d.digits[i]); }
  out << " * 10 ** " << d.decimal_point;
  return out;
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -107,7 +107,7 @@ from_chars_result from_chars(const char *first, const char *last,
|
||||
word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
|
||||
word = pns.negative
|
||||
? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
|
||||
memcpy(&value, &word, sizeof(T));
|
||||
::memcpy(&value, &word, sizeof(T));
|
||||
return answer;
|
||||
}
|
||||
|
||||
|
||||
@ -28,20 +28,6 @@ inline void trim(decimal &h) {
|
||||
}
|
||||
}
|
||||
|
||||
/** If you ever want to see what is going on, the following function might prove handy:
|
||||
* **/
|
||||
void print(const decimal d, int32_t exp2 = 0) {
|
||||
printf("0.");
|
||||
for(size_t i = 0; i < d.num_digits; i++) {
|
||||
printf("%d", int(d.digits[i]));
|
||||
}
|
||||
printf(" * 10 **%d ", d.decimal_point);
|
||||
printf(" * 2 **%d ", exp2);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) {
|
||||
@ -368,8 +354,23 @@ adjusted_mantissa compute_float(decimal &d) {
|
||||
template <typename binary>
|
||||
adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
|
||||
decimal d = parse_decimal(first, last);
|
||||
const uint64_t mantissa = d.to_truncated_mantissa();
|
||||
const int64_t exponent = d.to_truncated_exponent();
|
||||
// credit: Nigel Tao who first implemented this fast path (to my knowledge).
|
||||
// It is rough, but it does the job of accelerating the slow path since most
|
||||
// long streams of digits are determined after 19 digits.
|
||||
adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
|
||||
adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
|
||||
if( am1 == am2 ) { return am1; }
|
||||
return compute_float<binary>(d);
|
||||
}
|
||||
|
||||
} // namespace fast_float
|
||||
#endif
|
||||
|
||||
/*
|
||||
uint32_t num_digits;
|
||||
int32_t decimal_point;
|
||||
bool negative;
|
||||
bool truncated;
|
||||
uint8_t digits[max_digits];*/
|
||||
@ -139,6 +139,7 @@ int main() {
|
||||
|
||||
|
||||
std::cout << "======= 64 bits " << std::endl;
|
||||
Assert(basic_test_64bit("2.22507385850720212418870147920222032907240528279439037814303133837435107319244194686754406432563881851382188218502438069999947733013005649884107791928741341929297200970481951993067993290969042784064731682041565926728632933630474670123316852983422152744517260835859654566319282835244787787799894310779783833699159288594555213714181128458251145584319223079897504395086859412457230891738946169368372321191373658977977723286698840356390251044443035457396733706583981055420456693824658413747607155981176573877626747665912387199931904006317334709003012790188175203447190250028061277777916798391090578584006464715943810511489154282775041174682194133952466682503431306181587829379004205392375072083366693241580002758391118854188641513168478436313080237596295773983001708984375e-308", 0x1.0000000000002p-1022));
|
||||
Assert(basic_test_64bit("1.0000000000000006661338147750939242541790008544921875",1.0000000000000007));
|
||||
Assert(basic_test_64bit("1090544144181609348835077142190",0x1.b8779f2474dfbp+99));
|
||||
Assert(basic_test_64bit("2.2250738585072013e-308",2.2250738585072013e-308));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user