mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-07 01:06:48 +08:00
Merge pull request #96 from Alexhuszagh/bigint
Implement the big-integer arithmetic algorithm.
This commit is contained in:
commit
3f0ba09a95
@ -91,15 +91,19 @@ CXX20_CONSTEXPR fastfloat_really_inline bool is_made_of_eight_digits_fast(const
|
|||||||
return is_made_of_eight_digits_fast(read_u64(chars));
|
return is_made_of_eight_digits_fast(read_u64(chars));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct parsed_number_string {
|
typedef span<const char> byte_span;
|
||||||
int64_t exponent;
|
|
||||||
uint64_t mantissa;
|
|
||||||
const char *lastmatch;
|
|
||||||
bool negative;
|
|
||||||
bool valid;
|
|
||||||
bool too_many_digits;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
struct parsed_number_string {
|
||||||
|
int64_t exponent{0};
|
||||||
|
uint64_t mantissa{0};
|
||||||
|
const char *lastmatch{nullptr};
|
||||||
|
bool negative{false};
|
||||||
|
bool valid{false};
|
||||||
|
bool too_many_digits{false};
|
||||||
|
// contains the range of the significant digits
|
||||||
|
byte_span integer{}; // non-nullable
|
||||||
|
byte_span fraction{}; // nullable
|
||||||
|
};
|
||||||
|
|
||||||
// Assuming that you use no more than 19 digits, this will
|
// Assuming that you use no more than 19 digits, this will
|
||||||
// parse an ASCII string.
|
// parse an ASCII string.
|
||||||
@ -125,6 +129,10 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
|
|||||||
|
|
||||||
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
|
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
|
||||||
|
|
||||||
|
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
|
||||||
|
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
|
||||||
|
p += 8;
|
||||||
|
}
|
||||||
while ((p != pend) && is_integer(*p)) {
|
while ((p != pend) && is_integer(*p)) {
|
||||||
// a multiplication by 10 is cheaper than an arbitrary integer
|
// a multiplication by 10 is cheaper than an arbitrary integer
|
||||||
// multiplication
|
// multiplication
|
||||||
@ -134,24 +142,24 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
|
|||||||
}
|
}
|
||||||
const char *const end_of_integer_part = p;
|
const char *const end_of_integer_part = p;
|
||||||
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
|
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
|
||||||
|
answer.integer = byte_span(start_digits, size_t(digit_count));
|
||||||
int64_t exponent = 0;
|
int64_t exponent = 0;
|
||||||
if ((p != pend) && (*p == decimal_point)) {
|
if ((p != pend) && (*p == decimal_point)) {
|
||||||
++p;
|
++p;
|
||||||
// Fast approach only tested under little endian systems
|
const char* before = p;
|
||||||
if ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
|
// can occur at most twice without overflowing, but let it occur more, since
|
||||||
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
|
// for integers with many digits, digit parsing is the primary bottleneck.
|
||||||
p += 8;
|
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
|
||||||
if ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
|
|
||||||
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
|
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
|
||||||
p += 8;
|
p += 8;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
while ((p != pend) && is_integer(*p)) {
|
while ((p != pend) && is_integer(*p)) {
|
||||||
uint8_t digit = uint8_t(*p - '0');
|
uint8_t digit = uint8_t(*p - '0');
|
||||||
++p;
|
++p;
|
||||||
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
|
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
|
||||||
}
|
}
|
||||||
exponent = end_of_integer_part + 1 - p;
|
exponent = before - p;
|
||||||
|
answer.fraction = byte_span(before, size_t(p - before));
|
||||||
digit_count -= exponent;
|
digit_count -= exponent;
|
||||||
}
|
}
|
||||||
// we must have encountered at least one integer!
|
// we must have encountered at least one integer!
|
||||||
@ -179,7 +187,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
|
|||||||
} else {
|
} else {
|
||||||
while ((p != pend) && is_integer(*p)) {
|
while ((p != pend) && is_integer(*p)) {
|
||||||
uint8_t digit = uint8_t(*p - '0');
|
uint8_t digit = uint8_t(*p - '0');
|
||||||
if (exp_number < 0x10000) {
|
if (exp_number < 0x10000000) {
|
||||||
exp_number = 10 * exp_number + digit;
|
exp_number = 10 * exp_number + digit;
|
||||||
}
|
}
|
||||||
++p;
|
++p;
|
||||||
@ -212,23 +220,26 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
|
|||||||
if (digit_count > 19) {
|
if (digit_count > 19) {
|
||||||
answer.too_many_digits = true;
|
answer.too_many_digits = true;
|
||||||
// Let us start again, this time, avoiding overflows.
|
// Let us start again, this time, avoiding overflows.
|
||||||
|
// We don't need to check if is_integer, since we use the
|
||||||
|
// pre-tokenized spans from above.
|
||||||
i = 0;
|
i = 0;
|
||||||
p = start_digits;
|
p = answer.integer.ptr;
|
||||||
|
const char* int_end = p + answer.integer.len();
|
||||||
const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
|
const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
|
||||||
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
|
while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
|
||||||
i = i * 10 + uint64_t(*p - '0');
|
i = i * 10 + uint64_t(*p - '0');
|
||||||
++p;
|
++p;
|
||||||
}
|
}
|
||||||
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
|
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
|
||||||
exponent = end_of_integer_part - p + exp_number;
|
exponent = end_of_integer_part - p + exp_number;
|
||||||
} else { // We have a value with a fractional component.
|
} else { // We have a value with a fractional component.
|
||||||
p++; // skip the dot
|
p = answer.fraction.ptr;
|
||||||
const char *first_after_period = p;
|
const char* frac_end = p + answer.fraction.len();
|
||||||
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
|
while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
|
||||||
i = i * 10 + uint64_t(*p - '0');
|
i = i * 10 + uint64_t(*p - '0');
|
||||||
++p;
|
++p;
|
||||||
}
|
}
|
||||||
exponent = first_after_period - p + exp_number;
|
exponent = answer.fraction.ptr - p + exp_number;
|
||||||
}
|
}
|
||||||
// We have now corrected both exponent and i, to a truncated value
|
// We have now corrected both exponent and i, to a truncated value
|
||||||
}
|
}
|
||||||
@ -238,108 +249,6 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
|
|||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// This should always succeed since it follows a call to parse_number_string
|
|
||||||
// This function could be optimized. In particular, we could stop after 19 digits
|
|
||||||
// and try to bail out. Furthermore, we should be able to recover the computed
|
|
||||||
// exponent from the pass in parse_number_string.
|
|
||||||
CXX20_CONSTEXPR fastfloat_really_inline decimal parse_decimal(const char *p, const char *pend, parse_options options) noexcept {
|
|
||||||
const char decimal_point = options.decimal_point;
|
|
||||||
|
|
||||||
decimal answer;
|
|
||||||
answer.num_digits = 0;
|
|
||||||
answer.decimal_point = 0;
|
|
||||||
answer.truncated = false;
|
|
||||||
answer.negative = (*p == '-');
|
|
||||||
if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
// skip leading zeroes
|
|
||||||
while ((p != pend) && (*p == '0')) {
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
while ((p != pend) && is_integer(*p)) {
|
|
||||||
if (answer.num_digits < max_digits) {
|
|
||||||
answer.digits[answer.num_digits] = uint8_t(*p - '0');
|
|
||||||
}
|
|
||||||
answer.num_digits++;
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
if ((p != pend) && (*p == decimal_point)) {
|
|
||||||
++p;
|
|
||||||
const char *first_after_period = p;
|
|
||||||
// if we have not yet encountered a zero, we have to skip it as well
|
|
||||||
if(answer.num_digits == 0) {
|
|
||||||
// skip zeros
|
|
||||||
while ((p != pend) && (*p == '0')) {
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// We expect that this loop will often take the bulk of the running time
|
|
||||||
// because when a value has lots of digits, these digits are often in the fractional part.
|
|
||||||
while ((std::distance(p, pend) >= 8) && (answer.num_digits + 8 < max_digits)) {
|
|
||||||
uint64_t val = read_u64(p);
|
|
||||||
if(! is_made_of_eight_digits_fast(val)) { break; }
|
|
||||||
// We have eight digits, process them in one go!
|
|
||||||
val -= 0x3030303030303030;
|
|
||||||
write_u64(answer.digits + answer.num_digits, val);
|
|
||||||
answer.num_digits += 8;
|
|
||||||
p += 8;
|
|
||||||
}
|
|
||||||
while ((p != pend) && is_integer(*p)) {
|
|
||||||
if (answer.num_digits < max_digits) {
|
|
||||||
answer.digits[answer.num_digits] = uint8_t(*p - '0');
|
|
||||||
}
|
|
||||||
answer.num_digits++;
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
answer.decimal_point = int32_t(first_after_period - p);
|
|
||||||
}
|
|
||||||
// We want num_digits to be the number of significant digits, excluding
|
|
||||||
// leading *and* trailing zeros! Otherwise the truncated flag later is
|
|
||||||
// going to be misleading.
|
|
||||||
if(answer.num_digits > 0) {
|
|
||||||
// We potentially need the answer.num_digits > 0 guard because we
|
|
||||||
// prune leading zeros. So with answer.num_digits > 0, we know that
|
|
||||||
// we have at least one non-zero digit.
|
|
||||||
const char *preverse = p - 1;
|
|
||||||
int32_t trailing_zeros = 0;
|
|
||||||
while ((*preverse == '0') || (*preverse == decimal_point)) {
|
|
||||||
if(*preverse == '0') { trailing_zeros++; };
|
|
||||||
--preverse;
|
|
||||||
}
|
|
||||||
answer.decimal_point += int32_t(answer.num_digits);
|
|
||||||
answer.num_digits -= uint32_t(trailing_zeros);
|
|
||||||
}
|
|
||||||
if(answer.num_digits > max_digits) {
|
|
||||||
answer.truncated = true;
|
|
||||||
answer.num_digits = max_digits;
|
|
||||||
}
|
|
||||||
if ((p != pend) && (('e' == *p) || ('E' == *p))) {
|
|
||||||
++p;
|
|
||||||
bool neg_exp = false;
|
|
||||||
if ((p != pend) && ('-' == *p)) {
|
|
||||||
neg_exp = true;
|
|
||||||
++p;
|
|
||||||
} else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
int32_t exp_number = 0; // exponential part
|
|
||||||
while ((p != pend) && is_integer(*p)) {
|
|
||||||
uint8_t digit = uint8_t(*p - '0');
|
|
||||||
if (exp_number < 0x10000) {
|
|
||||||
exp_number = 10 * exp_number + digit;
|
|
||||||
}
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
answer.decimal_point += (neg_exp ? -exp_number : exp_number);
|
|
||||||
}
|
|
||||||
// In very rare cases, we may have fewer than 19 digits, we want to be able to reliably
|
|
||||||
// assume that all digits up to max_digit_without_overflow have been initialized.
|
|
||||||
for(uint32_t i = answer.num_digits; i < max_digit_without_overflow; i++) { answer.digits[i] = 0; }
|
|
||||||
|
|
||||||
return answer;
|
|
||||||
}
|
|
||||||
} // namespace fast_float
|
} // namespace fast_float
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
590
include/fast_float/bigint.h
Normal file
590
include/fast_float/bigint.h
Normal file
@ -0,0 +1,590 @@
|
|||||||
|
#ifndef FASTFLOAT_BIGINT_H
|
||||||
|
#define FASTFLOAT_BIGINT_H
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <climits>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include "float_common.h"
|
||||||
|
|
||||||
|
namespace fast_float {
|
||||||
|
|
||||||
|
// the limb width: we want efficient multiplication of double the bits in
|
||||||
|
// limb, or for 64-bit limbs, at least 64-bit multiplication where we can
|
||||||
|
// extract the high and low parts efficiently. this is every 64-bit
|
||||||
|
// architecture except for sparc, which emulates 128-bit multiplication.
|
||||||
|
// we might have platforms where `CHAR_BIT` is not 8, so let's avoid
|
||||||
|
// doing `8 * sizeof(limb)`.
|
||||||
|
#if defined(FASTFLOAT_64BIT) && !defined(__sparc)
|
||||||
|
#define FASTFLOAT_64BIT_LIMB
|
||||||
|
typedef uint64_t limb;
|
||||||
|
constexpr size_t limb_bits = 64;
|
||||||
|
#else
|
||||||
|
#define FASTFLOAT_32BIT_LIMB
|
||||||
|
typedef uint32_t limb;
|
||||||
|
constexpr size_t limb_bits = 32;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef span<limb> limb_span;
|
||||||
|
|
||||||
|
// number of bits in a bigint. this needs to be at least the number
|
||||||
|
// of bits required to store the largest bigint, which is
|
||||||
|
// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or
|
||||||
|
// ~3600 bits, so we round to 4000.
|
||||||
|
constexpr size_t bigint_bits = 4000;
|
||||||
|
constexpr size_t bigint_limbs = bigint_bits / limb_bits;
|
||||||
|
|
||||||
|
// vector-like type that is allocated on the stack. the entire
|
||||||
|
// buffer is pre-allocated, and only the length changes.
|
||||||
|
template <uint16_t size>
|
||||||
|
struct stackvec {
|
||||||
|
limb data[size];
|
||||||
|
// we never need more than 150 limbs
|
||||||
|
uint16_t length{0};
|
||||||
|
|
||||||
|
stackvec() = default;
|
||||||
|
stackvec(const stackvec &) = delete;
|
||||||
|
stackvec &operator=(const stackvec &) = delete;
|
||||||
|
stackvec(stackvec &&) = delete;
|
||||||
|
stackvec &operator=(stackvec &&other) = delete;
|
||||||
|
|
||||||
|
// create stack vector from existing limb span.
|
||||||
|
stackvec(limb_span s) {
|
||||||
|
FASTFLOAT_ASSERT(try_extend(s));
|
||||||
|
}
|
||||||
|
|
||||||
|
limb& operator[](size_t index) noexcept {
|
||||||
|
FASTFLOAT_DEBUG_ASSERT(index < length);
|
||||||
|
return data[index];
|
||||||
|
}
|
||||||
|
const limb& operator[](size_t index) const noexcept {
|
||||||
|
FASTFLOAT_DEBUG_ASSERT(index < length);
|
||||||
|
return data[index];
|
||||||
|
}
|
||||||
|
// index from the end of the container
|
||||||
|
const limb& rindex(size_t index) const noexcept {
|
||||||
|
FASTFLOAT_DEBUG_ASSERT(index < length);
|
||||||
|
size_t rindex = length - index - 1;
|
||||||
|
return data[rindex];
|
||||||
|
}
|
||||||
|
|
||||||
|
// set the length, without bounds checking.
|
||||||
|
void set_len(size_t len) noexcept {
|
||||||
|
length = uint16_t(len);
|
||||||
|
}
|
||||||
|
constexpr size_t len() const noexcept {
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
constexpr bool is_empty() const noexcept {
|
||||||
|
return length == 0;
|
||||||
|
}
|
||||||
|
constexpr size_t capacity() const noexcept {
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
// append item to vector, without bounds checking
|
||||||
|
void push_unchecked(limb value) noexcept {
|
||||||
|
data[length] = value;
|
||||||
|
length++;
|
||||||
|
}
|
||||||
|
// append item to vector, returning if item was added
|
||||||
|
bool try_push(limb value) noexcept {
|
||||||
|
if (len() < capacity()) {
|
||||||
|
push_unchecked(value);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// add items to the vector, from a span, without bounds checking
|
||||||
|
void extend_unchecked(limb_span s) noexcept {
|
||||||
|
limb* ptr = data + length;
|
||||||
|
::memcpy((void*)ptr, (const void*)s.ptr, sizeof(limb) * s.len());
|
||||||
|
set_len(len() + s.len());
|
||||||
|
}
|
||||||
|
// try to add items to the vector, returning if items were added
|
||||||
|
bool try_extend(limb_span s) noexcept {
|
||||||
|
if (len() + s.len() <= capacity()) {
|
||||||
|
extend_unchecked(s);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// resize the vector, without bounds checking
|
||||||
|
// if the new size is longer than the vector, assign value to each
|
||||||
|
// appended item.
|
||||||
|
void resize_unchecked(size_t new_len, limb value) noexcept {
|
||||||
|
if (new_len > len()) {
|
||||||
|
size_t count = new_len - len();
|
||||||
|
limb* first = data + len();
|
||||||
|
limb* last = first + count;
|
||||||
|
::std::fill(first, last, value);
|
||||||
|
set_len(new_len);
|
||||||
|
} else {
|
||||||
|
set_len(new_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// try to resize the vector, returning if the vector was resized.
|
||||||
|
bool try_resize(size_t new_len, limb value) noexcept {
|
||||||
|
if (new_len > capacity()) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
resize_unchecked(new_len, value);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// check if any limbs are non-zero after the given index.
|
||||||
|
// this needs to be done in reverse order, since the index
|
||||||
|
// is relative to the most significant limbs.
|
||||||
|
bool nonzero(size_t index) const noexcept {
|
||||||
|
while (index < len()) {
|
||||||
|
if (rindex(index) != 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// normalize the big integer, so most-significant zero limbs are removed.
|
||||||
|
void normalize() noexcept {
|
||||||
|
while (len() > 0 && rindex(0) == 0) {
|
||||||
|
length--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// high 64 bits of an empty limb vector: the result is zero
// and no bits are reported as truncated.
fastfloat_really_inline
uint64_t empty_hi64(bool& truncated) noexcept {
  truncated = false;
  return uint64_t(0);
}
|
||||||
|
|
||||||
|
// high 64 bits of a single 64-bit limb: `r0` shifted left by
// `leading_zeroes(r0)`. nothing can be lost, so `truncated`
// is always cleared.
fastfloat_really_inline
uint64_t uint64_hi64(uint64_t r0, bool& truncated) noexcept {
  truncated = false;
  return r0 << leading_zeroes(r0);
}
|
||||||
|
|
||||||
|
// high 64 bits of the 128-bit value formed by `r0` (upper limb)
// and `r1` (lower limb), shifted left by `leading_zeroes(r0)`.
// `truncated` reports whether any bit of `r1` did not make it
// into the result.
fastfloat_really_inline
uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept {
  const int shl = leading_zeroes(r0);
  if (shl != 0) {
    const int shr = 64 - shl;
    // the low `shr` bits of r1 are the ones that get dropped.
    truncated = (r1 << shl) != 0;
    return (r0 << shl) | (r1 >> shr);
  }
  // r0 already fills all 64 bits, so every bit of r1 is dropped
  // (and a shift by 64 would not be valid anyway).
  truncated = r1 != 0;
  return r0;
}
|
||||||
|
|
||||||
|
// high 64 bits of a single 32-bit limb: widen it and defer to
// the one-limb 64-bit routine.
fastfloat_really_inline
uint64_t uint32_hi64(uint32_t r0, bool& truncated) noexcept {
  return uint64_hi64(uint64_t(r0), truncated);
}
|
||||||
|
|
||||||
|
// high 64 bits of two 32-bit limbs: pack `r0` (upper) and `r1`
// (lower) into one 64-bit word and defer to the one-limb
// 64-bit routine.
fastfloat_really_inline
uint64_t uint32_hi64(uint32_t r0, uint32_t r1, bool& truncated) noexcept {
  const uint64_t packed = (uint64_t(r0) << 32) | uint64_t(r1);
  return uint64_hi64(packed, truncated);
}
|
||||||
|
|
||||||
|
// high 64 bits of three 32-bit limbs: `r0` becomes the upper
// 64-bit word, `r1`/`r2` are packed into the lower one, and the
// two-limb 64-bit routine does the rest.
fastfloat_really_inline
uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noexcept {
  const uint64_t upper = uint64_t(r0);
  const uint64_t lower = (uint64_t(r1) << 32) | uint64_t(r2);
  return uint64_hi64(upper, lower, truncated);
}
|
||||||
|
|
||||||
|
// add two small integers, checking for overflow.
|
||||||
|
// we want an efficient operation. for msvc, where
|
||||||
|
// we don't have built-in intrinsics, this is still
|
||||||
|
// pretty fast.
|
||||||
|
fastfloat_really_inline
|
||||||
|
limb scalar_add(limb x, limb y, bool& overflow) noexcept {
|
||||||
|
limb z;
|
||||||
|
|
||||||
|
// gcc and clang
|
||||||
|
#if defined(__has_builtin)
|
||||||
|
#if __has_builtin(__builtin_add_overflow)
|
||||||
|
overflow = __builtin_add_overflow(x, y, &z);
|
||||||
|
return z;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// generic, this still optimizes correctly on MSVC.
|
||||||
|
z = x + y;
|
||||||
|
overflow = z < x;
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
||||||
|
// multiply two small integers, getting both the high and low bits.
|
||||||
|
fastfloat_really_inline
|
||||||
|
limb scalar_mul(limb x, limb y, limb& carry) noexcept {
|
||||||
|
#ifdef FASTFLOAT_64BIT_LIMB
|
||||||
|
#if defined(__SIZEOF_INT128__)
|
||||||
|
// GCC and clang both define it as an extension.
|
||||||
|
__uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry);
|
||||||
|
carry = limb(z >> limb_bits);
|
||||||
|
return limb(z);
|
||||||
|
#else
|
||||||
|
// fallback, no native 128-bit integer multiplication with carry.
|
||||||
|
// on msvc, this optimizes identically, somehow.
|
||||||
|
value128 z = full_multiplication(x, y);
|
||||||
|
bool overflow;
|
||||||
|
z.low = scalar_add(z.low, carry, overflow);
|
||||||
|
z.high += uint64_t(overflow); // cannot overflow
|
||||||
|
carry = z.high;
|
||||||
|
return z.low;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry);
|
||||||
|
carry = limb(z >> limb_bits);
|
||||||
|
return limb(z);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// add scalar value to bigint starting from offset.
|
||||||
|
// used in grade school multiplication
|
||||||
|
template <uint16_t size>
|
||||||
|
inline bool small_add_from(stackvec<size>& vec, limb y, size_t start) noexcept {
|
||||||
|
size_t index = start;
|
||||||
|
limb carry = y;
|
||||||
|
bool overflow;
|
||||||
|
while (carry != 0 && index < vec.len()) {
|
||||||
|
vec[index] = scalar_add(vec[index], carry, overflow);
|
||||||
|
carry = limb(overflow);
|
||||||
|
index += 1;
|
||||||
|
}
|
||||||
|
if (carry != 0) {
|
||||||
|
FASTFLOAT_TRY(vec.try_push(carry));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// add scalar value to bigint.
|
||||||
|
template <uint16_t size>
|
||||||
|
fastfloat_really_inline bool small_add(stackvec<size>& vec, limb y) noexcept {
|
||||||
|
return small_add_from(vec, y, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// multiply bigint by scalar value.
|
||||||
|
template <uint16_t size>
|
||||||
|
inline bool small_mul(stackvec<size>& vec, limb y) noexcept {
|
||||||
|
limb carry = 0;
|
||||||
|
for (size_t index = 0; index < vec.len(); index++) {
|
||||||
|
vec[index] = scalar_mul(vec[index], y, carry);
|
||||||
|
}
|
||||||
|
if (carry != 0) {
|
||||||
|
FASTFLOAT_TRY(vec.try_push(carry));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// add bigint to bigint starting from index.
|
||||||
|
// used in grade school multiplication
|
||||||
|
template <uint16_t size>
|
||||||
|
bool large_add_from(stackvec<size>& x, limb_span y, size_t start) noexcept {
|
||||||
|
// the effective x buffer is from `xstart..x.len()`, so exit early
|
||||||
|
// if we can't get that current range.
|
||||||
|
if (x.len() < start || y.len() > x.len() - start) {
|
||||||
|
FASTFLOAT_TRY(x.try_resize(y.len() + start, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool carry = false;
|
||||||
|
for (size_t index = 0; index < y.len(); index++) {
|
||||||
|
limb xi = x[index + start];
|
||||||
|
limb yi = y[index];
|
||||||
|
bool c1 = false;
|
||||||
|
bool c2 = false;
|
||||||
|
xi = scalar_add(xi, yi, c1);
|
||||||
|
if (carry) {
|
||||||
|
xi = scalar_add(xi, 1, c2);
|
||||||
|
}
|
||||||
|
x[index + start] = xi;
|
||||||
|
carry = c1 | c2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle overflow
|
||||||
|
if (carry) {
|
||||||
|
FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// add bigint to bigint.
|
||||||
|
template <uint16_t size>
|
||||||
|
fastfloat_really_inline bool large_add_from(stackvec<size>& x, limb_span y) noexcept {
|
||||||
|
return large_add_from(x, y, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// grade-school multiplication algorithm
|
||||||
|
template <uint16_t size>
|
||||||
|
bool long_mul(stackvec<size>& x, limb_span y) noexcept {
|
||||||
|
limb_span xs = limb_span(x.data, x.len());
|
||||||
|
stackvec<size> z(xs);
|
||||||
|
limb_span zs = limb_span(z.data, z.len());
|
||||||
|
|
||||||
|
if (y.len() != 0) {
|
||||||
|
limb y0 = y[0];
|
||||||
|
FASTFLOAT_TRY(small_mul(x, y0));
|
||||||
|
for (size_t index = 1; index < y.len(); index++) {
|
||||||
|
limb yi = y[index];
|
||||||
|
stackvec<size> zi;
|
||||||
|
if (yi != 0) {
|
||||||
|
// re-use the same buffer throughout
|
||||||
|
zi.set_len(0);
|
||||||
|
FASTFLOAT_TRY(zi.try_extend(zs));
|
||||||
|
FASTFLOAT_TRY(small_mul(zi, yi));
|
||||||
|
limb_span zis = limb_span(zi.data, zi.len());
|
||||||
|
FASTFLOAT_TRY(large_add_from(x, zis, index));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
x.normalize();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// grade-school multiplication algorithm
|
||||||
|
template <uint16_t size>
|
||||||
|
bool large_mul(stackvec<size>& x, limb_span y) noexcept {
|
||||||
|
if (y.len() == 1) {
|
||||||
|
FASTFLOAT_TRY(small_mul(x, y[0]));
|
||||||
|
} else {
|
||||||
|
FASTFLOAT_TRY(long_mul(x, y));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// big integer type. implements a small subset of big integer
|
||||||
|
// arithmetic, using simple algorithms since asymptotically
|
||||||
|
// faster algorithms are slower for a small number of limbs.
|
||||||
|
// all operations assume the big-integer is normalized.
|
||||||
|
struct bigint {
|
||||||
|
// storage of the limbs, in little-endian order.
|
||||||
|
stackvec<bigint_limbs> vec;
|
||||||
|
|
||||||
|
bigint(): vec() {}
|
||||||
|
bigint(const bigint &) = delete;
|
||||||
|
bigint &operator=(const bigint &) = delete;
|
||||||
|
bigint(bigint &&) = delete;
|
||||||
|
bigint &operator=(bigint &&other) = delete;
|
||||||
|
|
||||||
|
bigint(uint64_t value): vec() {
|
||||||
|
#ifdef FASTFLOAT_64BIT_LIMB
|
||||||
|
vec.push_unchecked(value);
|
||||||
|
#else
|
||||||
|
vec.push_unchecked(uint32_t(value));
|
||||||
|
vec.push_unchecked(uint32_t(value >> 32));
|
||||||
|
#endif
|
||||||
|
vec.normalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the high 64 bits from the vector, and if bits were truncated.
|
||||||
|
// this is to get the significant digits for the float.
|
||||||
|
uint64_t hi64(bool& truncated) const noexcept {
|
||||||
|
#ifdef FASTFLOAT_64BIT_LIMB
|
||||||
|
if (vec.len() == 0) {
|
||||||
|
return empty_hi64(truncated);
|
||||||
|
} else if (vec.len() == 1) {
|
||||||
|
return uint64_hi64(vec.rindex(0), truncated);
|
||||||
|
} else {
|
||||||
|
uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated);
|
||||||
|
truncated |= vec.nonzero(2);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (vec.len() == 0) {
|
||||||
|
return empty_hi64(truncated);
|
||||||
|
} else if (vec.len() == 1) {
|
||||||
|
return uint32_hi64(vec.rindex(0), truncated);
|
||||||
|
} else if (vec.len() == 2) {
|
||||||
|
return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated);
|
||||||
|
} else {
|
||||||
|
uint64_t result = uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated);
|
||||||
|
truncated |= vec.nonzero(3);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare two big integers, returning the large value.
|
||||||
|
// assumes both are normalized. if the return value is
|
||||||
|
// negative, other is larger, if the return value is
|
||||||
|
// positive, this is larger, otherwise they are equal.
|
||||||
|
// the limbs are stored in little-endian order, so we
|
||||||
|
// must compare the limbs in ever order.
|
||||||
|
int compare(const bigint& other) const noexcept {
|
||||||
|
if (vec.len() > other.vec.len()) {
|
||||||
|
return 1;
|
||||||
|
} else if (vec.len() < other.vec.len()) {
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
for (size_t index = vec.len(); index > 0; index--) {
|
||||||
|
limb xi = vec[index - 1];
|
||||||
|
limb yi = other.vec[index - 1];
|
||||||
|
if (xi > yi) {
|
||||||
|
return 1;
|
||||||
|
} else if (xi < yi) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// shift left each limb n bits, carrying over to the new limb
|
||||||
|
// returns true if we were able to shift all the digits.
|
||||||
|
bool shl_bits(size_t n) noexcept {
|
||||||
|
// Internally, for each item, we shift left by n, and add the previous
|
||||||
|
// right shifted limb-bits.
|
||||||
|
// For example, we transform (for u8) shifted left 2, to:
|
||||||
|
// b10100100 b01000010
|
||||||
|
// b10 b10010001 b00001000
|
||||||
|
FASTFLOAT_DEBUG_ASSERT(n != 0);
|
||||||
|
FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8);
|
||||||
|
|
||||||
|
size_t shl = n;
|
||||||
|
size_t shr = limb_bits - shl;
|
||||||
|
limb prev = 0;
|
||||||
|
for (size_t index = 0; index < vec.len(); index++) {
|
||||||
|
limb xi = vec[index];
|
||||||
|
vec[index] = (xi << shl) | (prev >> shr);
|
||||||
|
prev = xi;
|
||||||
|
}
|
||||||
|
|
||||||
|
limb carry = prev >> shr;
|
||||||
|
if (carry != 0) {
|
||||||
|
return vec.try_push(carry);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// move the limbs left by `n` limbs.
|
||||||
|
bool shl_limbs(size_t n) noexcept {
|
||||||
|
FASTFLOAT_DEBUG_ASSERT(n != 0);
|
||||||
|
if (n + vec.len() > vec.capacity()) {
|
||||||
|
return false;
|
||||||
|
} else if (!vec.is_empty()) {
|
||||||
|
// move limbs
|
||||||
|
limb* dst = vec.data + n;
|
||||||
|
const limb* src = vec.data;
|
||||||
|
::memmove(dst, src, sizeof(limb) * vec.len());
|
||||||
|
// fill in empty limbs
|
||||||
|
limb* first = vec.data;
|
||||||
|
limb* last = first + n;
|
||||||
|
::std::fill(first, last, 0);
|
||||||
|
vec.set_len(n + vec.len());
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// move the limbs left by `n` bits.
|
||||||
|
bool shl(size_t n) noexcept {
|
||||||
|
size_t rem = n % limb_bits;
|
||||||
|
size_t div = n / limb_bits;
|
||||||
|
if (rem != 0) {
|
||||||
|
FASTFLOAT_TRY(shl_bits(rem));
|
||||||
|
}
|
||||||
|
if (div != 0) {
|
||||||
|
FASTFLOAT_TRY(shl_limbs(div));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the number of leading zeros in the bigint.
|
||||||
|
int ctlz() const noexcept {
|
||||||
|
if (vec.is_empty()) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
#ifdef FASTFLOAT_64BIT_LIMB
|
||||||
|
return leading_zeroes(vec.rindex(0));
|
||||||
|
#else
|
||||||
|
// no use defining a specialized leading_zeroes for a 32-bit type.
|
||||||
|
uint64_t r0 = vec.rindex(0);
|
||||||
|
return leading_zeroes(r0 << 32);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the number of bits in the bigint.
|
||||||
|
int bit_length() const noexcept {
|
||||||
|
int lz = ctlz();
|
||||||
|
return int(limb_bits * vec.len()) - lz;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool mul(limb y) noexcept {
|
||||||
|
return small_mul(vec, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool add(limb y) noexcept {
|
||||||
|
return small_add(vec, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
// multiply as if by 2 raised to a power.
|
||||||
|
bool pow2(uint32_t exp) noexcept {
|
||||||
|
return shl(exp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// multiply as if by 5 raised to a power.
|
||||||
|
bool pow5(uint32_t exp) noexcept {
|
||||||
|
// multiply by a power of 5
|
||||||
|
static constexpr uint32_t large_step = 135;
|
||||||
|
static constexpr uint64_t small_power_of_5[] = {
|
||||||
|
1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL,
|
||||||
|
1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL,
|
||||||
|
6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL,
|
||||||
|
3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL,
|
||||||
|
2384185791015625UL, 11920928955078125UL, 59604644775390625UL,
|
||||||
|
298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL,
|
||||||
|
};
|
||||||
|
#ifdef FASTFLOAT_64BIT_LIMB
|
||||||
|
constexpr static limb large_power_of_5[] = {
|
||||||
|
1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL,
|
||||||
|
10482974169319127550UL, 198276706040285095UL};
|
||||||
|
#else
|
||||||
|
constexpr static limb large_power_of_5[] = {
|
||||||
|
4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U,
|
||||||
|
1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U};
|
||||||
|
#endif
|
||||||
|
size_t large_length = sizeof(large_power_of_5) / sizeof(limb);
|
||||||
|
limb_span large = limb_span(large_power_of_5, large_length);
|
||||||
|
while (exp >= large_step) {
|
||||||
|
FASTFLOAT_TRY(large_mul(vec, large));
|
||||||
|
exp -= large_step;
|
||||||
|
}
|
||||||
|
#ifdef FASTFLOAT_64BIT_LIMB
|
||||||
|
uint32_t small_step = 27;
|
||||||
|
limb max_native = 7450580596923828125UL;
|
||||||
|
#else
|
||||||
|
uint32_t small_step = 13;
|
||||||
|
limb max_native = 1220703125U;
|
||||||
|
#endif
|
||||||
|
while (exp >= small_step) {
|
||||||
|
FASTFLOAT_TRY(small_mul(vec, max_native));
|
||||||
|
exp -= small_step;
|
||||||
|
}
|
||||||
|
if (exp != 0) {
|
||||||
|
FASTFLOAT_TRY(small_mul(vec, limb(small_power_of_5[exp])));
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// multiply as if by 10 raised to a power.
|
||||||
|
bool pow10(uint32_t exp) noexcept {
|
||||||
|
FASTFLOAT_TRY(pow5(exp));
|
||||||
|
return pow2(exp);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace fast_float
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -55,11 +55,34 @@ namespace detail {
|
|||||||
* where
|
* where
|
||||||
* p = log(5**-q)/log(2) = -q * log(5)/log(2)
|
* p = log(5**-q)/log(2) = -q * log(5)/log(2)
|
||||||
*/
|
*/
|
||||||
constexpr fastfloat_really_inline int power(int q) noexcept {
|
constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
|
||||||
return (((152170 + 65536) * q) >> 16) + 63;
|
return (((152170 + 65536) * q) >> 16) + 63;
|
||||||
}
|
}
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
|
// create an adjusted mantissa, biased by the invalid power2
|
||||||
|
// for significant digits already multiplied by 10 ** q.
|
||||||
|
template <typename binary>
|
||||||
|
fastfloat_really_inline
|
||||||
|
adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
|
||||||
|
int hilz = int(w >> 63) ^ 1;
|
||||||
|
adjusted_mantissa answer;
|
||||||
|
answer.mantissa = w << hilz;
|
||||||
|
int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent();
|
||||||
|
answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + invalid_am_bias);
|
||||||
|
return answer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// w * 10 ** q, without rounding the representation up.
|
||||||
|
// the power2 in the exponent will be adjusted by invalid_am_bias.
|
||||||
|
template <typename binary>
|
||||||
|
fastfloat_really_inline
|
||||||
|
adjusted_mantissa compute_error(int64_t q, uint64_t w) noexcept {
|
||||||
|
int lz = leading_zeroes(w);
|
||||||
|
w <<= lz;
|
||||||
|
value128 product = compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
|
||||||
|
return compute_error_scaled<binary>(q, product.high, lz);
|
||||||
|
}
|
||||||
|
|
||||||
// w * 10 ** q
|
// w * 10 ** q
|
||||||
// The returned value should be a valid ieee64 number that simply need to be packed.
|
// The returned value should be a valid ieee64 number that simply need to be packed.
|
||||||
@ -101,8 +124,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept {
|
|||||||
const bool inside_safe_exponent = (q >= -27) && (q <= 55); // always good because 5**q <2**128 when q>=0,
|
const bool inside_safe_exponent = (q >= -27) && (q <= 55); // always good because 5**q <2**128 when q>=0,
|
||||||
// and otherwise, for q<0, we have 5**-q<2**64 and the 128-bit reciprocal allows for exact computation.
|
// and otherwise, for q<0, we have 5**-q<2**64 and the 128-bit reciprocal allows for exact computation.
|
||||||
if(!inside_safe_exponent) {
|
if(!inside_safe_exponent) {
|
||||||
answer.power2 = -1; // This (a negative value) indicates an error condition.
|
return compute_error_scaled<binary>(q, product.high, lz);
|
||||||
return answer;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// The "compute_product_approximation" function can be slightly slower than a branchless approach:
|
// The "compute_product_approximation" function can be slightly slower than a branchless approach:
|
||||||
@ -113,7 +135,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept {
|
|||||||
|
|
||||||
answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3);
|
answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3);
|
||||||
|
|
||||||
answer.power2 = int(detail::power(int(q)) + upperbit - lz - binary::minimum_exponent());
|
answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - binary::minimum_exponent());
|
||||||
if (answer.power2 <= 0) { // we have a subnormal?
|
if (answer.power2 <= 0) { // we have a subnormal?
|
||||||
// Here have that answer.power2 <= 0 so -answer.power2 >= 0
|
// Here have that answer.power2 <= 0 so -answer.power2 >= 0
|
||||||
if(-answer.power2 + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
|
if(-answer.power2 + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
|
||||||
@ -167,7 +189,6 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept {
|
|||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
} // namespace fast_float
|
} // namespace fast_float
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
423
include/fast_float/digit_comparison.h
Normal file
423
include/fast_float/digit_comparison.h
Normal file
@ -0,0 +1,423 @@
|
|||||||
|
#ifndef FASTFLOAT_DIGIT_COMPARISON_H
|
||||||
|
#define FASTFLOAT_DIGIT_COMPARISON_H
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iterator>
|
||||||
|
|
||||||
|
#include "float_common.h"
|
||||||
|
#include "bigint.h"
|
||||||
|
#include "ascii_number.h"
|
||||||
|
|
||||||
|
namespace fast_float {
|
||||||
|
|
||||||
|
// Powers of ten from 1e0 to 1e19, indexed by exponent (20 entries).
constexpr static uint64_t powers_of_ten_uint64[] = {
    1UL,
    10UL,
    100UL,
    1000UL,
    10000UL,
    100000UL,
    1000000UL,
    10000000UL,
    100000000UL,
    1000000000UL,
    10000000000UL,
    100000000000UL,
    1000000000000UL,
    10000000000000UL,
    100000000000000UL,
    1000000000000000UL,
    10000000000000000UL,
    100000000000000000UL,
    1000000000000000000UL,
    10000000000000000000UL};
|
||||||
|
|
||||||
|
// calculate the exponent, in scientific notation, of the number.
|
||||||
|
// this algorithm is not even close to optimized, but it has no practical
|
||||||
|
// effect on performance: in order to have a faster algorithm, we'd need
|
||||||
|
// to slow down performance for faster algorithms, and this is still fast.
|
||||||
|
fastfloat_really_inline int32_t scientific_exponent(parsed_number_string& num) noexcept {
|
||||||
|
uint64_t mantissa = num.mantissa;
|
||||||
|
int32_t exponent = int32_t(num.exponent);
|
||||||
|
while (mantissa >= 10000) {
|
||||||
|
mantissa /= 10000;
|
||||||
|
exponent += 4;
|
||||||
|
}
|
||||||
|
while (mantissa >= 100) {
|
||||||
|
mantissa /= 100;
|
||||||
|
exponent += 2;
|
||||||
|
}
|
||||||
|
while (mantissa >= 10) {
|
||||||
|
mantissa /= 10;
|
||||||
|
exponent += 1;
|
||||||
|
}
|
||||||
|
return exponent;
|
||||||
|
}
|
||||||
|
|
||||||
|
// this converts a native floating-point number to an extended-precision float.
|
||||||
|
template <typename T>
|
||||||
|
fastfloat_really_inline adjusted_mantissa to_extended(T value) noexcept {
|
||||||
|
adjusted_mantissa am;
|
||||||
|
int32_t bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
|
||||||
|
if (std::is_same<T, float>::value) {
|
||||||
|
constexpr uint32_t exponent_mask = 0x7F800000;
|
||||||
|
constexpr uint32_t mantissa_mask = 0x007FFFFF;
|
||||||
|
constexpr uint64_t hidden_bit_mask = 0x00800000;
|
||||||
|
uint32_t bits;
|
||||||
|
::memcpy(&bits, &value, sizeof(T));
|
||||||
|
if ((bits & exponent_mask) == 0) {
|
||||||
|
// denormal
|
||||||
|
am.power2 = 1 - bias;
|
||||||
|
am.mantissa = bits & mantissa_mask;
|
||||||
|
} else {
|
||||||
|
// normal
|
||||||
|
am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
|
||||||
|
am.power2 -= bias;
|
||||||
|
am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
constexpr uint64_t exponent_mask = 0x7FF0000000000000;
|
||||||
|
constexpr uint64_t mantissa_mask = 0x000FFFFFFFFFFFFF;
|
||||||
|
constexpr uint64_t hidden_bit_mask = 0x0010000000000000;
|
||||||
|
uint64_t bits;
|
||||||
|
::memcpy(&bits, &value, sizeof(T));
|
||||||
|
if ((bits & exponent_mask) == 0) {
|
||||||
|
// denormal
|
||||||
|
am.power2 = 1 - bias;
|
||||||
|
am.mantissa = bits & mantissa_mask;
|
||||||
|
} else {
|
||||||
|
// normal
|
||||||
|
am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
|
||||||
|
am.power2 -= bias;
|
||||||
|
am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return am;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the extended precision value of the halfway point between b and b+u.
|
||||||
|
// we are given a native float that represents b, so we need to adjust it
|
||||||
|
// halfway between b and b+u.
|
||||||
|
template <typename T>
|
||||||
|
fastfloat_really_inline adjusted_mantissa to_extended_halfway(T value) noexcept {
|
||||||
|
adjusted_mantissa am = to_extended(value);
|
||||||
|
am.mantissa <<= 1;
|
||||||
|
am.mantissa += 1;
|
||||||
|
am.power2 -= 1;
|
||||||
|
return am;
|
||||||
|
}
|
||||||
|
|
||||||
|
// round an extended-precision float to the nearest machine float.
|
||||||
|
template <typename T, typename callback>
|
||||||
|
fastfloat_really_inline void round(adjusted_mantissa& am, callback cb) noexcept {
|
||||||
|
int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
|
||||||
|
if (-am.power2 >= mantissa_shift) {
|
||||||
|
// have a denormal float
|
||||||
|
int32_t shift = -am.power2 + 1;
|
||||||
|
cb(am, std::min(shift, 64));
|
||||||
|
// check for round-up: if rounding-nearest carried us to the hidden bit.
|
||||||
|
am.power2 = (am.mantissa < (uint64_t(1) << binary_format<T>::mantissa_explicit_bits())) ? 0 : 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// have a normal float, use the default shift.
|
||||||
|
cb(am, mantissa_shift);
|
||||||
|
|
||||||
|
// check for carry
|
||||||
|
if (am.mantissa >= (uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
|
||||||
|
am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
|
||||||
|
am.power2++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// check for infinite: we could have carried to an infinite power
|
||||||
|
am.mantissa &= ~(uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
|
||||||
|
if (am.power2 >= binary_format<T>::infinite_power()) {
|
||||||
|
am.power2 = binary_format<T>::infinite_power();
|
||||||
|
am.mantissa = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename callback>
|
||||||
|
fastfloat_really_inline
|
||||||
|
void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) noexcept {
|
||||||
|
uint64_t mask;
|
||||||
|
uint64_t halfway;
|
||||||
|
if (shift == 64) {
|
||||||
|
mask = UINT64_MAX;
|
||||||
|
} else {
|
||||||
|
mask = (uint64_t(1) << shift) - 1;
|
||||||
|
}
|
||||||
|
if (shift == 0) {
|
||||||
|
halfway = 0;
|
||||||
|
} else {
|
||||||
|
halfway = uint64_t(1) << (shift - 1);
|
||||||
|
}
|
||||||
|
uint64_t truncated_bits = am.mantissa & mask;
|
||||||
|
uint64_t is_above = truncated_bits > halfway;
|
||||||
|
uint64_t is_halfway = truncated_bits == halfway;
|
||||||
|
|
||||||
|
// shift digits into position
|
||||||
|
if (shift == 64) {
|
||||||
|
am.mantissa = 0;
|
||||||
|
} else {
|
||||||
|
am.mantissa >>= shift;
|
||||||
|
}
|
||||||
|
am.power2 += shift;
|
||||||
|
|
||||||
|
bool is_odd = (am.mantissa & 1) == 1;
|
||||||
|
am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Truncating shift: drop the low `shift` bits of the mantissa without
// rounding and raise power2 by the same amount. A shift of 64 clears the
// mantissa explicitly instead of performing an out-of-range shift.
fastfloat_really_inline void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
  am.mantissa = (shift == 64) ? 0 : (am.mantissa >> shift);
  am.power2 += shift;
}
|
||||||
|
|
||||||
|
// Advance `first` past any leading '0' characters in [first, last).
// Eight characters at a time are compared against a word of eight ASCII
// '0' bytes; the remainder is handled one character at a time.
fastfloat_really_inline void skip_zeros(const char*& first, const char* last) noexcept {
  uint64_t chunk;
  while (std::distance(first, last) >= 8) {
    ::memcpy(&chunk, first, sizeof(uint64_t));
    if (chunk != 0x3030303030303030) {
      break;
    }
    first += 8;
  }
  while (first != last && *first == '0') {
    first++;
  }
}
|
||||||
|
|
||||||
|
// determine if any non-zero digits were truncated.
|
||||||
|
// all characters must be valid digits.
|
||||||
|
fastfloat_really_inline bool is_truncated(const char* first, const char* last) noexcept {
|
||||||
|
// do 8-bit optimizations, can just compare to 8 literal 0s.
|
||||||
|
uint64_t val;
|
||||||
|
while (std::distance(first, last) >= 8) {
|
||||||
|
::memcpy(&val, first, sizeof(uint64_t));
|
||||||
|
if (val != 0x3030303030303030) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
first += 8;
|
||||||
|
}
|
||||||
|
while (first != last) {
|
||||||
|
if (*first != '0') {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
first++;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Span overload: checks the characters s.ptr .. s.ptr + s.len().
fastfloat_really_inline bool is_truncated(byte_span s) noexcept {
  const char* const begin = s.ptr;
  const char* const end = begin + s.len();
  return is_truncated(begin, end);
}
|
||||||
|
|
||||||
|
// Append the eight digits at `p` to `value` (value = value * 10^8 + digits)
// and advance `p` and both digit counters by eight.
fastfloat_really_inline
void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
  const char* const chunk = p;
  p += 8;
  counter += 8;
  count += 8;
  value = value * 100000000 + parse_eight_digits_unrolled(chunk);
}
|
||||||
|
|
||||||
|
// Append the single digit at `p` to `value` (value = value * 10 + digit)
// and advance `p` and both digit counters by one.
fastfloat_really_inline
void parse_one_digit(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
  const limb digit = limb(*p - '0');
  value = value * 10 + digit;
  ++p;
  ++counter;
  ++count;
}
|
||||||
|
|
||||||
|
// big = big * power + value.
// The boolean results of bigint::mul and bigint::add are discarded here.
fastfloat_really_inline
void add_native(bigint& big, limb power, limb value) noexcept {
  (void)big.mul(power);
  (void)big.add(value);
}
|
||||||
|
|
||||||
|
// Round the parsed digits up after non-zero digits were truncated.
// Rather than incrementing the last digit (which could carry ...9999 into
// ...10000 and produce a false halfway point), an extra trailing digit 1 is
// appended: big = big * 10 + 1, and the digit count grows by one.
fastfloat_really_inline void round_up_bigint(bigint& big, size_t& count) noexcept {
  ++count;
  add_native(big, 10, 1);
}
|
||||||
|
|
||||||
|
// parse the significant digits into a big integer
|
||||||
|
inline void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits, size_t& digits) noexcept {
|
||||||
|
// try to minimize the number of big integer and scalar multiplication.
|
||||||
|
// therefore, try to parse 8 digits at a time, and multiply by the largest
|
||||||
|
// scalar value (9 or 19 digits) for each step.
|
||||||
|
size_t counter = 0;
|
||||||
|
digits = 0;
|
||||||
|
limb value = 0;
|
||||||
|
#ifdef FASTFLOAT_64BIT_LIMB
|
||||||
|
size_t step = 19;
|
||||||
|
#else
|
||||||
|
size_t step = 9;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// process all integer digits.
|
||||||
|
const char* p = num.integer.ptr;
|
||||||
|
const char* pend = p + num.integer.len();
|
||||||
|
skip_zeros(p, pend);
|
||||||
|
// process all digits, in increments of step per loop
|
||||||
|
while (p != pend) {
|
||||||
|
while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
|
||||||
|
parse_eight_digits(p, value, counter, digits);
|
||||||
|
}
|
||||||
|
while (counter < step && p != pend && digits < max_digits) {
|
||||||
|
parse_one_digit(p, value, counter, digits);
|
||||||
|
}
|
||||||
|
if (digits == max_digits) {
|
||||||
|
// add the temporary value, then check if we've truncated any digits
|
||||||
|
add_native(result, limb(powers_of_ten_uint64[counter]), value);
|
||||||
|
bool truncated = is_truncated(p, pend);
|
||||||
|
if (num.fraction.ptr != nullptr) {
|
||||||
|
truncated |= is_truncated(num.fraction);
|
||||||
|
}
|
||||||
|
if (truncated) {
|
||||||
|
round_up_bigint(result, digits);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
add_native(result, limb(powers_of_ten_uint64[counter]), value);
|
||||||
|
counter = 0;
|
||||||
|
value = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// add our fraction digits, if they're available.
|
||||||
|
if (num.fraction.ptr != nullptr) {
|
||||||
|
p = num.fraction.ptr;
|
||||||
|
pend = p + num.fraction.len();
|
||||||
|
if (digits == 0) {
|
||||||
|
skip_zeros(p, pend);
|
||||||
|
}
|
||||||
|
// process all digits, in increments of step per loop
|
||||||
|
while (p != pend) {
|
||||||
|
while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
|
||||||
|
parse_eight_digits(p, value, counter, digits);
|
||||||
|
}
|
||||||
|
while (counter < step && p != pend && digits < max_digits) {
|
||||||
|
parse_one_digit(p, value, counter, digits);
|
||||||
|
}
|
||||||
|
if (digits == max_digits) {
|
||||||
|
// add the temporary value, then check if we've truncated any digits
|
||||||
|
add_native(result, limb(powers_of_ten_uint64[counter]), value);
|
||||||
|
bool truncated = is_truncated(p, pend);
|
||||||
|
if (truncated) {
|
||||||
|
round_up_bigint(result, digits);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
add_native(result, limb(powers_of_ten_uint64[counter]), value);
|
||||||
|
counter = 0;
|
||||||
|
value = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (counter != 0) {
|
||||||
|
add_native(result, limb(powers_of_ten_uint64[counter]), value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept {
|
||||||
|
FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
|
||||||
|
adjusted_mantissa answer;
|
||||||
|
bool truncated;
|
||||||
|
answer.mantissa = bigmant.hi64(truncated);
|
||||||
|
int bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
|
||||||
|
answer.power2 = bigmant.bit_length() - 64 + bias;
|
||||||
|
|
||||||
|
round<T>(answer, [truncated](adjusted_mantissa& a, int32_t shift) {
|
||||||
|
round_nearest_tie_even(a, shift, [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
|
||||||
|
return is_above || (is_halfway && truncated) || (is_odd && is_halfway);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return answer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// the scaling here is quite simple: we have, for the real digits `m * 10^e`,
|
||||||
|
// and for the theoretical digits `n * 2^f`. Since `e` is always negative,
|
||||||
|
// to scale them identically, we do `n * 2^f * 5^-f`, so we now have `m * 2^e`.
|
||||||
|
// we then need to scale by `2^(f- e)`, and then the two significant digits
|
||||||
|
// are of the same magnitude.
|
||||||
|
template <typename T>
|
||||||
|
inline adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
|
||||||
|
bigint& real_digits = bigmant;
|
||||||
|
int32_t real_exp = exponent;
|
||||||
|
|
||||||
|
// get the value of `b`, rounded down, and get a bigint representation of b+h
|
||||||
|
adjusted_mantissa am_b = am;
|
||||||
|
// gcc7 buf: use a lambda to remove the noexcept qualifier bug with -Wnoexcept-type.
|
||||||
|
round<T>(am_b, [](adjusted_mantissa&a, int32_t shift) { round_down(a, shift); });
|
||||||
|
T b;
|
||||||
|
to_float(false, am_b, b);
|
||||||
|
adjusted_mantissa theor = to_extended_halfway(b);
|
||||||
|
bigint theor_digits(theor.mantissa);
|
||||||
|
int32_t theor_exp = theor.power2;
|
||||||
|
|
||||||
|
// scale real digits and theor digits to be same power.
|
||||||
|
int32_t pow2_exp = theor_exp - real_exp;
|
||||||
|
uint32_t pow5_exp = uint32_t(-real_exp);
|
||||||
|
if (pow5_exp != 0) {
|
||||||
|
FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
|
||||||
|
}
|
||||||
|
if (pow2_exp > 0) {
|
||||||
|
FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
|
||||||
|
} else if (pow2_exp < 0) {
|
||||||
|
FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare digits, and use it to director rounding
|
||||||
|
int ord = real_digits.compare(theor_digits);
|
||||||
|
adjusted_mantissa answer = am;
|
||||||
|
round<T>(answer, [ord](adjusted_mantissa& a, int32_t shift) {
|
||||||
|
round_nearest_tie_even(a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
|
||||||
|
(void)_; // not needed, since we've done our comparison
|
||||||
|
(void)__; // not needed, since we've done our comparison
|
||||||
|
if (ord > 0) {
|
||||||
|
return true;
|
||||||
|
} else if (ord < 0) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
return is_odd;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return answer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// parse the significant digits as a big integer to unambiguously round the
|
||||||
|
// the significant digits. here, we are trying to determine how to round
|
||||||
|
// an extended float representation close to `b+h`, halfway between `b`
|
||||||
|
// (the float rounded-down) and `b+u`, the next positive float. this
|
||||||
|
// algorithm is always correct, and uses one of two approaches. when
|
||||||
|
// the exponent is positive relative to the significant digits (such as
|
||||||
|
// 1234), we create a big-integer representation, get the high 64-bits,
|
||||||
|
// determine if any lower bits are truncated, and use that to direct
|
||||||
|
// rounding. in case of a negative exponent relative to the significant
|
||||||
|
// digits (such as 1.2345), we create a theoretical representation of
|
||||||
|
// `b` as a big-integer type, scaled to the same binary exponent as
|
||||||
|
// the actual digits. we then compare the big integer representations
|
||||||
|
// of both, and use that to direct rounding.
|
||||||
|
template <typename T>
|
||||||
|
inline adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa am) noexcept {
|
||||||
|
// remove the invalid exponent bias
|
||||||
|
am.power2 -= invalid_am_bias;
|
||||||
|
|
||||||
|
int32_t sci_exp = scientific_exponent(num);
|
||||||
|
size_t max_digits = binary_format<T>::max_digits();
|
||||||
|
size_t digits = 0;
|
||||||
|
bigint bigmant;
|
||||||
|
parse_mantissa(bigmant, num, max_digits, digits);
|
||||||
|
// can't underflow, since digits is at most max_digits.
|
||||||
|
int32_t exponent = sci_exp + 1 - int32_t(digits);
|
||||||
|
if (exponent >= 0) {
|
||||||
|
return positive_digit_comp<T>(bigmant, exponent);
|
||||||
|
} else {
|
||||||
|
return negative_digit_comp<T>(bigmant, am, exponent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace fast_float
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -4,6 +4,7 @@
|
|||||||
#include <cfloat>
|
#include <cfloat>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) \
|
#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) \
|
||||||
|| defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \
|
|| defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \
|
||||||
@ -87,6 +88,18 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Always-on assertion: aborts the program when `x` evaluates to false.
// Wrapped in do/while(0) so it behaves as a single statement (safe inside
// an unbraced if/else). May be predefined by the user to override.
#ifndef FASTFLOAT_ASSERT
#include <cstdlib>
#define FASTFLOAT_ASSERT(x) do { if (!(x)) { std::abort(); } } while (0)
#endif

// Debug-only assertion; defaults to the standard assert().
#ifndef FASTFLOAT_DEBUG_ASSERT
#include <cassert>
#define FASTFLOAT_DEBUG_ASSERT(x) assert(x)
#endif

// rust style `try!()` macro, or `?` operator: returns false from the
// enclosing function when `x` evaluates to false.
#define FASTFLOAT_TRY(x) do { if (!(x)) { return false; } } while (0)
|
||||||
|
|
||||||
namespace fast_float {
|
namespace fast_float {
|
||||||
|
|
||||||
// Compares two ASCII strings in a case insensitive manner.
|
// Compares two ASCII strings in a case insensitive manner.
|
||||||
@ -103,11 +116,23 @@ CXX20_CONSTEXPR inline bool fastfloat_strncasecmp(const char *input1, const char
|
|||||||
#error "FLT_EVAL_METHOD should be defined, please include cfloat."
|
#error "FLT_EVAL_METHOD should be defined, please include cfloat."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace {
|
// a pointer and a length to a contiguous block of memory
|
||||||
constexpr uint32_t max_digits = 768;
|
template <typename T>
|
||||||
constexpr uint32_t max_digit_without_overflow = 19;
|
struct span {
|
||||||
constexpr int32_t decimal_point_range = 2047;
|
const T* ptr;
|
||||||
} // namespace
|
size_t length;
|
||||||
|
// View over `_length` elements starting at `_ptr`.
span(const T* _ptr, size_t _length)
    : ptr(_ptr),
      length(_length) {}
// Empty view: null pointer, zero length.
span()
    : ptr(nullptr),
      length(0) {}
|
||||||
|
|
||||||
|
constexpr size_t len() noexcept {
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Element access; the bounds check is active in debug builds only.
const T& operator[](size_t index) const noexcept {
  FASTFLOAT_DEBUG_ASSERT(index < length);
  return *(ptr + index);
}
|
||||||
|
};
|
||||||
|
|
||||||
struct value128 {
|
struct value128 {
|
||||||
uint64_t low;
|
uint64_t low;
|
||||||
@ -186,10 +211,9 @@ fastfloat_really_inline value128 full_multiplication(uint64_t a,
|
|||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct adjusted_mantissa {
|
struct adjusted_mantissa {
|
||||||
uint64_t mantissa{0};
|
uint64_t mantissa{0};
|
||||||
int power2{0}; // a negative value indicates an invalid result
|
int32_t power2{0}; // a negative value indicates an invalid result
|
||||||
adjusted_mantissa() = default;
|
adjusted_mantissa() = default;
|
||||||
bool operator==(const adjusted_mantissa &o) const {
|
bool operator==(const adjusted_mantissa &o) const {
|
||||||
return mantissa == o.mantissa && power2 == o.power2;
|
return mantissa == o.mantissa && power2 == o.power2;
|
||||||
@ -199,21 +223,8 @@ struct adjusted_mantissa {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct decimal {
|
// Bias so we can get the real exponent with an invalid adjusted_mantissa.
|
||||||
uint32_t num_digits{0};
|
constexpr static int32_t invalid_am_bias = -0x8000;
|
||||||
int32_t decimal_point{0};
|
|
||||||
bool negative{false};
|
|
||||||
bool truncated{false};
|
|
||||||
uint8_t digits[max_digits];
|
|
||||||
decimal() = default;
|
|
||||||
// Copies are not allowed since this is a fat object.
|
|
||||||
decimal(const decimal &) = delete;
|
|
||||||
// Copies are not allowed since this is a fat object.
|
|
||||||
decimal &operator=(const decimal &) = delete;
|
|
||||||
// Moves are allowed:
|
|
||||||
decimal(decimal &&) = default;
|
|
||||||
decimal &operator=(decimal &&other) = default;
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr static double powers_of_ten_double[] = {
|
constexpr static double powers_of_ten_double[] = {
|
||||||
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11,
|
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11,
|
||||||
@ -234,6 +245,7 @@ template <typename T> struct binary_format {
|
|||||||
static inline constexpr int largest_power_of_ten();
|
static inline constexpr int largest_power_of_ten();
|
||||||
static inline constexpr int smallest_power_of_ten();
|
static inline constexpr int smallest_power_of_ten();
|
||||||
static inline constexpr T exact_power_of_ten(int64_t power);
|
static inline constexpr T exact_power_of_ten(int64_t power);
|
||||||
|
static inline constexpr size_t max_digits();
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> inline constexpr int binary_format<double>::mantissa_explicit_bits() {
|
template <> inline constexpr int binary_format<double>::mantissa_explicit_bits() {
|
||||||
@ -334,17 +346,32 @@ inline constexpr int binary_format<float>::smallest_power_of_ten() {
|
|||||||
return -65;
|
return -65;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace fast_float
|
template <> inline constexpr size_t binary_format<double>::max_digits() {
|
||||||
|
return 769;
|
||||||
// for convenience:
|
}
|
||||||
template<class OStream>
|
template <> inline constexpr size_t binary_format<float>::max_digits() {
|
||||||
inline OStream& operator<<(OStream &out, const fast_float::decimal &d) {
|
return 114;
|
||||||
out << "0.";
|
|
||||||
for (size_t i = 0; i < d.num_digits; i++) {
|
|
||||||
out << int32_t(d.digits[i]);
|
|
||||||
}
|
|
||||||
out << " * 10 ** " << d.decimal_point;
|
|
||||||
return out;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
CXX20_CONSTEXPR
|
||||||
|
fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) {
|
||||||
|
uint64_t word = am.mantissa;
|
||||||
|
word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
|
||||||
|
word = negative
|
||||||
|
? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
|
||||||
|
#if FASTFLOAT_IS_BIG_ENDIAN == 1
|
||||||
|
if (std::is_same<T, float>::value) {
|
||||||
|
::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian
|
||||||
|
} else {
|
||||||
|
::memcpy(&value, &word, sizeof(T));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// For little-endian systems:
|
||||||
|
::memcpy(&value, &word, sizeof(T));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace fast_float
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#include "ascii_number.h"
|
#include "ascii_number.h"
|
||||||
#include "decimal_to_binary.h"
|
#include "decimal_to_binary.h"
|
||||||
#include "simple_decimal_conversion.h"
|
#include "digit_comparison.h"
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@ -60,28 +60,8 @@ CXX20_CONSTEXPR from_chars_result parse_infnan(const char *first, const char *la
|
|||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
CXX20_CONSTEXPR fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) {
|
|
||||||
uint64_t word = am.mantissa;
|
|
||||||
word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
|
|
||||||
word = negative
|
|
||||||
? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
|
|
||||||
#if FASTFLOAT_IS_BIG_ENDIAN == 1
|
|
||||||
if (std::is_same<T, float>::value) {
|
|
||||||
::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian
|
|
||||||
} else {
|
|
||||||
::memcpy(&value, &word, sizeof(T));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// For little-endian systems:
|
|
||||||
::memcpy(&value, &word, sizeof(T));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
CXX20_CONSTEXPR from_chars_result from_chars(const char *first, const char *last,
|
CXX20_CONSTEXPR from_chars_result from_chars(const char *first, const char *last,
|
||||||
T &value, chars_format fmt /*= chars_format::general*/) noexcept {
|
T &value, chars_format fmt /*= chars_format::general*/) noexcept {
|
||||||
@ -116,15 +96,15 @@ CXX20_CONSTEXPR from_chars_result from_chars_advanced(const char *first, const c
|
|||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
adjusted_mantissa am = compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
|
adjusted_mantissa am = compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
|
||||||
if(pns.too_many_digits) {
|
if(pns.too_many_digits && am.power2 >= 0) {
|
||||||
if(am != compute_float<binary_format<T>>(pns.exponent, pns.mantissa + 1)) {
|
if(am != compute_float<binary_format<T>>(pns.exponent, pns.mantissa + 1)) {
|
||||||
am.power2 = -1; // value is invalid.
|
am = compute_error<binary_format<T>>(pns.exponent, pns.mantissa);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
|
// If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
|
||||||
// then we need to go the long way around again. This is very uncommon.
|
// then we need to go the long way around again. This is very uncommon.
|
||||||
if(am.power2 < 0) { am = parse_long_mantissa<binary_format<T>>(first, last, options); }
|
if(am.power2 < 0) { am = digit_comp<T>(pns, am); }
|
||||||
detail::to_float(pns.negative, am, value);
|
to_float(pns.negative, am, value);
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -22,8 +22,8 @@ for filename in ['LICENSE-MIT', 'LICENSE-APACHE']:
|
|||||||
|
|
||||||
# code
|
# code
|
||||||
for filename in [ 'fast_float.h', 'float_common.h', 'ascii_number.h',
|
for filename in [ 'fast_float.h', 'float_common.h', 'ascii_number.h',
|
||||||
'fast_table.h', 'decimal_to_binary.h', 'ascii_number.h',
|
'fast_table.h', 'decimal_to_binary.h', 'bigint.h',
|
||||||
'simple_decimal_conversion.h', 'parse_number.h']:
|
'ascii_number.h', 'digit_comparison.h', 'parse_number.h']:
|
||||||
with open('include/fast_float/' + filename) as f:
|
with open('include/fast_float/' + filename) as f:
|
||||||
text = ''
|
text = ''
|
||||||
for line in f:
|
for line in f:
|
||||||
@ -45,8 +45,8 @@ text = '\n\n'.join([
|
|||||||
processed_files['LICENSE-' + args.license],
|
processed_files['LICENSE-' + args.license],
|
||||||
processed_files['fast_float.h'], processed_files['float_common.h'],
|
processed_files['fast_float.h'], processed_files['float_common.h'],
|
||||||
processed_files['ascii_number.h'], processed_files['fast_table.h'],
|
processed_files['ascii_number.h'], processed_files['fast_table.h'],
|
||||||
processed_files['decimal_to_binary.h'], processed_files['ascii_number.h'],
|
processed_files['decimal_to_binary.h'], processed_files['bigint.h'],
|
||||||
processed_files['simple_decimal_conversion.h'],
|
processed_files['ascii_number.h'], processed_files['digit_comparison.h'],
|
||||||
processed_files['parse_number.h']])
|
processed_files['parse_number.h']])
|
||||||
|
|
||||||
if args.output:
|
if args.output:
|
||||||
|
|||||||
@ -532,6 +532,7 @@ TEST_CASE("64bit.general") {
|
|||||||
verify("0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000044501477170144022721148195934182639518696390927032912960468522194496444440421538910330590478162701758282983178260792422137401728773891892910553144148156412434867599762821265346585071045737627442980259622449029037796981144446145705102663115100318287949527959668236039986479250965780342141637013812613333119898765515451440315261253813266652951306000184917766328660755595837392240989947807556594098101021612198814605258742579179000071675999344145086087205681577915435923018910334964869420614052182892431445797605163650903606514140377217442262561590244668525767372446430075513332450079650686719491377688478005309963967709758965844137894433796621993967316936280457084866613206797017728916080020698679408551343728867675409720757232455434770912461317493580281734466552734375", 
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000044501477170144022721148195934182639518696390927032912960468522194496444440421538910330590478162701758282983178260792422137401728773891892910553144148156412434867599762821265346585071045737627442980259622449029037796981144446145705102663115100318287949527959668236039986479250965780342141637013812613333119898765515451440315261253813266652951306000184917766328660755595837392240989947807556594098101021612198814605258742579179000071675999344145086087205681577915435923018910334964869420614052182892431445797605163650903606514140377217442262561590244668525767372446430075513332450079650686719491377688478005309963967709758965844137894433796621993967316936280457084866613206797017728916080020698679408551343728867675409720757232455434770912461317493580281734466552734375);
|
verify("0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000044501477170144022721148195934182639518696390927032912960468522194496444440421538910330590478162701758282983178260792422137401728773891892910553144148156412434867599762821265346585071045737627442980259622449029037796981144446145705102663115100318287949527959668236039986479250965780342141637013812613333119898765515451440315261253813266652951306000184917766328660755595837392240989947807556594098101021612198814605258742579179000071675999344145086087205681577915435923018910334964869420614052182892431445797605163650903606514140377217442262561590244668525767372446430075513332450079650686719491377688478005309963967709758965844137894433796621993967316936280457084866613206797017728916080020698679408551343728867675409720757232455434770912461317493580281734466552734375", 
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000044501477170144022721148195934182639518696390927032912960468522194496444440421538910330590478162701758282983178260792422137401728773891892910553144148156412434867599762821265346585071045737627442980259622449029037796981144446145705102663115100318287949527959668236039986479250965780342141637013812613333119898765515451440315261253813266652951306000184917766328660755595837392240989947807556594098101021612198814605258742579179000071675999344145086087205681577915435923018910334964869420614052182892431445797605163650903606514140377217442262561590244668525767372446430075513332450079650686719491377688478005309963967709758965844137894433796621993967316936280457084866613206797017728916080020698679408551343728867675409720757232455434770912461317493580281734466552734375);
|
||||||
verify("0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375", 
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375);
|
verify("0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375", 
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375);
|
||||||
verify("1438456663141390273526118207642235581183227845246331231162636653790368152091394196930365828634687637948157940776599182791387527135353034738357134110310609455693900824193549772792016543182680519740580354365467985440183598701312257624545562331397018329928613196125590274187720073914818062530830316533158098624984118889298281371812288789537310599037529113415438738954894752124724983067241108764488346454376699018673078404751121414804937224240805993123816932326223683090770561597570457793932985826162604255884529134126396282202126526253389383421806727954588525596114379801269094096329805054803089299736996870951258573010877404407451953846698609198213926882692078557033228265259305481198526059813164469187586693257335779522020407645498684263339921905227556616698129967412891282231685504660671277927198290009824680186319750978665734576683784255802269708917361719466043175201158849097881370477111850171579869056016061666173029059588433776015644439705050377554277696143928278093453792803846252715966016733222646442382892123940052441346822429721593884378212558701004356924243030059517489346646577724622498919752597382095222500311124181823512251071356181769376577651390028297796156208815375089159128394945710515861334486267101797497111125909272505194792870889617179758703442608016143343262159998149700606597792535574457560429226974273443630323818747730771316763398572110874959981923732463076884528677392654150010269822239401993427482376513231389212353583573566376915572650916866553612366187378959554983566712767093372906030188976220169058025354973622211666504549316958271880975697143546564469806791358707318873075708383345004090151974068325838177531266954177406661392229801349994695941509935655355652985723782153570084089560139142231.738475042362596875449154552392299548947138162081694168675340677843807613129780449323363759027012972466987370921816813162658754726545121090545507240267000456594786540949605260722461937870630634874991729398208026467698131898691830012167897399682179601734569071423681e-73
3", std::numeric_limits<double>::infinity());
|
verify("1438456663141390273526118207642235581183227845246331231162636653790368152091394196930365828634687637948157940776599182791387527135353034738357134110310609455693900824193549772792016543182680519740580354365467985440183598701312257624545562331397018329928613196125590274187720073914818062530830316533158098624984118889298281371812288789537310599037529113415438738954894752124724983067241108764488346454376699018673078404751121414804937224240805993123816932326223683090770561597570457793932985826162604255884529134126396282202126526253389383421806727954588525596114379801269094096329805054803089299736996870951258573010877404407451953846698609198213926882692078557033228265259305481198526059813164469187586693257335779522020407645498684263339921905227556616698129967412891282231685504660671277927198290009824680186319750978665734576683784255802269708917361719466043175201158849097881370477111850171579869056016061666173029059588433776015644439705050377554277696143928278093453792803846252715966016733222646442382892123940052441346822429721593884378212558701004356924243030059517489346646577724622498919752597382095222500311124181823512251071356181769376577651390028297796156208815375089159128394945710515861334486267101797497111125909272505194792870889617179758703442608016143343262159998149700606597792535574457560429226974273443630323818747730771316763398572110874959981923732463076884528677392654150010269822239401993427482376513231389212353583573566376915572650916866553612366187378959554983566712767093372906030188976220169058025354973622211666504549316958271880975697143546564469806791358707318873075708383345004090151974068325838177531266954177406661392229801349994695941509935655355652985723782153570084089560139142231.738475042362596875449154552392299548947138162081694168675340677843807613129780449323363759027012972466987370921816813162658754726545121090545507240267000456594786540949605260722461937870630634874991729398208026467698131898691830012167897399682179601734569071423681e-73
3", std::numeric_limits<double>::infinity());
|
||||||
|
verify("-2240084132271013504.131248280843119943687942846658579428", -0x1.f1660a65b00bfp+60);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("64bit.decimal_point") {
|
TEST_CASE("64bit.decimal_point") {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user