mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-06 16:56:57 +08:00
Fixes and cleanup for the parse_number_string function.
The exponent value always fits in an int16_t. Original main: tests time is 44278 ms; size of my tests 389.0k; size of my program 164.0k. My main: tests time is 42015 ms; size of my tests 389.0k; size of my program 164.0k. My main with FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN: tests time is 41282 ms; size of my tests 386.5k; size of my program 161.5k. After this I'll try it on my partner's Linux machine with the original test suite for a better comparison.
This commit is contained in:
parent
2da25b51c8
commit
8e1fda5d08
@ -50,7 +50,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
|
||||
read8_to_u64(UC const *chars) {
|
||||
if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
|
||||
uint64_t val = 0;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
for (uint8_t i = 0; i != 8; ++i) {
|
||||
val |= uint64_t(uint8_t(*chars)) << (i * 8);
|
||||
++chars;
|
||||
}
|
||||
@ -261,7 +261,7 @@ enum class parse_error {
|
||||
|
||||
template <typename UC> struct parsed_number_string_t {
|
||||
uint64_t mantissa{0};
|
||||
int32_t exponent{0};
|
||||
int16_t exponent{0};
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
bool negative{false};
|
||||
#endif
|
||||
@ -327,18 +327,17 @@ parse_number_string(UC const *p, UC const *pend,
|
||||
|
||||
UC const *const start_digits = p;
|
||||
|
||||
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
|
||||
|
||||
// an unsigned int avoids signed overflows (which are bad)
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
// a multiplication by 10 is cheaper than an arbitrary integer
|
||||
// multiplication
|
||||
i = 10 * i +
|
||||
answer.mantissa = 10 * answer.mantissa +
|
||||
uint64_t(*p -
|
||||
UC('0')); // might overflow, we will handle the overflow later
|
||||
++p;
|
||||
}
|
||||
UC const *const end_of_integer_part = p;
|
||||
uint32_t digit_count = uint32_t(end_of_integer_part - start_digits);
|
||||
uint16_t digit_count = uint16_t(end_of_integer_part - start_digits);
|
||||
answer.integer = span<UC const>(start_digits, digit_count);
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
|
||||
@ -353,43 +352,46 @@ parse_number_string(UC const *p, UC const *pend,
|
||||
}
|
||||
#endif
|
||||
|
||||
int32_t exponent = 0;
|
||||
bool const has_decimal_point = (p != pend) && (*p == options.decimal_point);
|
||||
if (has_decimal_point) {
|
||||
++p;
|
||||
UC const *before = p;
|
||||
uint16_t fraction = 0;
|
||||
// can occur at most twice without overflowing, but let it occur more, since
|
||||
// for integers with many digits, digit parsing is the primary bottleneck.
|
||||
loop_parse_if_eight_digits(p, pend, i);
|
||||
loop_parse_if_eight_digits(p, pend, answer.mantissa);
|
||||
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
uint8_t const digit = uint8_t(*p - UC('0'));
|
||||
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
|
||||
answer.mantissa = answer.mantissa * 10 + digit; // in rare cases, this will overflow, but that's ok
|
||||
++p;
|
||||
}
|
||||
exponent = int32_t(before - p);
|
||||
answer.fraction = span<UC const>(before, uint32_t(p - before));
|
||||
digit_count -= exponent;
|
||||
}
|
||||
fraction = uint16_t(before - p);
|
||||
answer.fraction = span<UC const>(before, uint16_t(p - before));
|
||||
digit_count -= fraction;
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
|
||||
// at least 1 digit in fractional part
|
||||
if (has_decimal_point && exponent == 0) {
|
||||
if (has_decimal_point && fraction == 0) {
|
||||
return report_parse_error<UC>(p,
|
||||
parse_error::no_digits_in_fractional_part);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else if (digit_count == 0) { // we must have encountered at least one integer!
|
||||
return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
|
||||
}
|
||||
int32_t exp_number = 0; // explicit exponential part
|
||||
// We have now parsed the integer and the fraction part of the mantissa.
|
||||
|
||||
// Now we can parse the exponent part.
|
||||
if (p != pend &&
|
||||
(uint8_t(options.format & chars_format::scientific) &&
|
||||
((UC('e') == *p) || (UC('E') == *p)))
|
||||
(UC('e') == *p) || (UC('E') == *p))
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
|| (uint8_t(options.format & detail::basic_fortran_fmt) &&
|
||||
(UC('d') == *p) || (UC('D') == *p))
|
||||
((UC('+') == *p) || (UC('-') == *p) ||
|
||||
(UC('d') == *p) || (UC('D') == *p)))
|
||||
#endif
|
||||
) {
|
||||
UC const *location_of_e = p;
|
||||
@ -416,14 +418,16 @@ parse_number_string(UC const *p, UC const *pend,
|
||||
p = location_of_e;
|
||||
} else {
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
if (answer.exponent < 0x1000) {
|
||||
// check for exponent overflow if we have too many digits.
|
||||
uint8_t const digit = uint8_t(*p - UC('0'));
|
||||
exp_number = 10 * exp_number + digit;
|
||||
answer.exponent = 10 * answer.exponent + digit;
|
||||
}
|
||||
++p;
|
||||
}
|
||||
if (neg_exp) {
|
||||
exp_number = -exp_number;
|
||||
answer.exponent = -answer.exponent;
|
||||
}
|
||||
exponent += exp_number;
|
||||
}
|
||||
} else {
|
||||
// If it scientific and not fixed, we have to bail out.
|
||||
@ -459,30 +463,28 @@ parse_number_string(UC const *p, UC const *pend,
|
||||
// Let us start again, this time, avoiding overflows.
|
||||
// We don't need to check if is_integer, since we use the
|
||||
// pre-tokenized spans from above.
|
||||
i = 0;
|
||||
answer.mantissa = 0;
|
||||
p = answer.integer.ptr;
|
||||
UC const *int_end = p + answer.integer.len();
|
||||
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
|
||||
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
|
||||
i = i * 10 + uint64_t(*p - UC('0'));
|
||||
while ((answer.mantissa < minimal_nineteen_digit_integer) && (p != int_end)) {
|
||||
answer.mantissa = answer.mantissa * 10 + uint64_t(*p - UC('0'));
|
||||
++p;
|
||||
}
|
||||
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
|
||||
exponent = uint32_t(end_of_integer_part - p) + exp_number;
|
||||
if (answer.mantissa >= minimal_nineteen_digit_integer) { // We have a big integers
|
||||
answer.exponent += int16_t(end_of_integer_part - p);
|
||||
} else { // We have a value with a fractional component.
|
||||
p = answer.fraction.ptr;
|
||||
UC const *frac_end = p + answer.fraction.len();
|
||||
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
|
||||
i = i * 10 + uint64_t(*p - UC('0'));
|
||||
while ((answer.mantissa < minimal_nineteen_digit_integer) && (p != frac_end)) {
|
||||
answer.mantissa = answer.mantissa * 10 + uint64_t(*p - UC('0'));
|
||||
++p;
|
||||
}
|
||||
exponent = uint32_t(answer.fraction.ptr - p) + exp_number;
|
||||
answer.exponent += int16_t(answer.fraction.ptr - p);
|
||||
}
|
||||
// We have now corrected both exponent and i, to a truncated value
|
||||
// We have now corrected both exponent and mantissa, to a truncated value
|
||||
}
|
||||
}
|
||||
answer.exponent = exponent;
|
||||
answer.mantissa = i;
|
||||
return answer;
|
||||
}
|
||||
|
||||
@ -518,7 +520,6 @@ parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
|
||||
UC const *const start_num = p;
|
||||
|
||||
// use SIMD here?
|
||||
while (p != pend && *p == UC('0')) {
|
||||
++p;
|
||||
}
|
||||
@ -541,7 +542,7 @@ parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
p++;
|
||||
}
|
||||
|
||||
uint32_t const digit_count = uint32_t(p - start_digits);
|
||||
uint16_t const digit_count = uint16_t(p - start_digits);
|
||||
|
||||
if (digit_count == 0) {
|
||||
if (has_leading_zeros) {
|
||||
|
||||
@ -19,11 +19,11 @@ namespace fast_float {
|
||||
#if defined(FASTFLOAT_64BIT) && !defined(__sparc)
|
||||
#define FASTFLOAT_64BIT_LIMB 1
|
||||
typedef uint64_t limb;
|
||||
constexpr uint32_t limb_bits = 64;
|
||||
constexpr uint16_t limb_bits = 64;
|
||||
#else
|
||||
#define FASTFLOAT_32BIT_LIMB
|
||||
typedef uint32_t limb;
|
||||
constexpr uint32_t limb_bits = 32;
|
||||
constexpr uint16_t limb_bits = 32;
|
||||
#endif
|
||||
|
||||
typedef span<limb> limb_span;
|
||||
@ -32,15 +32,15 @@ typedef span<limb> limb_span;
|
||||
// of bits required to store the largest bigint, which is
|
||||
// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or
|
||||
// ~3600 bits, so we round to 4000.
|
||||
constexpr uint32_t bigint_bits = 4000;
|
||||
constexpr uint32_t bigint_limbs = bigint_bits / limb_bits;
|
||||
constexpr uint16_t bigint_bits = 4000;
|
||||
constexpr uint16_t bigint_limbs = bigint_bits / limb_bits;
|
||||
|
||||
// vector-like type that is allocated on the stack. the entire
|
||||
// buffer is pre-allocated, and only the length changes.
|
||||
template <uint32_t size> struct stackvec {
|
||||
template <uint8_t size> struct stackvec {
|
||||
limb data[size];
|
||||
// we never need more than 150 limbs
|
||||
uint32_t length{0};
|
||||
uint8_t length{0};
|
||||
|
||||
FASTFLOAT_CONSTEXPR20 stackvec() noexcept = default;
|
||||
stackvec(stackvec const &) = delete;
|
||||
@ -53,33 +53,33 @@ template <uint32_t size> struct stackvec {
|
||||
FASTFLOAT_ASSERT(try_extend(s));
|
||||
}
|
||||
|
||||
FASTFLOAT_CONSTEXPR14 limb &operator[](uint32_t index) noexcept {
|
||||
FASTFLOAT_CONSTEXPR14 limb &operator[](uint16_t index) noexcept {
|
||||
FASTFLOAT_DEBUG_ASSERT(index < length);
|
||||
return data[index];
|
||||
}
|
||||
|
||||
FASTFLOAT_CONSTEXPR14 const limb &operator[](uint32_t index) const noexcept {
|
||||
FASTFLOAT_CONSTEXPR14 const limb &operator[](uint16_t index) const noexcept {
|
||||
FASTFLOAT_DEBUG_ASSERT(index < length);
|
||||
return data[index];
|
||||
}
|
||||
|
||||
// index from the end of the container
|
||||
FASTFLOAT_CONSTEXPR14 const limb &rindex(uint32_t index) const noexcept {
|
||||
FASTFLOAT_CONSTEXPR14 const limb &rindex(uint16_t index) const noexcept {
|
||||
FASTFLOAT_DEBUG_ASSERT(index < length);
|
||||
uint32_t rindex = length - index - 1;
|
||||
uint16_t rindex = length - index - 1;
|
||||
return data[rindex];
|
||||
}
|
||||
|
||||
// set the length, without bounds checking.
|
||||
FASTFLOAT_CONSTEXPR14 void set_len(uint32_t len) noexcept {
|
||||
FASTFLOAT_CONSTEXPR14 void set_len(uint8_t len) noexcept {
|
||||
length = len;
|
||||
}
|
||||
|
||||
constexpr uint32_t len() const noexcept { return length; }
|
||||
constexpr uint8_t len() const noexcept { return length; }
|
||||
|
||||
constexpr bool is_empty() const noexcept { return length == 0; }
|
||||
|
||||
constexpr uint32_t capacity() const noexcept { return size; }
|
||||
constexpr uint8_t capacity() const noexcept { return size; }
|
||||
|
||||
// append item to vector, without bounds checking
|
||||
FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept {
|
||||
@ -118,9 +118,9 @@ template <uint32_t size> struct stackvec {
|
||||
// if the new size is longer than the vector, assign value to each
|
||||
// appended item.
|
||||
FASTFLOAT_CONSTEXPR20
|
||||
void resize_unchecked(uint32_t new_len, limb value) noexcept {
|
||||
void resize_unchecked(uint8_t new_len, limb value) noexcept {
|
||||
if (new_len > len()) {
|
||||
uint32_t count = new_len - len();
|
||||
uint8_t count = new_len - len();
|
||||
limb *first = data + len();
|
||||
limb *last = first + count;
|
||||
::std::fill(first, last, value);
|
||||
@ -131,7 +131,7 @@ template <uint32_t size> struct stackvec {
|
||||
}
|
||||
|
||||
// try to resize the vector, returning if the vector was resized.
|
||||
FASTFLOAT_CONSTEXPR20 bool try_resize(uint32_t new_len, limb value) noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bool try_resize(uint8_t new_len, limb value) noexcept {
|
||||
if (new_len > capacity()) {
|
||||
return false;
|
||||
} else {
|
||||
@ -143,7 +143,7 @@ template <uint32_t size> struct stackvec {
|
||||
// check if any limbs are non-zero after the given index.
|
||||
// this needs to be done in reverse order, since the index
|
||||
// is relative to the most significant limbs.
|
||||
FASTFLOAT_CONSTEXPR14 bool nonzero(uint32_t index) const noexcept {
|
||||
FASTFLOAT_CONSTEXPR14 bool nonzero(uint16_t index) const noexcept {
|
||||
while (index < len()) {
|
||||
if (rindex(index) != 0) {
|
||||
return true;
|
||||
@ -258,10 +258,10 @@ scalar_mul(limb x, limb y, limb &carry) noexcept {
|
||||
|
||||
// add scalar value to bigint starting from offset.
|
||||
// used in grade school multiplication
|
||||
template <uint32_t size>
|
||||
template <uint8_t size>
|
||||
inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y,
|
||||
uint32_t start) noexcept {
|
||||
uint32_t index = start;
|
||||
uint8_t index = (uint8_t)start;
|
||||
limb carry = y;
|
||||
bool overflow;
|
||||
while (carry != 0 && index < vec.len()) {
|
||||
@ -276,18 +276,18 @@ inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y,
|
||||
}
|
||||
|
||||
// add scalar value to bigint.
|
||||
template <uint32_t size>
|
||||
template <uint8_t size>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
|
||||
small_add(stackvec<size> &vec, limb y) noexcept {
|
||||
return small_add_from(vec, y, 0);
|
||||
}
|
||||
|
||||
// multiply bigint by scalar value.
|
||||
template <uint32_t size>
|
||||
template <uint8_t size>
|
||||
inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,
|
||||
limb y) noexcept {
|
||||
limb carry = 0;
|
||||
for (uint32_t index = 0; index != vec.len(); ++index) {
|
||||
for (uint8_t index = 0; index != vec.len(); ++index) {
|
||||
vec[index] = scalar_mul(vec[index], y, carry);
|
||||
}
|
||||
if (carry != 0) {
|
||||
@ -298,9 +298,9 @@ inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,
|
||||
|
||||
// add bigint to bigint starting from index.
|
||||
// used in grade school multiplication
|
||||
template <uint32_t size>
|
||||
template <uint8_t size>
|
||||
FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
|
||||
uint32_t start) noexcept {
|
||||
uint8_t start) noexcept {
|
||||
// the effective x buffer is from `xstart..x.len()`, so exit early
|
||||
// if we can't get that current range.
|
||||
if (x.len() < start || y.len() > x.len() - start) {
|
||||
@ -308,7 +308,7 @@ FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
|
||||
}
|
||||
|
||||
bool carry = false;
|
||||
for (uint32_t index = 0; index < y.len(); ++index) {
|
||||
for (uint8_t index = 0; index < y.len(); ++index) {
|
||||
limb xi = x[index + start];
|
||||
limb yi = y[index];
|
||||
bool c1 = false;
|
||||
@ -329,14 +329,14 @@ FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
|
||||
}
|
||||
|
||||
// add bigint to bigint.
|
||||
template <uint32_t size>
|
||||
template <uint8_t size>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
|
||||
large_add_from(stackvec<size> &x, limb_span y) noexcept {
|
||||
return large_add_from(x, y, 0);
|
||||
}
|
||||
|
||||
// grade-school multiplication algorithm
|
||||
template <uint32_t size>
|
||||
template <uint8_t size>
|
||||
FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
|
||||
limb_span xs = limb_span(x.data, x.len());
|
||||
stackvec<size> z(xs);
|
||||
@ -345,7 +345,7 @@ FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
|
||||
if (y.len() != 0) {
|
||||
limb y0 = y[0];
|
||||
FASTFLOAT_TRY(small_mul(x, y0));
|
||||
for (uint32_t index = 1; index != y.len(); ++index) {
|
||||
for (uint8_t index = 1; index != y.len(); ++index) {
|
||||
limb yi = y[index];
|
||||
stackvec<size> zi;
|
||||
if (yi != 0) {
|
||||
@ -364,7 +364,7 @@ FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
|
||||
}
|
||||
|
||||
// grade-school multiplication algorithm
|
||||
template <uint32_t size>
|
||||
template <uint8_t size>
|
||||
FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec<size> &x, limb_span y) noexcept {
|
||||
if (y.len() == 1) {
|
||||
FASTFLOAT_TRY(small_mul(x, y[0]));
|
||||
@ -493,7 +493,7 @@ struct bigint : pow5_tables<> {
|
||||
} else if (vec.len() < other.vec.len()) {
|
||||
return -1;
|
||||
} else {
|
||||
for (uint32_t index = vec.len(); index > 0; --index) {
|
||||
for (uint8_t index = vec.len(); index > 0; --index) {
|
||||
limb xi = vec[index - 1];
|
||||
limb yi = other.vec[index - 1];
|
||||
if (xi > yi) {
|
||||
@ -508,7 +508,7 @@ struct bigint : pow5_tables<> {
|
||||
|
||||
// shift left each limb n bits, carrying over to the new limb
|
||||
// returns true if we were able to shift all the digits.
|
||||
FASTFLOAT_CONSTEXPR20 bool shl_bits(uint32_t n) noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bool shl_bits(uint16_t n) noexcept {
|
||||
// Internally, for each item, we shift left by n, and add the previous
|
||||
// right shifted limb-bits.
|
||||
// For example, we transform (for u8) shifted left 2, to:
|
||||
@ -517,10 +517,10 @@ struct bigint : pow5_tables<> {
|
||||
FASTFLOAT_DEBUG_ASSERT(n != 0);
|
||||
FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8);
|
||||
|
||||
uint32_t const shl = n;
|
||||
uint32_t const shr = limb_bits - shl;
|
||||
uint16_t const shl = n;
|
||||
uint16_t const shr = limb_bits - shl;
|
||||
limb prev = 0;
|
||||
for (uint32_t index = 0; index != vec.len(); ++index) {
|
||||
for (uint8_t index = 0; index != vec.len(); ++index) {
|
||||
limb xi = vec[index];
|
||||
vec[index] = (xi << shl) | (prev >> shr);
|
||||
prev = xi;
|
||||
@ -534,7 +534,7 @@ struct bigint : pow5_tables<> {
|
||||
}
|
||||
|
||||
// move the limbs left by `n` limbs.
|
||||
FASTFLOAT_CONSTEXPR20 bool shl_limbs(uint32_t n) noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bool shl_limbs(int16_t n) noexcept {
|
||||
FASTFLOAT_DEBUG_ASSERT(n != 0);
|
||||
if (n + vec.len() > vec.capacity()) {
|
||||
return false;
|
||||
@ -555,9 +555,9 @@ struct bigint : pow5_tables<> {
|
||||
}
|
||||
|
||||
// move the limbs left by `n` bits.
|
||||
FASTFLOAT_CONSTEXPR20 bool shl(uint32_t n) noexcept {
|
||||
uint32_t const rem = n % limb_bits;
|
||||
uint32_t const div = n / limb_bits;
|
||||
FASTFLOAT_CONSTEXPR20 bool shl(uint16_t n) noexcept {
|
||||
uint16_t const rem = n % limb_bits;
|
||||
uint16_t const div = n / limb_bits;
|
||||
if (rem != 0) {
|
||||
FASTFLOAT_TRY(shl_bits(rem));
|
||||
}
|
||||
@ -568,7 +568,7 @@ struct bigint : pow5_tables<> {
|
||||
}
|
||||
|
||||
// get the number of leading zeros in the bigint.
|
||||
FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 uint8_t ctlz() const noexcept {
|
||||
if (vec.is_empty()) {
|
||||
return 0;
|
||||
} else {
|
||||
@ -583,9 +583,9 @@ struct bigint : pow5_tables<> {
|
||||
}
|
||||
|
||||
// get the number of bits in the bigint.
|
||||
FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept {
|
||||
int lz = ctlz();
|
||||
return int(limb_bits * vec.len()) - lz;
|
||||
FASTFLOAT_CONSTEXPR20 uint16_t bit_length() const noexcept {
|
||||
uint16_t lz = ctlz();
|
||||
return uint16_t(limb_bits * vec.len()) - lz;
|
||||
}
|
||||
|
||||
FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); }
|
||||
@ -593,22 +593,22 @@ struct bigint : pow5_tables<> {
|
||||
FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); }
|
||||
|
||||
// multiply as if by 2 raised to a power.
|
||||
FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); }
|
||||
FASTFLOAT_CONSTEXPR20 bool pow2(int16_t exp) noexcept { return shl(exp); }
|
||||
|
||||
// multiply as if by 5 raised to a power.
|
||||
FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bool pow5(int16_t exp) noexcept {
|
||||
// multiply by a power of 5
|
||||
size_t const large_length = sizeof(large_power_of_5) / sizeof(limb);
|
||||
uint8_t const large_length = sizeof(large_power_of_5) / sizeof(limb);
|
||||
limb_span const large = limb_span(large_power_of_5, large_length);
|
||||
while (exp >= large_step) {
|
||||
FASTFLOAT_TRY(large_mul(vec, large));
|
||||
exp -= large_step;
|
||||
}
|
||||
#ifdef FASTFLOAT_64BIT_LIMB
|
||||
uint32_t const small_step = 27;
|
||||
uint8_t const small_step = 27;
|
||||
limb const max_native = 7450580596923828125UL;
|
||||
#else
|
||||
uint32_t const small_step = 13;
|
||||
uint8_t const small_step = 13;
|
||||
limb const max_native = 1220703125U;
|
||||
#endif
|
||||
while (exp >= small_step) {
|
||||
@ -627,7 +627,7 @@ struct bigint : pow5_tables<> {
|
||||
}
|
||||
|
||||
// multiply as if by 10 raised to a power.
|
||||
FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bool pow10(int16_t exp) noexcept {
|
||||
FASTFLOAT_TRY(pow5(exp));
|
||||
return pow2(exp);
|
||||
}
|
||||
|
||||
@ -71,12 +71,12 @@ constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
|
||||
// for significant digits already multiplied by 10 ** q.
|
||||
template <typename binary>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa
|
||||
compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
|
||||
int hilz = int(w >> 63) ^ 1;
|
||||
compute_error_scaled(int64_t q, uint64_t w, int32_t lz) noexcept {
|
||||
int32_t hilz = int32_t(w >> 63) ^ 1;
|
||||
adjusted_mantissa answer;
|
||||
answer.mantissa = w << hilz;
|
||||
int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent();
|
||||
answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 +
|
||||
int32_t bias = binary::mantissa_explicit_bits() - binary::minimum_exponent();
|
||||
answer.power2 = int16_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 +
|
||||
invalid_am_bias);
|
||||
return answer;
|
||||
}
|
||||
@ -143,7 +143,7 @@ compute_float(int64_t q, uint64_t w) noexcept {
|
||||
|
||||
answer.mantissa = product.high >> shift;
|
||||
|
||||
answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz -
|
||||
answer.power2 = int16_t(detail::power(int32_t(q)) + upperbit - lz -
|
||||
binary::minimum_exponent());
|
||||
if (answer.power2 <= 0) { // we have a subnormal?
|
||||
// Here have that answer.power2 <= 0 so -answer.power2 >= 0
|
||||
@ -196,7 +196,7 @@ compute_float(int64_t q, uint64_t w) noexcept {
|
||||
answer.mantissa >>= 1;
|
||||
if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) {
|
||||
answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits());
|
||||
answer.power2++; // undo previous addition
|
||||
++answer.power2; // undo previous addition
|
||||
}
|
||||
|
||||
answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits());
|
||||
|
||||
@ -39,10 +39,10 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
|
||||
// effect on performance: in order to have a faster algorithm, we'd need
|
||||
// to slow down performance for faster algorithms, and this is still fast.
|
||||
template <typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int16_t
|
||||
scientific_exponent(parsed_number_string_t<UC> const &num) noexcept {
|
||||
uint64_t mantissa = num.mantissa;
|
||||
int32_t exponent = num.exponent;
|
||||
int16_t exponent = num.exponent;
|
||||
while (mantissa >= 10000) {
|
||||
mantissa /= 10000;
|
||||
exponent += 4;
|
||||
@ -68,7 +68,7 @@ to_extended(T const &value) noexcept {
|
||||
constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
|
||||
|
||||
adjusted_mantissa am;
|
||||
int32_t bias = binary_format<T>::mantissa_explicit_bits() -
|
||||
int16_t bias = binary_format<T>::mantissa_explicit_bits() -
|
||||
binary_format<T>::minimum_exponent();
|
||||
equiv_uint bits;
|
||||
#if FASTFLOAT_HAS_BIT_CAST
|
||||
@ -82,7 +82,7 @@ to_extended(T const &value) noexcept {
|
||||
am.mantissa = bits & mantissa_mask;
|
||||
} else {
|
||||
// normal
|
||||
am.power2 = int32_t((bits & exponent_mask) >>
|
||||
am.power2 = int16_t((bits & exponent_mask) >>
|
||||
binary_format<T>::mantissa_explicit_bits());
|
||||
am.power2 -= bias;
|
||||
am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
|
||||
@ -108,11 +108,11 @@ to_extended_halfway(T const &value) noexcept {
|
||||
template <typename T, typename callback>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
|
||||
callback cb) noexcept {
|
||||
int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
|
||||
int16_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
|
||||
if (-am.power2 >= mantissa_shift) {
|
||||
// have a denormal float
|
||||
int32_t shift = -am.power2 + 1;
|
||||
cb(am, std::min<int32_t>(shift, 64));
|
||||
int16_t shift = -am.power2 + 1;
|
||||
cb(am, std::min<int16_t>(shift, 64));
|
||||
// check for round-up: if rounding-nearest carried us to the hidden bit.
|
||||
am.power2 = (am.mantissa <
|
||||
(uint64_t(1) << binary_format<T>::mantissa_explicit_bits()))
|
||||
@ -128,7 +128,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
|
||||
if (am.mantissa >=
|
||||
(uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
|
||||
am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
|
||||
am.power2++;
|
||||
++am.power2;
|
||||
}
|
||||
|
||||
// check for infinite: we could have carried to an infinite power
|
||||
@ -141,7 +141,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
|
||||
|
||||
template <typename callback>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
|
||||
round_nearest_tie_even(adjusted_mantissa &am, int32_t shift,
|
||||
round_nearest_tie_even(adjusted_mantissa &am, int16_t shift,
|
||||
callback cb) noexcept {
|
||||
uint64_t const mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1;
|
||||
uint64_t const halfway = (shift == 0) ? 0 : uint64_t(1) << (shift - 1);
|
||||
@ -162,7 +162,7 @@ round_nearest_tie_even(adjusted_mantissa &am, int32_t shift,
|
||||
}
|
||||
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
|
||||
round_down(adjusted_mantissa &am, int32_t shift) noexcept {
|
||||
round_down(adjusted_mantissa &am, int16_t shift) noexcept {
|
||||
if (shift == 64) {
|
||||
am.mantissa = 0;
|
||||
} else {
|
||||
@ -342,17 +342,17 @@ parse_mantissa(bigint &result, const parsed_number_string_t<UC> &num) noexcept {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline FASTFLOAT_CONSTEXPR20 void
|
||||
positive_digit_comp(bigint &bigmant, adjusted_mantissa &am,
|
||||
int32_t const exponent) noexcept {
|
||||
FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
|
||||
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
|
||||
positive_digit_comp(bigint &bigmant, adjusted_mantissa am,
|
||||
int16_t const exponent) noexcept {
|
||||
FASTFLOAT_ASSERT(bigmant.pow10(exponent));
|
||||
bool truncated;
|
||||
am.mantissa = bigmant.hi64(truncated);
|
||||
int32_t bias = binary_format<T>::mantissa_explicit_bits() -
|
||||
int16_t bias = binary_format<T>::mantissa_explicit_bits() -
|
||||
binary_format<T>::minimum_exponent();
|
||||
am.power2 = bigmant.bit_length() - 64 + bias;
|
||||
|
||||
round<T>(am, [truncated](adjusted_mantissa &a, int32_t shift) {
|
||||
round<T>(am, [truncated](adjusted_mantissa &a, int16_t shift) {
|
||||
round_nearest_tie_even(
|
||||
a, shift,
|
||||
[truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
|
||||
@ -360,6 +360,8 @@ positive_digit_comp(bigint &bigmant, adjusted_mantissa &am,
|
||||
(is_odd && is_halfway);
|
||||
});
|
||||
});
|
||||
|
||||
return am;
|
||||
}
|
||||
|
||||
// the scaling here is quite simple: we have, for the real digits `m * 10^e`,
|
||||
@ -368,11 +370,11 @@ positive_digit_comp(bigint &bigmant, adjusted_mantissa &am,
|
||||
// we then need to scale by `2^(f- e)`, and then the two significant digits
|
||||
// are of the same magnitude.
|
||||
template <typename T>
|
||||
inline FASTFLOAT_CONSTEXPR20 void
|
||||
negative_digit_comp(bigint &bigmant, adjusted_mantissa &am,
|
||||
int32_t const exponent) noexcept {
|
||||
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
|
||||
negative_digit_comp(bigint &bigmant, adjusted_mantissa am,
|
||||
int16_t const exponent) noexcept {
|
||||
bigint &real_digits = bigmant;
|
||||
const int32_t &real_exp = exponent;
|
||||
int16_t const &real_exp = exponent;
|
||||
|
||||
T b;
|
||||
{
|
||||
@ -381,7 +383,7 @@ negative_digit_comp(bigint &bigmant, adjusted_mantissa &am,
|
||||
// gcc7 bug: use a lambda to remove the noexcept qualifier bug with
|
||||
// -Wnoexcept-type.
|
||||
round<T>(am_b,
|
||||
[](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
|
||||
[](adjusted_mantissa &a, int16_t shift) { round_down(a, shift); });
|
||||
to_float(
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
false,
|
||||
@ -390,23 +392,23 @@ negative_digit_comp(bigint &bigmant, adjusted_mantissa &am,
|
||||
}
|
||||
adjusted_mantissa theor = to_extended_halfway(b);
|
||||
bigint theor_digits(theor.mantissa);
|
||||
int32_t theor_exp = theor.power2;
|
||||
int16_t theor_exp = theor.power2;
|
||||
|
||||
// scale real digits and theor digits to be same power.
|
||||
int32_t pow2_exp = theor_exp - real_exp;
|
||||
uint32_t pow5_exp = uint32_t(-real_exp);
|
||||
int16_t pow2_exp = theor_exp - real_exp;
|
||||
uint16_t pow5_exp = uint16_t(-real_exp);
|
||||
if (pow5_exp != 0) {
|
||||
FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
|
||||
}
|
||||
if (pow2_exp > 0) {
|
||||
FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
|
||||
FASTFLOAT_ASSERT(theor_digits.pow2(pow2_exp));
|
||||
} else if (pow2_exp < 0) {
|
||||
FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
|
||||
FASTFLOAT_ASSERT(real_digits.pow2(-pow2_exp));
|
||||
}
|
||||
|
||||
// compare digits, and use it to director rounding
|
||||
int ord = real_digits.compare(theor_digits);
|
||||
round<T>(am, [ord](adjusted_mantissa &a, int32_t shift) {
|
||||
round<T>(am, [ord](adjusted_mantissa &a, int16_t shift) {
|
||||
round_nearest_tie_even(
|
||||
a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
|
||||
(void)_; // not needed, since we've done our comparison
|
||||
@ -420,6 +422,8 @@ negative_digit_comp(bigint &bigmant, adjusted_mantissa &am,
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
return am;
|
||||
}
|
||||
|
||||
// parse the significant digits as a big integer to unambiguously round the
|
||||
@ -436,21 +440,21 @@ negative_digit_comp(bigint &bigmant, adjusted_mantissa &am,
|
||||
// the actual digits. we then compare the big integer representations
|
||||
// of both, and use that to direct rounding.
|
||||
template <typename T, typename UC>
|
||||
inline FASTFLOAT_CONSTEXPR20 void digit_comp(
|
||||
parsed_number_string_t<UC> const &num, adjusted_mantissa &am) noexcept {
|
||||
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa digit_comp(
|
||||
parsed_number_string_t<UC> const &num, adjusted_mantissa am) noexcept {
|
||||
// remove the invalid exponent bias
|
||||
am.power2 -= invalid_am_bias;
|
||||
|
||||
bigint bigmant;
|
||||
int32_t const sci_exp = scientific_exponent(num);
|
||||
int16_t const sci_exp = scientific_exponent(num);
|
||||
|
||||
uint16_t const digits = parse_mantissa<T, UC>(bigmant, num);
|
||||
// can't underflow, since digits is at most max_digits.
|
||||
int32_t const exponent = sci_exp + 1 - digits;
|
||||
int16_t const exponent = sci_exp + 1 - digits;
|
||||
if (exponent >= 0) {
|
||||
positive_digit_comp<T>(bigmant, am, exponent);
|
||||
return positive_digit_comp<T>(bigmant, am, exponent);
|
||||
} else {
|
||||
negative_digit_comp<T>(bigmant, am, exponent);
|
||||
return negative_digit_comp<T>(bigmant, am, exponent);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -293,15 +293,15 @@ fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase,
|
||||
// a pointer and a length to a contiguous block of memory
|
||||
template <typename T> struct span {
|
||||
T const *ptr;
|
||||
uint32_t length;
|
||||
uint16_t length;
|
||||
|
||||
constexpr span(T const *_ptr, uint32_t _length) : ptr(_ptr), length(_length) {}
|
||||
constexpr span(T const *_ptr, uint16_t _length) : ptr(_ptr), length(_length) {}
|
||||
|
||||
constexpr span() : ptr(nullptr), length(0) {}
|
||||
|
||||
constexpr uint32_t len() const noexcept { return length; }
|
||||
constexpr uint16_t len() const noexcept { return length; }
|
||||
|
||||
FASTFLOAT_CONSTEXPR14 const T &operator[](uint32_t index) const noexcept {
|
||||
FASTFLOAT_CONSTEXPR14 const T &operator[](uint16_t index) const noexcept {
|
||||
FASTFLOAT_DEBUG_ASSERT(index < length);
|
||||
return ptr[index];
|
||||
}
|
||||
@ -318,8 +318,8 @@ struct value128 {
|
||||
};
|
||||
|
||||
/* Helper C++14 constexpr generic implementation of leading_zeroes */
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int
|
||||
leading_zeroes_generic(uint64_t input_num, int last_bit = 0) {
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint8_t
|
||||
leading_zeroes_generic(uint64_t input_num, uint64_t last_bit = 0) {
|
||||
if (input_num & uint64_t(0xffffffff00000000)) {
|
||||
input_num >>= 32;
|
||||
last_bit |= 32;
|
||||
@ -343,11 +343,11 @@ leading_zeroes_generic(uint64_t input_num, int last_bit = 0) {
|
||||
if (input_num & uint64_t(0x2)) { /* input_num >>= 1; */
|
||||
last_bit |= 1;
|
||||
}
|
||||
return 63 - last_bit;
|
||||
return 63 - (uint8_t)last_bit;
|
||||
}
|
||||
|
||||
/* result might be undefined when input_num is zero */
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint8_t
|
||||
leading_zeroes(uint64_t input_num) noexcept {
|
||||
assert(input_num > 0);
|
||||
FASTFLOAT_ASSUME(input_num > 0);
|
||||
@ -360,12 +360,12 @@ leading_zeroes(uint64_t input_num) noexcept {
|
||||
// Search the mask data from most significant bit (MSB)
|
||||
// to least significant bit (LSB) for a set bit (1).
|
||||
_BitScanReverse64(&leading_zero, input_num);
|
||||
return (int)(63 - leading_zero);
|
||||
return (uint8_t)(63 - leading_zero);
|
||||
#else
|
||||
return leading_zeroes_generic(input_num);
|
||||
return (uint8_t)leading_zeroes_generic(input_num);
|
||||
#endif
|
||||
#else
|
||||
return __builtin_clzll(input_num);
|
||||
return (uint8_t)__builtin_clzll(input_num);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -429,7 +429,7 @@ full_multiplication(uint64_t a, uint64_t b) noexcept {
|
||||
|
||||
struct adjusted_mantissa {
|
||||
uint64_t mantissa;
|
||||
int32_t power2; // a negative value indicates an invalid result
|
||||
int16_t power2; // a negative value indicates an invalid result
|
||||
adjusted_mantissa() noexcept = default;
|
||||
|
||||
constexpr bool operator==(adjusted_mantissa const &o) const noexcept {
|
||||
|
||||
@ -283,7 +283,7 @@ from_chars_advanced(parsed_number_string_t<UC> const &pns, T &value) noexcept {
|
||||
// and we have an invalid power (am.power2 < 0), then we need to go the long
|
||||
// way around again. This is very uncommon.
|
||||
if (am.power2 < 0) {
|
||||
digit_comp<T>(pns, am);
|
||||
am = digit_comp<T>(pns, am);
|
||||
}
|
||||
to_float(
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user