mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-06 16:56:57 +08:00
Review of the parse_number_string function: it is now much faster, safer, and easier to understand.
This commit is contained in:
parent
4b94a612cf
commit
23a9c3f54d
@ -238,7 +238,7 @@ loop_parse_if_eight_digits(char const *&p, char const *const pend,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
enum class parse_error {
|
enum class parse_error : uint_fast8_t {
|
||||||
no_error,
|
no_error,
|
||||||
// A sign must be followed by an integer or dot.
|
// A sign must be followed by an integer or dot.
|
||||||
missing_integer_or_dot_after_sign,
|
missing_integer_or_dot_after_sign,
|
||||||
@ -301,8 +301,8 @@ parse_number_string(UC const *p, UC const *pend,
|
|||||||
FASTFLOAT_ASSUME(p < pend); // so dereference without checks;
|
FASTFLOAT_ASSUME(p < pend); // so dereference without checks;
|
||||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||||
answer.negative = (*p == UC('-'));
|
answer.negative = (*p == UC('-'));
|
||||||
// C++17 20.19.3.(7.1) explicitly forbids '+' sign here
|
if (answer.negative ||
|
||||||
if ((*p == UC('-')) ||
|
// C++17 20.19.3.(7.1) explicitly forbids '+' sign here
|
||||||
((chars_format_t(options.format & chars_format::allow_leading_plus)) &&
|
((chars_format_t(options.format & chars_format::allow_leading_plus)) &&
|
||||||
(!basic_json_fmt && *p == UC('+')))) {
|
(!basic_json_fmt && *p == UC('+')))) {
|
||||||
++p;
|
++p;
|
||||||
@ -338,10 +338,13 @@ parse_number_string(UC const *p, UC const *pend,
|
|||||||
*p - UC('0'))); // might overflow, we will handle the overflow later
|
*p - UC('0'))); // might overflow, we will handle the overflow later
|
||||||
++p;
|
++p;
|
||||||
}
|
}
|
||||||
|
|
||||||
UC const *const end_of_integer_part = p;
|
UC const *const end_of_integer_part = p;
|
||||||
am_digits digit_count =
|
am_digits digit_count =
|
||||||
static_cast<am_digits>(end_of_integer_part - start_digits);
|
static_cast<am_digits>(end_of_integer_part - start_digits);
|
||||||
answer.integer = span<UC const>(start_digits, digit_count);
|
answer.integer = span<UC const>(start_digits, digit_count);
|
||||||
|
// We have now parsed the integer part of the mantissa.
|
||||||
|
|
||||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||||
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
|
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
|
||||||
// at least 1 digit in integer part, without leading zeros
|
// at least 1 digit in integer part, without leading zeros
|
||||||
@ -355,8 +358,8 @@ parse_number_string(UC const *p, UC const *pend,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool const has_decimal_point = (p != pend) && (*p == options.decimal_point);
|
// We can now parse the fraction part of the mantissa.
|
||||||
if (has_decimal_point) {
|
if ((p != pend) && (*p == options.decimal_point)) {
|
||||||
++p;
|
++p;
|
||||||
UC const *before = p;
|
UC const *before = p;
|
||||||
// can occur at most twice without overflowing, but let it occur more, since
|
// can occur at most twice without overflowing, but let it occur more, since
|
||||||
@ -378,7 +381,7 @@ parse_number_string(UC const *p, UC const *pend,
|
|||||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||||
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
|
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
|
||||||
// at least 1 digit in fractional part
|
// at least 1 digit in fractional part
|
||||||
if (has_decimal_point && answer.exponent == 0) {
|
if (answer.exponent == 0) {
|
||||||
return report_parse_error<UC>(
|
return report_parse_error<UC>(
|
||||||
p, parse_error::no_digits_in_fractional_part);
|
p, parse_error::no_digits_in_fractional_part);
|
||||||
}
|
}
|
||||||
@ -392,44 +395,79 @@ parse_number_string(UC const *p, UC const *pend,
|
|||||||
|
|
||||||
// Now we can parse the explicit exponential part.
|
// Now we can parse the explicit exponential part.
|
||||||
am_pow_t exp_number = 0; // explicit exponential part
|
am_pow_t exp_number = 0; // explicit exponential part
|
||||||
if (((p != pend) &&
|
bool neg_exp = false;
|
||||||
(((chars_format_t(options.format & chars_format::scientific)) &&
|
if (p != pend) {
|
||||||
((UC('e') == *p) || (UC('E') == *p))))
|
UC const *location_of_e;
|
||||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
if (chars_format_t(options.format & chars_format::scientific)) {
|
||||||
|| (((chars_format_t(options.format & detail::basic_fortran_fmt))) &&
|
switch (*p) {
|
||||||
((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) ||
|
case UC('e'):
|
||||||
(UC('D') == *p)))
|
case UC('E'):
|
||||||
#endif
|
location_of_e = p;
|
||||||
)) {
|
|
||||||
UC const *location_of_e = p;
|
|
||||||
#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
|
||||||
++p;
|
|
||||||
#else
|
|
||||||
if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) ||
|
|
||||||
(UC('D') == *p)) {
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
bool neg_exp = false;
|
|
||||||
if (p != pend) {
|
|
||||||
if (UC('-') == *p) {
|
|
||||||
neg_exp = true;
|
|
||||||
++p;
|
|
||||||
} else if (UC('+') == *p) {
|
|
||||||
// '+' on exponent is allowed by C++17 20.19.3.(7.1)
|
|
||||||
++p;
|
++p;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// If the format is scientific and not fixed, we have to bail out.
|
||||||
|
if (!chars_format_t(options.format & chars_format::fixed)) {
|
||||||
|
return report_parse_error<UC>(p,
|
||||||
|
parse_error::missing_exponential_part);
|
||||||
|
}
|
||||||
|
// In fixed notation we will be ignoring the 'e'.
|
||||||
|
location_of_e = nullptr;
|
||||||
|
}
|
||||||
|
if (location_of_e && p != pend) {
|
||||||
|
switch (*p) {
|
||||||
|
case UC('-'):
|
||||||
|
neg_exp = true;
|
||||||
|
++p;
|
||||||
|
break;
|
||||||
|
case UC('+'): // '+' on exponent is allowed by C++17 20.19.3.(7.1)
|
||||||
|
++p;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// If the format is scientific and not fixed, we have to bail out.
|
||||||
|
if (!chars_format_t(options.format & chars_format::fixed)) {
|
||||||
|
return report_parse_error<UC>(
|
||||||
|
p, parse_error::missing_exponential_part);
|
||||||
|
}
|
||||||
|
// In fixed notation we will be ignoring the 'e'.
|
||||||
|
location_of_e = nullptr;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((p == pend) || !is_integer(*p)) {
|
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||||
if (!(chars_format_t(options.format & chars_format::fixed))) {
|
else if (chars_format_t(options.format & detail::basic_fortran_fmt)) {
|
||||||
// The exponential part is invalid for scientific notation, so it
|
switch (*p) {
|
||||||
// must be a trailing token for fixed notation. However, fixed
|
case UC('d'):
|
||||||
// notation is disabled, so report a scientific notation error.
|
case UC('D'):
|
||||||
|
++p;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// In Fortran the d symbol is optional.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
switch (*p) {
|
||||||
|
case UC('-'):
|
||||||
|
neg_exp = true;
|
||||||
|
location_of_e = p;
|
||||||
|
++p;
|
||||||
|
break;
|
||||||
|
case UC('+'):
|
||||||
|
location_of_e = p;
|
||||||
|
++p;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// In Fortran the sign is mandatory.
|
||||||
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
|
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
|
||||||
}
|
}
|
||||||
// Otherwise, we will be ignoring the 'e'.
|
}
|
||||||
p = location_of_e;
|
#endif
|
||||||
} else {
|
else {
|
||||||
|
location_of_e = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (location_of_e) {
|
||||||
|
// We have a valid scientific notation, let's parse the explicit
|
||||||
|
// exponent.
|
||||||
while ((p != pend) && is_integer(*p)) {
|
while ((p != pend) && is_integer(*p)) {
|
||||||
if (exp_number < 0x1000) {
|
if (exp_number < 0x1000) {
|
||||||
// check for exponent overflow if we have too many digits.
|
// check for exponent overflow if we have too many digits.
|
||||||
@ -443,35 +481,41 @@ parse_number_string(UC const *p, UC const *pend,
|
|||||||
}
|
}
|
||||||
answer.exponent += exp_number;
|
answer.exponent += exp_number;
|
||||||
}
|
}
|
||||||
} else {
|
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||||
// If it scientific and not fixed, we have to bail out.
|
else if (chars_format_t(options.format & detail::basic_fortran_fmt)) {
|
||||||
if ((chars_format_t(options.format & chars_format::scientific)) &&
|
// In Fortran the number in exponent part is mandatory.
|
||||||
!(chars_format_t(options.format & chars_format::fixed))) {
|
|
||||||
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
|
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We parsed all parts of the number, let's save progress.
|
||||||
answer.lastmatch = p;
|
answer.lastmatch = p;
|
||||||
answer.valid = true;
|
answer.valid = true;
|
||||||
|
|
||||||
// If we frequently had to deal with long strings of digits,
|
// Now we can check for errors.
|
||||||
|
|
||||||
|
// TODO: If we frequently had to deal with long strings of digits,
|
||||||
// we could extend our code by using a 128-bit integer instead
|
// we could extend our code by using a 128-bit integer instead
|
||||||
// of a 64-bit integer. However, this is uncommon.
|
// of a 64-bit integer. However, this is uncommon.
|
||||||
//
|
|
||||||
// We can deal with up to 19 digits.
|
// We can deal with up to 19 digits.
|
||||||
if (digit_count > 19) { // this is uncommon
|
if (digit_count > 19) {
|
||||||
// It is possible that the integer had an overflow.
|
// It is possible that the integer had an overflow.
|
||||||
// We have to handle the case where we have 0.0000somenumber.
|
// We have to handle the case where we have 0.0000somenumber.
|
||||||
// We need to be mindful of the case where we only have zeroes...
|
// We need to be mindful of the case where we only have zeroes...
|
||||||
// E.g., 0.000000000...000.
|
// E.g., 0.000000000...000.
|
||||||
UC const *start = start_digits;
|
UC const *start = start_digits;
|
||||||
while ((start != pend) &&
|
do { // we already have some numbers, so we can skip first check safely
|
||||||
(*start == UC('0') || *start == options.decimal_point)) {
|
if ((*start == UC('0') || *start == options.decimal_point)) {
|
||||||
if (*start == UC('0')) {
|
if (*start == UC('0')) {
|
||||||
--digit_count;
|
--digit_count;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
++start;
|
} while (++start != pend);
|
||||||
}
|
|
||||||
|
|
||||||
|
// We have to check if we have a number with more than 19 significant
|
||||||
|
// digits.
|
||||||
if (digit_count > 19) {
|
if (digit_count > 19) {
|
||||||
answer.too_many_digits = true;
|
answer.too_many_digits = true;
|
||||||
// Let us start again, this time, avoiding overflows.
|
// Let us start again, this time, avoiding overflows.
|
||||||
@ -480,19 +524,20 @@ parse_number_string(UC const *p, UC const *pend,
|
|||||||
answer.mantissa = 0;
|
answer.mantissa = 0;
|
||||||
p = answer.integer.ptr;
|
p = answer.integer.ptr;
|
||||||
UC const *int_end = p + answer.integer.len();
|
UC const *int_end = p + answer.integer.len();
|
||||||
am_mant_t const minimal_nineteen_digit_integer{1000000000000000000};
|
constexpr am_mant_t minimal_nineteen_digit_integer{1000000000000000000};
|
||||||
while ((answer.mantissa < minimal_nineteen_digit_integer) &&
|
while ((answer.mantissa < minimal_nineteen_digit_integer) &&
|
||||||
(p != int_end)) {
|
(p != int_end)) {
|
||||||
answer.mantissa = static_cast<am_mant_t>(
|
answer.mantissa = static_cast<am_mant_t>(
|
||||||
answer.mantissa * 10 + static_cast<am_mant_t>(*p - UC('0')));
|
answer.mantissa * 10 + static_cast<am_mant_t>(*p - UC('0')));
|
||||||
++p;
|
++p;
|
||||||
}
|
}
|
||||||
if (answer.mantissa >=
|
if (answer.mantissa >= minimal_nineteen_digit_integer) {
|
||||||
minimal_nineteen_digit_integer) { // We have a big integers
|
// We have a big integer, so skip the fraction part completely.
|
||||||
answer.exponent = am_pow_t(end_of_integer_part - p) + exp_number;
|
answer.exponent = am_pow_t(end_of_integer_part - p) + exp_number;
|
||||||
} else { // We have a value with a fractional component.
|
} else {
|
||||||
|
// We have a value with a significant fractional component.
|
||||||
p = answer.fraction.ptr;
|
p = answer.fraction.ptr;
|
||||||
UC const *frac_end = p + answer.fraction.len();
|
UC const *const frac_end = p + answer.fraction.len();
|
||||||
while ((answer.mantissa < minimal_nineteen_digit_integer) &&
|
while ((answer.mantissa < minimal_nineteen_digit_integer) &&
|
||||||
(p != frac_end)) {
|
(p != frac_end)) {
|
||||||
answer.mantissa = static_cast<am_mant_t>(
|
answer.mantissa = static_cast<am_mant_t>(
|
||||||
@ -501,9 +546,10 @@ parse_number_string(UC const *p, UC const *pend,
|
|||||||
}
|
}
|
||||||
answer.exponent = am_pow_t(answer.fraction.ptr - p) + exp_number;
|
answer.exponent = am_pow_t(answer.fraction.ptr - p) + exp_number;
|
||||||
}
|
}
|
||||||
// We have now corrected both exponent and mantissa, to a truncated value
|
|
||||||
}
|
}
|
||||||
|
// We have now corrected both exponent and mantissa, to a truncated value
|
||||||
}
|
}
|
||||||
|
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user