From c849b7a8ff0520d2d337e23ef9bcca5d4f274619 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Sat, 15 Apr 2023 23:16:01 -0400 Subject: [PATCH] Option to forbid nan/inf, refactor --- include/fast_float/ascii_number.h | 80 ++++++++++++++++--------------- include/fast_float/fast_float.h | 24 ++++++++-- include/fast_float/float_common.h | 5 +- include/fast_float/parse_number.h | 9 ++-- 4 files changed, 72 insertions(+), 46 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index aef072d..9fbe9ac 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -157,7 +157,7 @@ uint32_t parse_eight_digits_unrolled(const char16_t* chars) noexcept { if (cpp20_and_in_constexpr() || !has_simd()) { return parse_eight_digits_unrolled(read_u64(chars)); } -#if !FASTFLOAT_SSE2 +#if !FASTFLOAT_HAS_SIMD return 0; // never reaches here, satisfy compiler #else FASTFLOAT_SIMD_DISABLE_WARNINGS @@ -184,7 +184,7 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars)); return true; } -#if !FASTFLOAT_SSE2 +#if !FASTFLOAT_HAS_SIMD return false; // never reaches here, satisfy compiler #else FASTFLOAT_SIMD_DISABLE_WARNINGS @@ -210,10 +210,10 @@ template struct parsed_number_string { int64_t exponent{0}; uint64_t mantissa{0}; + int64_t exp_number{0}; const CharT *lastmatch{nullptr}; bool negative{false}; bool valid{false}; - bool is_64bit_int{false}; bool too_many_digits{false}; // contains the range of the significant digits span integer{}; // non-nullable @@ -224,7 +224,7 @@ struct parsed_number_string { // parse an ASCII string. 
template <typename CharT> fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options, const bool parse_ints = false) noexcept { +parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept { const chars_format fmt = options.format; const parse_rules rules = options.rules; const CharT decimal_point = static_cast<CharT>(options.decimal_point); @@ -322,7 +322,7 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen answer.lastmatch = p; answer.valid = true; - answer.is_64bit_int = (p == end_of_integer_part); + answer.exp_number = exp_number; // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead @@ -339,45 +339,49 @@ parsed_number_string<CharT> parse_number_string(const CharT *p, const CharT *pen if(*start == static_cast<CharT>('0')) { digit_count --; } start++; } - constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL}; - // maya: A 64-bit number may have up to 20 digits! - // If we're parsing ints, preserve accuracy up to 20 digits - // instead of rounding them to a floating point value. - answer.too_many_digits = rules == parse_rules::json_rules && parse_ints && answer.is_64bit_int ? - (digit_count > 20 || i < minimal_twenty_digit_integer) : digit_count > 19; - - if (answer.too_many_digits) { - answer.is_64bit_int = false; - // Let us start again, this time, avoiding overflows. - // We don't need to check if is_integer, since we use the - // pre-tokenized spans from above. 
- i = 0; - p = answer.integer.ptr; - const CharT* int_end = p + answer.integer.len(); - const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; - while((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - static_cast<CharT>('0')); - ++p; - } - if (i >= minimal_nineteen_digit_integer) { // We have a big integers - exponent = end_of_integer_part - p + exp_number; - } else { // We have a value with a fractional component. - p = answer.fraction.ptr; - const CharT* frac_end = p + answer.fraction.len(); - while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - static_cast<CharT>('0')); - ++p; - } - exponent = answer.fraction.ptr - p + exp_number; - } - // We have now corrected both exponent and i, to a truncated value - } + + // exponent/mantissa must be truncated later + answer.too_many_digits = digit_count > 19; } answer.exponent = exponent; answer.mantissa = i; return answer; } +template <typename CharT> +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void truncate_exponent_mantissa(parsed_number_string<CharT>& ps) +{ + // Let us start again, this time, avoiding overflows. + // We don't need to check if is_integer, since we use the + // pre-tokenized spans. + uint64_t i = 0; + int64_t exponent = 0; + const CharT* p = ps.integer.ptr; + const CharT* const int_end = p + ps.integer.len(); + const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; + while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { + i = i * 10 + uint64_t(*p - static_cast<CharT>('0')); + ++p; + } + if (i >= minimal_nineteen_digit_integer) { // We have a big integer + exponent = int_end - p + ps.exp_number; + } + else { // We have a value with a fractional component. 
+ p = ps.fraction.ptr; + const CharT* const frac_end = p + ps.fraction.len(); + while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - static_cast<CharT>('0')); + ++p; + } + exponent = ps.fraction.ptr - p + ps.exp_number; + } + // We have now corrected both exponent and i, to a truncated value + + ps.exponent = exponent; + ps.mantissa = i; +} + } // namespace fast_float #endif diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index 91870a7..470d05a 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -26,18 +26,34 @@ struct from_chars_result { struct parse_options { constexpr explicit parse_options( - chars_format fmt = chars_format::general, - parse_rules rules = parse_rules::std_rules, char dot = '.') - : format(fmt), rules(rules), decimal_point(dot) {} + chars_format fmt = chars_format::general, + parse_rules rules = parse_rules::std_rules, + char dot = '.', bool allow_inf_nan = true) + : format(fmt), rules(rules), allow_inf_nan(allow_inf_nan), decimal_point(dot) {} /** Which number formats are accepted */ chars_format format; /** Which parsing rules to use */ parse_rules rules; + /** Whether to allow inf and nan */ + bool allow_inf_nan; /** The character used as decimal point */ char decimal_point; }; +struct preparsed_parse_options { + constexpr explicit preparsed_parse_options( + bool allow_inf_nan = true) + : allow_inf_nan(allow_inf_nan) {} + + constexpr preparsed_parse_options( + const parse_options& options) + : allow_inf_nan(options.allow_inf_nan) {} + + /** Whether to allow inf and nan */ + bool allow_inf_nan; +}; + /** * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. 
@@ -78,7 +94,7 @@ namespace fast_float { template FASTFLOAT_CONSTEXPR20 from_chars_result from_chars_preparsed(parsed_number_string parsed, - const CharT* first, const CharT* last, T& value) noexcept; + const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept; } // namespace fast_float diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index fe64126..4fb03fc 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -78,6 +78,9 @@ #endif #endif +#if FASTFLOAT_SSE2 +#define FASTFLOAT_HAS_SIMD (1) +#endif #if defined(__GNUC__) #define FASTFLOAT_SIMD_DISABLE_WARNINGS \ @@ -124,7 +127,7 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { } fastfloat_really_inline constexpr bool has_simd() { -#if FASTFLOAT_SSE2 +#if FASTFLOAT_HAS_SIMD return true; #else return false; diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 2c2d2c3..2f30e35 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -143,15 +143,18 @@ from_chars_result from_chars(const CharT *first, const CharT *last, template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_preparsed(parsed_number_string pns, const CharT* first, const CharT* last, T& value) noexcept +from_chars_result from_chars_preparsed(parsed_number_string pns, const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept { static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); - + from_chars_result answer; if (!pns.valid) { - return detail::parse_infnan(first, last, value); + return options.allow_inf_nan ? detail::parse_infnan(first, last, value) : answer; } + if (pns.too_many_digits) + truncate_exponent_mantissa(pns); + answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; // The implementation of the Clinger's fast path is convoluted because