From 3cafcca2ffe7112cb8d8b57cc51a218759db743b Mon Sep 17 00:00:00 2001
From: Maya Warrier
Date: Wed, 29 Mar 2023 02:14:12 -0400
Subject: [PATCH] Add support for json parsing rules and integers

Add a parse_rules option (std_rules / json_rules) and a parse_ints flag to
parse_options. Under json_rules a number needs digits on both sides of the
decimal point and must not have leading zeros. With parse_ints, plain
integers of up to 20 digits that fit in 64 bits keep an exact mantissa and
are flagged through parsed_number_string::is_64bit_uint.
---
 include/fast_float/ascii_number.h | 45 ++++++++++++++++++++++++-------
 include/fast_float/fast_float.h   | 21 ++++++++++++---
 include/fast_float/parse_number.h |  4 ++-
 3 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 9ad754e..83d7f9b 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -96,10 +96,10 @@ typedef span<const char> byte_span;
 struct parsed_number_string {
   int64_t exponent{0};
   uint64_t mantissa{0};
-  uint64_t integer_value{-1};
   const char *lastmatch{nullptr};
   bool negative{false};
   bool valid{false};
+  bool is_64bit_uint{false}; // plain integer whose value is held exactly in mantissa
   bool too_many_digits{false};
   // contains the range of the significant digits
   byte_span integer{}; // non-nullable
@@ -111,6 +111,8 @@ struct parsed_number_string {
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept {
   const chars_format fmt = options.format;
+  const parse_rules rules = options.rules;
+  const bool parse_ints = options.parse_ints;
   const char decimal_point = options.decimal_point;
 
   parsed_number_string answer;
@@ -126,9 +128,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     if (p == pend) {
       return answer;
     }
-    if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
-      return answer;
-    }
+    // a sign must be followed by an integer or the dot (an integer only, with json rules)
+    if (!is_integer(*p) && (rules == parse_rules::json_rules || *p != decimal_point))
+      return answer;
   }
   const char *const start_digits = p;
 
@@ -144,9 +146,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
   const char *const end_of_integer_part = p;
   int64_t digit_count = int64_t(end_of_integer_part - start_digits);
   answer.integer = byte_span(start_digits, size_t(digit_count));
-  answer.integer_value = i;
   int64_t exponent = 0;
-  if ((p != pend) && (*p == decimal_point)) {
+  const bool has_decimal_point = (p != pend) && (*p == decimal_point);
+  if (has_decimal_point) {
     ++p;
     const char* before = p;
     // can occur at most twice without overflowing, but let it occur more, since
@@ -164,8 +166,11 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     answer.fraction = byte_span(before, size_t(p - before));
     digit_count -= exponent;
   }
-  // we must have encountered at least one integer!
-  if (digit_count == 0) {
+  // We must have encountered at least one digit. JSON additionally requires
+  // a digit on each side of the decimal point (neither ".5" nor "5." is valid).
+  const bool json_digit_missing = (rules == parse_rules::json_rules) &&
+    ((end_of_integer_part == start_digits) || (has_decimal_point && exponent == 0));
+  if (digit_count == 0 || json_digit_missing) {
     return answer;
   }
   int64_t exp_number = 0; // explicit exponential part
@@ -201,6 +206,11 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     // If it scientific and not fixed, we have to bail out.
     if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
   }
+
+  // disallow leading zeros before the decimal point
+  if (rules == parse_rules::json_rules && start_digits[0] == '0' && digit_count >= 2 && is_integer(start_digits[1]))
+    return answer;
+
   answer.lastmatch = p;
   answer.valid = true;
 
@@ -219,8 +229,20 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       if(*start == '0') { digit_count --; }
       start++;
     }
-    if (digit_count > 19) {
-      answer.too_many_digits = true;
+    constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL};
+    // A 64-bit unsigned integer may have up to 20 digits, not 19!
+    // If we're parsing ints, preserve accuracy up to 20 digits instead
+    // of converting them to the closest floating point value. This only
+    // applies to plain integers (no decimal point, no exponent).
+    const bool keep_exact_int = (rules == parse_rules::json_rules) && parse_ints &&
+      (p == end_of_integer_part);
+    // A 20-digit value fits in 64 bits only if it starts with '1' and the
+    // accumulated value did not wrap around to something below 10^19.
+    answer.too_many_digits = keep_exact_int ?
+      (digit_count > 20 || (digit_count == 20 && (*start != '1' || i < minimal_twenty_digit_integer))) :
+      (digit_count > 19);
+
+    if (answer.too_many_digits) {
       // Let us start again, this time, avoiding overflows.
       // We don't need to check if is_integer, since we use the
       // pre-tokenized spans from above.
@@ -245,6 +267,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       }
       // We have now corrected both exponent and i, to a truncated value
     }
   }
+  // A plain integer (no decimal point, no exponent) that did not need
+  // truncation is represented exactly in the 64-bit mantissa.
+  answer.is_64bit_uint = !answer.too_many_digits && (p == end_of_integer_part);
   answer.exponent = exponent;
   answer.mantissa = i;
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index d84405d..2bfabdc 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -13,6 +13,10 @@ enum chars_format {
     general = fixed | scientific
 };
 
+enum parse_rules {
+    std_rules,
+    json_rules,
+};
 
 struct from_chars_result {
   const char *ptr;
@@ -20,12 +24,20 @@ struct from_chars_result {
 };
 
 struct parse_options {
-  constexpr explicit parse_options(chars_format fmt = chars_format::general,
-                         char dot = '.')
-    : format(fmt), decimal_point(dot) {}
+  // The new parameters come after `dot` so existing two-argument callers keep working.
+  constexpr explicit parse_options(
+    chars_format fmt = chars_format::general,
+    char dot = '.',
+    parse_rules rule_set = parse_rules::std_rules,
+    bool ints = false)
+    : format(fmt), rules(rule_set), parse_ints(ints), decimal_point(dot) {}
 
   /** Which number formats are accepted */
   chars_format format;
+  /** Which parsing rules to use */
+  parse_rules rules;
+  /** Whether to parse integers too, only applicable with json_rules */
+  bool parse_ints;
   /** The character used as decimal point */
   char decimal_point;
 };
@@ -69,7 +81,8 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 namespace fast_float {
 template<typename T>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars_preparsed(parsed_number_string parsed, T& value) noexcept;
+from_chars_result from_chars_preparsed(parsed_number_string parsed,
+                                       const char* first, const char* last, T& value) noexcept;
 
 } // namespace fast_float
 
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index c880f1e..c13b641 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -141,7 +141,7 @@ from_chars_result from_chars(const char *first, const char *last,
 
 template<typename T>
 FASTFLOAT_CONSTEXPR20
-from_chars_result from_chars_preparsed(parsed_number_string pns, T& value) noexcept
+from_chars_result from_chars_preparsed(parsed_number_string pns, const char* first, const char* last, T& value) noexcept
 {
   static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
 
@@ -221,7 +221,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
     answer.ptr = first;
     return answer;
   }
-  answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value);
+  answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value);
   return answer;
 }
 