Merge pull request #252 from LeszekSwirski/parse-error

Record parse failure reason and location
2025-12-06 16:56:57 +08:00 · 2024-08-03 10:08:48 -04:00 · 2024-08-03 10:08:48 -04:00 · 0e7a10ad80
commit 0e7a10ad80
parent 3838b00751 b6ce2c4de6
2 changed files with 97 additions and 15 deletions
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@ -234,6 +234,25 @@ void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t
  }
 }
 // Reason recorded when a number string is rejected. The value is stored in
 // parsed_number_string_t::error; report_parse_error() sets it together with
 // `lastmatch`, which points at the position where the failure was detected.
 // Enumerators tagged [JSON-only] are only produced when the JSON format flag
 // is set.
 enum class parse_error {
  // Default value of parsed_number_string_t::error: no failure recorded.
  no_error,
  // [JSON-only] The minus sign must be followed by an integer.
  missing_integer_after_sign,
  // A sign must be followed by an integer or dot.
  // Also reported when the input ends immediately after the sign.
  missing_integer_or_dot_after_sign,
  // [JSON-only] The integer part must not have leading zeros.
  leading_zeros_in_integer_part,
  // [JSON-only] The integer part must have at least one digit.
  no_digits_in_integer_part,
  // [JSON-only] If there is a decimal point, there must be digits in the
  // fractional part.
  no_digits_in_fractional_part,
  // The mantissa must have at least one digit.
  no_digits_in_mantissa,
  // Scientific notation requires an exponential part.
  // Reported only when the scientific flag is set and the fixed flag is not.
  missing_exponential_part,
 };
 template <typename UC>
 struct parsed_number_string_t {
  int64_t exponent{0};
@ -245,11 +264,22 @@ struct parsed_number_string_t {
  // contains the range of the significant digits
  span<const UC> integer{};  // non-nullable
  span<const UC> fraction{}; // nullable
  parse_error error{parse_error::no_error};
 };
 using byte_span = span<const char>;
 using parsed_number_string = parsed_number_string_t<char>;
 // Builds the result for a rejected input: a default-constructed
 // parsed_number_string_t<UC> that is marked invalid, carries `error` as the
 // failure reason, and whose `lastmatch` is `p`, the position at which the
 // failure was detected.
 template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
 report_parse_error(UC const* p, parse_error error) {
  parsed_number_string_t<UC> failure;
  // The three fields are independent; every other member keeps its default.
  failure.error = error;
  failure.lastmatch = p;
  failure.valid = false;
  return failure;
 }
 // Assuming that you use no more than 19 digits, this will
 // parse an ASCII string.
 template <typename UC>
@ -269,15 +299,16 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
 #endif
    ++p;
    if (p == pend) {
-      return answer;
+        return report_parse_error<UC>(
            p, parse_error::missing_integer_or_dot_after_sign);
    }
    if (fmt & FASTFLOAT_JSONFMT) {
      if (!is_integer(*p)) { // a sign must be followed by an integer
-        return answer;
+        return report_parse_error<UC>(p, parse_error::missing_integer_after_sign);
      }    
    } else {
      if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
-        return answer;
+        return report_parse_error<UC>(p, parse_error::missing_integer_or_dot_after_sign);
      }
    }
  }
@ -297,8 +328,12 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
  answer.integer = span<const UC>(start_digits, size_t(digit_count));
  if (fmt & FASTFLOAT_JSONFMT) {
    // at least 1 digit in integer part, without leading zeros
-    if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) {
+    if (digit_count == 0) {
-      return answer;
+      return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
    }
    if ((start_digits[0] == UC('0') && digit_count > 1)) {
      return report_parse_error<UC>(start_digits,
                                    parse_error::leading_zeros_in_integer_part);
    }
  }
@ -323,11 +358,10 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
  if (fmt & FASTFLOAT_JSONFMT) {
    // at least 1 digit in fractional part
    if (has_decimal_point && exponent == 0) {
-      return answer;
+      return report_parse_error<UC>(p, parse_error::no_digits_in_fractional_part);
    }
-  } 
+  } else if (digit_count == 0) {  // we must have encountered at least one integer!
-  else if (digit_count == 0) { // we must have encountered at least one integer!
+    return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
    return answer;
  }
  int64_t exp_number = 0;            // explicit exponential part
  if ( ((fmt & chars_format::scientific) &&
@ -350,8 +384,10 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
    }
    if ((p == pend) || !is_integer(*p)) {
      if(!(fmt & chars_format::fixed)) {
-        // We are in error.
+        // The exponential part is invalid for scientific notation, so it must
-        return answer;
+        // be a trailing token for fixed notation. However, fixed notation is
        // disabled, so report a scientific notation error.
        return report_parse_error<UC>(p, parse_error::missing_exponential_part);
      }
      // Otherwise, we will be ignoring the 'e'.
      p = location_of_e;
@ -368,7 +404,9 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
    }
  } else {
    // If it is scientific and not fixed, we have to bail out.
-    if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
+    if ((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) {
      return report_parse_error<UC>(p, parse_error::missing_exponential_part);
    }
  }
  answer.lastmatch = p;
  answer.valid = true;
--- a/tests/json_fmt.cpp
+++ b/tests/json_fmt.cpp
@ -45,6 +45,15 @@ struct AcceptedValue {
  ExpectedResult expected;
 };
 // Expected failure details for an input that the parser must reject.
 struct RejectReason {
  // Error code the parse result's `error` field is compared against.
  fast_float::parse_error error;
  // Expected distance, in characters, from the start of the input to the
  // parse result's `lastmatch` pointer.
  intptr_t location_offset;
 };
 // One negative test case: an input that must be rejected when parsed with
 // chars_format::json, plus the failure details expected from
 // parse_number_string for that input.
 struct RejectedValue {
  // Text handed to the parser.
  std::string input;
  // Expected error code and error location for `input`.
  RejectReason reason;
 };
 int main() {
  const std::vector<AcceptedValue> accept{
      {"-0.2", {-0.2, ""}},
@ -55,8 +64,18 @@ int main() {
      {"1e", {1., "e"}},
      {"1e+", {1., "e+"}},
      {"inf", {std::numeric_limits<double>::infinity(), ""}}};
-  const std::vector<std::string> reject{"-.2", "00.02", "0.e+1", "00.e+1",
+  const std::vector<RejectedValue> reject{
-                                        ".25", "+0.25", "inf",   "nan(snan)"};
+      {"-.2", {fast_float::parse_error::missing_integer_after_sign, 1}},
      {"00.02", {fast_float::parse_error::leading_zeros_in_integer_part, 0}},
      {"0.e+1", {fast_float::parse_error::no_digits_in_fractional_part, 2}},
      {"00.e+1", {fast_float::parse_error::leading_zeros_in_integer_part, 0}},
      {".25", {fast_float::parse_error::no_digits_in_integer_part, 0}},
      // The following cases already start as invalid JSON, so they are
      // handled as trailing junk and the error is for not having digits in the
      // empty string before the invalid token.
      {"+0.25", {fast_float::parse_error::no_digits_in_integer_part, 0}},
      {"inf", {fast_float::parse_error::no_digits_in_integer_part, 0}},
      {"nan(snan)", {fast_float::parse_error::no_digits_in_integer_part, 0}}};
  for (std::size_t i = 0; i < accept.size(); ++i)
  {
@ -80,7 +99,7 @@ int main() {
  for (std::size_t i = 0; i < reject.size(); ++i)
  {
-    const auto& s = reject[i];
+    const auto& s = reject[i].input;
    double result;
    auto answer = fast_float::from_chars(s.data(), s.data() + s.size(), result, fast_float::chars_format::json);
    if (answer.ec == std::errc()) {
@ -89,6 +108,31 @@ int main() {
    }
  }
  for (std::size_t i = 0; i < reject.size(); ++i)
  {
    const auto& f = reject[i].input;
    const auto& expected_reason = reject[i].reason;
    auto answer = fast_float::parse_number_string(
        f.data(), f.data() + f.size(),
        fast_float::parse_options(fast_float::chars_format::json));
    if (answer.valid) {
      std::cerr << "json parse accepted invalid json " << f << std::endl;
      return EXIT_FAILURE;
    }
    if (answer.error != expected_reason.error) {
      std::cerr << "json parse failure had invalid error reason " << f
                << std::endl;
      return EXIT_FAILURE;
    }
    intptr_t error_location = answer.lastmatch - f.data();
    if (error_location != expected_reason.location_offset) {
      std::cerr << "json parse failure had invalid error location " << f
                << " (expected " << expected_reason.location_offset << " got "
                << error_location << ")" << std::endl;
      return EXIT_FAILURE;
    }
  }
  if(main_readme() != EXIT_SUCCESS) { return EXIT_FAILURE; }
  if(main_readme2() != EXIT_SUCCESS) { return EXIT_FAILURE; }