mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-06 16:56:57 +08:00
Merge pull request #252 from LeszekSwirski/parse-error
Record parse failure reason and location
This commit is contained in:
commit
0e7a10ad80
@ -234,6 +234,25 @@ void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum class parse_error {
|
||||||
|
no_error,
|
||||||
|
// [JSON-only] The minus sign must be followed by an integer.
|
||||||
|
missing_integer_after_sign,
|
||||||
|
// A sign must be followed by an integer or dot.
|
||||||
|
missing_integer_or_dot_after_sign,
|
||||||
|
// [JSON-only] The integer part must not have leading zeros.
|
||||||
|
leading_zeros_in_integer_part,
|
||||||
|
// [JSON-only] The integer part must have at least one digit.
|
||||||
|
no_digits_in_integer_part,
|
||||||
|
// [JSON-only] If there is a decimal point, there must be digits in the
|
||||||
|
// fractional part.
|
||||||
|
no_digits_in_fractional_part,
|
||||||
|
// The mantissa must have at least one digit.
|
||||||
|
no_digits_in_mantissa,
|
||||||
|
// Scientific notation requires an exponential part.
|
||||||
|
missing_exponential_part,
|
||||||
|
};
|
||||||
|
|
||||||
template <typename UC>
|
template <typename UC>
|
||||||
struct parsed_number_string_t {
|
struct parsed_number_string_t {
|
||||||
int64_t exponent{0};
|
int64_t exponent{0};
|
||||||
@ -245,11 +264,22 @@ struct parsed_number_string_t {
|
|||||||
// contains the range of the significant digits
|
// contains the range of the significant digits
|
||||||
span<const UC> integer{}; // non-nullable
|
span<const UC> integer{}; // non-nullable
|
||||||
span<const UC> fraction{}; // nullable
|
span<const UC> fraction{}; // nullable
|
||||||
|
parse_error error{parse_error::no_error};
|
||||||
};
|
};
|
||||||
|
|
||||||
using byte_span = span<const char>;
|
using byte_span = span<const char>;
|
||||||
using parsed_number_string = parsed_number_string_t<char>;
|
using parsed_number_string = parsed_number_string_t<char>;
|
||||||
|
|
||||||
|
template <typename UC>
|
||||||
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
|
||||||
|
report_parse_error(UC const* p, parse_error error) {
|
||||||
|
parsed_number_string_t<UC> answer;
|
||||||
|
answer.valid = false;
|
||||||
|
answer.lastmatch = p;
|
||||||
|
answer.error = error;
|
||||||
|
return answer;
|
||||||
|
}
|
||||||
|
|
||||||
// Assuming that you use no more than 19 digits, this will
|
// Assuming that you use no more than 19 digits, this will
|
||||||
// parse an ASCII string.
|
// parse an ASCII string.
|
||||||
template <typename UC>
|
template <typename UC>
|
||||||
@ -269,15 +299,16 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
|
|||||||
#endif
|
#endif
|
||||||
++p;
|
++p;
|
||||||
if (p == pend) {
|
if (p == pend) {
|
||||||
return answer;
|
return report_parse_error<UC>(
|
||||||
|
p, parse_error::missing_integer_or_dot_after_sign);
|
||||||
}
|
}
|
||||||
if (fmt & FASTFLOAT_JSONFMT) {
|
if (fmt & FASTFLOAT_JSONFMT) {
|
||||||
if (!is_integer(*p)) { // a sign must be followed by an integer
|
if (!is_integer(*p)) { // a sign must be followed by an integer
|
||||||
return answer;
|
return report_parse_error<UC>(p, parse_error::missing_integer_after_sign);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
|
if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
|
||||||
return answer;
|
return report_parse_error<UC>(p, parse_error::missing_integer_or_dot_after_sign);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -297,8 +328,12 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
|
|||||||
answer.integer = span<const UC>(start_digits, size_t(digit_count));
|
answer.integer = span<const UC>(start_digits, size_t(digit_count));
|
||||||
if (fmt & FASTFLOAT_JSONFMT) {
|
if (fmt & FASTFLOAT_JSONFMT) {
|
||||||
// at least 1 digit in integer part, without leading zeros
|
// at least 1 digit in integer part, without leading zeros
|
||||||
if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) {
|
if (digit_count == 0) {
|
||||||
return answer;
|
return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
|
||||||
|
}
|
||||||
|
if ((start_digits[0] == UC('0') && digit_count > 1)) {
|
||||||
|
return report_parse_error<UC>(start_digits,
|
||||||
|
parse_error::leading_zeros_in_integer_part);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -323,11 +358,10 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
|
|||||||
if (fmt & FASTFLOAT_JSONFMT) {
|
if (fmt & FASTFLOAT_JSONFMT) {
|
||||||
// at least 1 digit in fractional part
|
// at least 1 digit in fractional part
|
||||||
if (has_decimal_point && exponent == 0) {
|
if (has_decimal_point && exponent == 0) {
|
||||||
return answer;
|
return report_parse_error<UC>(p, parse_error::no_digits_in_fractional_part);
|
||||||
}
|
}
|
||||||
}
|
} else if (digit_count == 0) { // we must have encountered at least one integer!
|
||||||
else if (digit_count == 0) { // we must have encountered at least one integer!
|
return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
|
||||||
return answer;
|
|
||||||
}
|
}
|
||||||
int64_t exp_number = 0; // explicit exponential part
|
int64_t exp_number = 0; // explicit exponential part
|
||||||
if ( ((fmt & chars_format::scientific) &&
|
if ( ((fmt & chars_format::scientific) &&
|
||||||
@ -350,8 +384,10 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
|
|||||||
}
|
}
|
||||||
if ((p == pend) || !is_integer(*p)) {
|
if ((p == pend) || !is_integer(*p)) {
|
||||||
if(!(fmt & chars_format::fixed)) {
|
if(!(fmt & chars_format::fixed)) {
|
||||||
// We are in error.
|
// The exponential part is invalid for scientific notation, so it must
|
||||||
return answer;
|
// be a trailing token for fixed notation. However, fixed notation is
|
||||||
|
// disabled, so report a scientific notation error.
|
||||||
|
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
|
||||||
}
|
}
|
||||||
// Otherwise, we will be ignoring the 'e'.
|
// Otherwise, we will be ignoring the 'e'.
|
||||||
p = location_of_e;
|
p = location_of_e;
|
||||||
@ -368,7 +404,9 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// If it is scientific and not fixed, we have to bail out.
|
// If it is scientific and not fixed, we have to bail out.
|
||||||
if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
|
if ((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) {
|
||||||
|
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
answer.lastmatch = p;
|
answer.lastmatch = p;
|
||||||
answer.valid = true;
|
answer.valid = true;
|
||||||
|
|||||||
@ -45,6 +45,15 @@ struct AcceptedValue {
|
|||||||
ExpectedResult expected;
|
ExpectedResult expected;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct RejectReason {
|
||||||
|
fast_float::parse_error error;
|
||||||
|
intptr_t location_offset;
|
||||||
|
};
|
||||||
|
struct RejectedValue {
|
||||||
|
std::string input;
|
||||||
|
RejectReason reason;
|
||||||
|
};
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
const std::vector<AcceptedValue> accept{
|
const std::vector<AcceptedValue> accept{
|
||||||
{"-0.2", {-0.2, ""}},
|
{"-0.2", {-0.2, ""}},
|
||||||
@ -55,8 +64,18 @@ int main() {
|
|||||||
{"1e", {1., "e"}},
|
{"1e", {1., "e"}},
|
||||||
{"1e+", {1., "e+"}},
|
{"1e+", {1., "e+"}},
|
||||||
{"inf", {std::numeric_limits<double>::infinity(), ""}}};
|
{"inf", {std::numeric_limits<double>::infinity(), ""}}};
|
||||||
const std::vector<std::string> reject{"-.2", "00.02", "0.e+1", "00.e+1",
|
const std::vector<RejectedValue> reject{
|
||||||
".25", "+0.25", "inf", "nan(snan)"};
|
{"-.2", {fast_float::parse_error::missing_integer_after_sign, 1}},
|
||||||
|
{"00.02", {fast_float::parse_error::leading_zeros_in_integer_part, 0}},
|
||||||
|
{"0.e+1", {fast_float::parse_error::no_digits_in_fractional_part, 2}},
|
||||||
|
{"00.e+1", {fast_float::parse_error::leading_zeros_in_integer_part, 0}},
|
||||||
|
{".25", {fast_float::parse_error::no_digits_in_integer_part, 0}},
|
||||||
|
// The following cases already start as invalid JSON, so they are
|
||||||
|
// handled as trailing junk and the error is for not having digits in the
|
||||||
|
// empty string before the invalid token.
|
||||||
|
{"+0.25", {fast_float::parse_error::no_digits_in_integer_part, 0}},
|
||||||
|
{"inf", {fast_float::parse_error::no_digits_in_integer_part, 0}},
|
||||||
|
{"nan(snan)", {fast_float::parse_error::no_digits_in_integer_part, 0}}};
|
||||||
|
|
||||||
for (std::size_t i = 0; i < accept.size(); ++i)
|
for (std::size_t i = 0; i < accept.size(); ++i)
|
||||||
{
|
{
|
||||||
@ -80,7 +99,7 @@ int main() {
|
|||||||
|
|
||||||
for (std::size_t i = 0; i < reject.size(); ++i)
|
for (std::size_t i = 0; i < reject.size(); ++i)
|
||||||
{
|
{
|
||||||
const auto& s = reject[i];
|
const auto& s = reject[i].input;
|
||||||
double result;
|
double result;
|
||||||
auto answer = fast_float::from_chars(s.data(), s.data() + s.size(), result, fast_float::chars_format::json);
|
auto answer = fast_float::from_chars(s.data(), s.data() + s.size(), result, fast_float::chars_format::json);
|
||||||
if (answer.ec == std::errc()) {
|
if (answer.ec == std::errc()) {
|
||||||
@ -89,6 +108,31 @@ int main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (std::size_t i = 0; i < reject.size(); ++i)
|
||||||
|
{
|
||||||
|
const auto& f = reject[i].input;
|
||||||
|
const auto& expected_reason = reject[i].reason;
|
||||||
|
auto answer = fast_float::parse_number_string(
|
||||||
|
f.data(), f.data() + f.size(),
|
||||||
|
fast_float::parse_options(fast_float::chars_format::json));
|
||||||
|
if (answer.valid) {
|
||||||
|
std::cerr << "json parse accepted invalid json " << f << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
if (answer.error != expected_reason.error) {
|
||||||
|
std::cerr << "json parse failure had invalid error reason " << f
|
||||||
|
<< std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
intptr_t error_location = answer.lastmatch - f.data();
|
||||||
|
if (error_location != expected_reason.location_offset) {
|
||||||
|
std::cerr << "json parse failure had invalid error location " << f
|
||||||
|
<< " (expected " << expected_reason.location_offset << " got "
|
||||||
|
<< error_location << ")" << std::endl;
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(main_readme() != EXIT_SUCCESS) { return EXIT_FAILURE; }
|
if(main_readme() != EXIT_SUCCESS) { return EXIT_FAILURE; }
|
||||||
if(main_readme2() != EXIT_SUCCESS) { return EXIT_FAILURE; }
|
if(main_readme2() != EXIT_SUCCESS) { return EXIT_FAILURE; }
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user