From 9ebac23081ee5bca04c1e73ce454b686a2653473 Mon Sep 17 00:00:00 2001 From: IRainman Date: Thu, 6 Mar 2025 22:25:05 +0300 Subject: [PATCH] Added a config option FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN that produces faster and more compact number-parsing code by supporting only positive C/C++-style numbers as input, without nan or inf. This case is very useful in mathematical applications, game development, CSS parsing, embedded code, etc. Also improved constant initialization. --- include/fast_float/ascii_number.h | 83 ++++++++++++++++----------- include/fast_float/digit_comparison.h | 6 +- include/fast_float/float_common.h | 36 ++++++++++-- include/fast_float/parse_number.h | 35 ++++++++--- 4 files changed, 113 insertions(+), 47 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index ec34a1c..4b298b3 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -234,6 +234,7 @@ loop_parse_if_eight_digits(char const *&p, char const *const pend, enum class parse_error { no_error, +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN // [JSON-only] The minus sign must be followed by an integer. missing_integer_after_sign, // A sign must be followed by an integer or dot. @@ -245,6 +246,7 @@ enum class parse_error { // [JSON-only] If there is a decimal point, there must be digits in the // fractional part. no_digits_in_fractional_part, +#endif // The mantissa must have at least one digit. no_digits_in_mantissa, // Scientific notation requires an exponential part.
@@ -255,7 +257,9 @@ template struct parsed_number_string_t { int64_t exponent{0}; uint64_t mantissa{0}; UC const *lastmatch{nullptr}; +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN bool negative{false}; +#endif bool valid{false}; bool too_many_digits{false}; // contains the range of the significant digits @@ -288,34 +292,32 @@ parse_number_string(UC const *p, UC const *pend, answer.valid = false; answer.too_many_digits = false; [[assume(p < pend)]]; // assume p < pend, so dereference without checks; - if (!uint64_t(options.format & chars_format::disallow_leading_sign)) { - answer.negative = (*p == UC('-')); - // C++17 20.19.3.(7.1) explicitly forbids '+' sign here - if ((*p == UC('-')) || - (uint64_t(options.format & chars_format::allow_leading_plus) && - !uint64_t(options.format & detail::basic_json_fmt) && *p == UC('+'))) { - ++p; - if (p == pend) { +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + answer.negative = (*p == UC('-')); + // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + if ((*p == UC('-')) || + (uint64_t(options.format & chars_format::allow_leading_plus) && + !uint64_t(options.format & detail::basic_json_fmt) && *p == UC('+'))) { + ++p; + if (p == pend) { + return report_parse_error( + p, parse_error::missing_integer_or_dot_after_sign); + } + if (uint64_t(options.format & detail::basic_json_fmt)) { + if (!is_integer(*p)) { // a sign must be followed by an integer + return report_parse_error(p, + parse_error::missing_integer_after_sign); + } + } else { + if (!is_integer(*p) && + (*p != + options.decimal_point)) { // a sign must be followed by an integer or the dot return report_parse_error( p, parse_error::missing_integer_or_dot_after_sign); } - if (uint64_t(options.format & detail::basic_json_fmt)) { - if (!is_integer(*p)) { // a sign must be followed by an integer - return report_parse_error(p, - parse_error::missing_integer_after_sign); - } - } else { - if (!is_integer(*p) && - (*p != - options.decimal_point)) { // a sign must 
be followed by an integer or the dot - return report_parse_error( - p, parse_error::missing_integer_or_dot_after_sign); - } - } } - } else { - answer.negative = false; } +#endif UC const *const start_digits = p; @@ -332,6 +334,7 @@ parse_number_string(UC const *p, UC const *pend, UC const *const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); answer.integer = span(start_digits, size_t(digit_count)); +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN if (uint64_t(options.format & detail::basic_json_fmt)) { // at least 1 digit in integer part, without leading zeros if (digit_count == 0) { @@ -342,6 +345,7 @@ parse_number_string(UC const *p, UC const *pend, parse_error::leading_zeros_in_integer_part); } } +#endif int64_t exponent = 0; bool const has_decimal_point = (p != pend) && (*p == options.decimal_point); @@ -361,22 +365,28 @@ parse_number_string(UC const *p, UC const *pend, answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN if (uint64_t(options.format & detail::basic_json_fmt)) { // at least 1 digit in fractional part if (has_decimal_point && exponent == 0) { return report_parse_error(p, parse_error::no_digits_in_fractional_part); } - } else if (digit_count == + } else +#endif + if (digit_count == 0) { // we must have encountered at least one integer! 
return report_parse_error(p, parse_error::no_digits_in_mantissa); } int64_t exp_number = 0; // explicit exponential part if ((uint64_t(options.format & chars_format::scientific) && (p != pend) && - ((UC('e') == *p) || (UC('E') == *p))) || - (uint64_t(options.format & detail::basic_fortran_fmt) && (p != pend) && + ((UC('e') == *p) || (UC('E') == *p))) +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + || (uint64_t(options.format & detail::basic_fortran_fmt) && (p != pend) && ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) || - (UC('D') == *p)))) { + (UC('D') == *p))) +#endif + ) { UC const *location_of_e = p; if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) || (UC('D') == *p)) { @@ -483,9 +493,8 @@ parse_int_string(UC const *p, UC const *pend, T &value, UC const *const first = p; - bool negative; - if (!uint64_t(options.fmt & chars_format::disallow_leading_sign)) { - negative = (*p == UC('-')); +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + bool const negative = (*p == UC('-')); #ifdef FASTFLOAT_VISUAL_STUDIO #pragma warning(push) #pragma warning(disable : 4127) @@ -502,9 +511,7 @@ parse_int_string(UC const *p, UC const *pend, T &value, (uint64_t(options.fmt & chars_format::allow_leading_plus) && (*p == UC('+')))) { ++p; } - } else { - negative = false; - } +#endif UC const *const start_num = p; @@ -560,12 +567,17 @@ parse_int_string(UC const *p, UC const *pend, T &value, // check other types overflow if (!std::is_same::value) { - if (i > uint64_t(std::numeric_limits::max()) + uint64_t(negative)) { + if (i > uint64_t(std::numeric_limits::max()) +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + + uint64_t(negative) +#endif + ) { answer.ec = std::errc::result_out_of_range; return answer; } } +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN if (negative) { #ifdef FASTFLOAT_VISUAL_STUDIO #pragma warning(push) @@ -583,8 +595,11 @@ parse_int_string(UC const *p, UC const *pend, T &value, #pragma warning(pop) #endif } else { +#endif 
value = T(i); +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN } +#endif answer.ec = std::errc(); return answer; diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index ea8457a..a8d4eec 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -381,7 +381,11 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( round(am_b, [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); }); T b; - to_float(false, am_b, b); + to_float( +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + false, +#endif + am_b, b); adjusted_mantissa theor = to_extended_halfway(b); bigint theor_digits(theor.mantissa); int32_t theor_exp = theor.power2; diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index a08f72d..edcda12 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -35,25 +35,29 @@ namespace fast_float { enum class chars_format : uint64_t; +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN namespace detail { constexpr chars_format basic_json_fmt = chars_format(1 << 5); constexpr chars_format basic_fortran_fmt = chars_format(1 << 6); } // namespace detail +#endif enum class chars_format : uint64_t { scientific = 1 << 0, fixed = 1 << 2, + general = fixed | scientific, hex = 1 << 3, +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN no_infnan = 1 << 4, // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6 - json = uint64_t(detail::basic_json_fmt) | fixed | scientific | no_infnan, + json = uint64_t(detail::basic_json_fmt) | general | no_infnan, // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed. 
- json_or_infnan = uint64_t(detail::basic_json_fmt) | fixed | scientific, - fortran = uint64_t(detail::basic_fortran_fmt) | fixed | scientific, - general = fixed | scientific, + json_or_infnan = uint64_t(detail::basic_json_fmt) | general, + fortran = uint64_t(detail::basic_fortran_fmt) | general, allow_leading_plus = 1 << 7, skip_white_space = 1 << 8, disallow_leading_sign = 1 << 9, +#endif }; template struct from_chars_result_t { @@ -616,6 +620,8 @@ template <> inline constexpr int binary_format::infinite_power() { return 0xFF; } +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + template <> inline constexpr int binary_format::sign_index() { return 63; } @@ -624,6 +630,8 @@ template <> inline constexpr int binary_format::sign_index() { return 31; } +#endif + template <> inline constexpr int binary_format::max_exponent_fast_path() { return 22; @@ -750,10 +758,14 @@ inline constexpr int binary_format::infinite_power() { return 0x1F; } +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + template <> inline constexpr int binary_format::sign_index() { return 15; } +#endif + template <> inline constexpr int binary_format::largest_power_of_ten() { return 4; @@ -873,10 +885,14 @@ inline constexpr int binary_format::infinite_power() { return 0xFF; } +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + template <> inline constexpr int binary_format::sign_index() { return 15; } +#endif + template <> inline constexpr int binary_format::largest_power_of_ten() { return 38; @@ -989,13 +1005,19 @@ binary_format::hidden_bit_mask() { template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void -to_float(const bool negative, const adjusted_mantissa am, T &value) noexcept { +to_float( +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + const bool negative, +#endif + const adjusted_mantissa am, T &value) noexcept { using equiv_uint = equiv_uint_t; equiv_uint word = equiv_uint(am.mantissa); word = equiv_uint(word | equiv_uint(am.power2) << 
binary_format::mantissa_explicit_bits()); +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN word = equiv_uint(word | equiv_uint(negative) << binary_format::sign_index()); +#endif #if FASTFLOAT_HAS_BIT_CAST value = std::bit_cast(word); #else @@ -1042,6 +1064,8 @@ template static constexpr int int_cmp_len() { return sizeof(uint64_t) / sizeof(UC); } +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + template constexpr UC const *str_const_nan(); template <> constexpr char const *str_const_nan() { return "nan"; } @@ -1084,6 +1108,8 @@ template <> constexpr char8_t const *str_const_inf() { } #endif +#endif + template struct int_luts { static constexpr uint8_t chdigit[] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index f3b0619..0223343 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -14,6 +14,7 @@ namespace fast_float { namespace detail { +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN /** * Special case +inf, -inf, nan, infinity, -infinity. * The case comparisons could be made much faster given that we know that the @@ -77,6 +78,7 @@ from_chars_result_t answer.ec = std::errc::invalid_argument; return answer; } +#endif /** * Returns true if the floating-pointing rounding mode is to 'nearest'. @@ -239,9 +241,11 @@ from_chars_advanced(const parsed_number_string_t &pns, T &value) noexcept { } else { value = value * binary_format::exact_power_of_ten(pns.exponent); } +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN if (pns.negative) { value = -value; } +#endif return answer; } } else { @@ -254,15 +258,21 @@ from_chars_advanced(const parsed_number_string_t &pns, T &value) noexcept { #if defined(__clang__) || defined(FASTFLOAT_32BIT) // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD if (pns.mantissa == 0) { - value = pns.negative ? T(-0.) 
: T(0.); + value = +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + pns.negative ? T(-0.) : +#endif + T(0.); return answer; } #endif value = T(pns.mantissa) * binary_format::exact_power_of_ten(pns.exponent); +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN if (pns.negative) { value = -value; } +#endif return answer; } } @@ -280,7 +290,11 @@ from_chars_advanced(const parsed_number_string_t &pns, T &value) noexcept { if (am.power2 < 0) { am = digit_comp(pns, am); } - to_float(pns.negative, am, value); + to_float( +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN + pns.negative, +#endif + am, value); // Test for over/underflow. if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || am.power2 == binary_format::infinite_power()) { @@ -300,26 +314,32 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value, "only char, wchar_t, char16_t and char32_t are supported"); from_chars_result_t answer; +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN if (uint64_t(options.format & chars_format::skip_white_space)) { while ((first != last) && fast_float::is_space(*first)) { first++; } } +#endif if (first == last) { answer.ec = std::errc::invalid_argument; answer.ptr = first; return answer; } - parsed_number_string_t pns = + parsed_number_string_t const pns = parse_number_string(first, last, options); if (!pns.valid) { +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN if (uint64_t(options.format & chars_format::no_infnan)) { +#endif answer.ec = std::errc::invalid_argument; answer.ptr = first; return answer; +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN } else { return detail::parse_infnan(first, last, value, options.format); } +#endif } // call overload that takes parsed_number_string_t directly.
@@ -335,28 +355,29 @@ from_chars(UC const *first, UC const *last, T &value, int base) noexcept { static_assert(is_supported_char_type::value, "only char, wchar_t, char16_t and char32_t are supported"); - parse_options_t options; - options.base = base; + parse_options_t const options(chars_format::general, UC('.'), base); return from_chars_advanced(first, last, value, options); } template FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars_int_advanced(UC const *first, UC const *last, T &value, - parse_options_t options) noexcept { + const parse_options_t options) noexcept { static_assert(is_supported_integer_type::value, "only integer types are supported"); static_assert(is_supported_char_type::value, "only char, wchar_t, char16_t and char32_t are supported"); - from_chars_result_t answer; +#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN if (uint64_t(options.format & chars_format::skip_white_space)) { while ((first != last) && fast_float::is_space(*first)) { first++; } } +#endif if (first == last || options.base < 2 || options.base > 36) { + from_chars_result_t answer; answer.ec = std::errc::invalid_argument; answer.ptr = first; return answer;