From 122220e2f0fc9c5b5c08de135587d5b81340911c Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Mon, 11 Dec 2023 04:17:26 -0500 Subject: [PATCH 1/4] Version 1 of from_chars integer parser --- include/fast_float/ascii_number.h | 61 ++++++++++++++++++++++-- include/fast_float/fast_float.h | 7 ++- include/fast_float/float_common.h | 79 ++++++++++++++++++++++++++++++- include/fast_float/parse_number.h | 29 +++++++++--- 4 files changed, 165 insertions(+), 11 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index d18e3d5..b59e549 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "float_common.h" @@ -115,7 +116,7 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS #if defined(_MSC_VER) && _MSC_VER <= 1900 template #else -template ())> +template ()) = 0> #endif // dummy for compile uint64_t simd_read8_to_u64(UC const*) { @@ -223,7 +224,7 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS #if defined(_MSC_VER) && _MSC_VER <= 1900 template #else -template ())> +template ()) = 0> #endif // dummy for compile bool simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) { @@ -231,7 +232,7 @@ bool simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) { } -template ::value)> +template ::value) = 0> fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) { if (!has_simd_opt()) { @@ -439,6 +440,60 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par return answer; } +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +from_chars_result_t parse_int_string(UC const* p, UC const* pend, T& value, int base) +{ + from_chars_result_t answer; + answer.ec = std::errc::invalid_argument; + answer.ptr = p; + + bool negative = (*p == UC('-')); + if (!std::is_signed::value && negative) { + return answer; + } +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default + if ((*p == UC('-')) || (*p == UC('+'))) { +#else + if (*p == UC('-')) { +#endif + ++p; + } + + UC const* const start_digits = p; + + uint64_t i = 0; + while (p != pend) { + uint8_t digit = ch_to_digit(*p); + if (digit > base) { + break; + } + i = base * i + digit; // might overflow, check this later + p++; + } + + size_t digit_count = size_t(p - start_digits); + + // check u64 overflow + constexpr int max_digits = max_digits_u64(base); + if (digit_count == 0 || digit_count > max_digits) { + return answer; + } + // this check can be eliminated for all other types, but they will all require a max_digits(base) equivalent + if (digit_count == max_digits && i < min_safe_u64(base)) { + return answer; + } + + // check other types overflow + if (!std::is_same::value) { + if (i > uint64_t(std::numeric_limits::max()) + uint64_t(negative)) { + return answer; + } + } + + return negative ? (~i + 1) : i; +} + } // namespace fast_float #endif diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index 04efa87..b59fbf2 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -24,7 +24,7 @@ namespace fast_float { * to determine whether we allow the fixed point and scientific notation respectively. * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. */ -template +template())> FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars(UC const * first, UC const * last, T &value, chars_format fmt = chars_format::general) noexcept; @@ -36,6 +36,11 @@ template FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars_advanced(UC const * first, UC const * last, T &value, parse_options_t options) noexcept; +/** +* from_chars for integer types. +*/ +template ())> +from_chars_result_t from_chars(UC const * first, UC const * last, T& value, int base = 10) noexcept; } // namespace fast_float #include "parse_number.h" diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index d693dc3..e5d8446 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -173,7 +173,7 @@ using parse_options = parse_options_t; // rust style `try!()` macro, or `?` operator #define FASTFLOAT_TRY(x) { if (!(x)) return false; } -#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0 +#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type namespace fast_float { @@ -186,6 +186,20 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { #endif } +template +fastfloat_really_inline constexpr bool is_supported_float_type() { + return std::is_same::value || std::is_same::value; +} + +template +fastfloat_really_inline constexpr bool is_supported_char_type() { + return + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +} + // Compares two ASCII strings in a case insensitive manner. template inline FASTFLOAT_CONSTEXPR14 bool @@ -674,6 +688,69 @@ constexpr char32_t const * str_const_inf() { return U"infinity"; } + + +template +struct int_luts { + static constexpr uint8_t chdigit[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 + }; + + static constexpr int maxdigits_u64[] = { + 64, 41, 32, 28, 25, 23, 22, 21, + 20, 19, 18, 18, 17, 17, 16, 16, + 16, 16, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 14, 13, 13, 13, + 13, 13, 13 + }; + + static constexpr uint64_t min_safe_u64[] = { + 9223372036854775808, 12157665459056928801, 4611686018427387904, 7450580596923828125, 4738381338321616896, + 3909821048582988049, 9223372036854775808, 12157665459056928801, 10000000000000000000, 5559917313492231481, + 2218611106740436992, 8650415919381337933, 2177953337809371136, 6568408355712890625, 1152921504606846976, + 2862423051509815793, 6746640616477458432, 15181127029874798299, 1638400000000000000, 3243919932521508681, + 6221821273427820544, 11592836324538749809, 876488338465357824, 1490116119384765625, 2481152873203736576, + 4052555153018976267, 6502111422497947648, 10260628712958602189, 15943230000000000000, 787662783788549761, + 1152921504606846976, 1667889514952984961, 2386420683693101056, 3379220508056640625, 4738381338321616896 + }; +}; + +template +constexpr uint8_t int_luts::chdigit[]; + +template +constexpr int int_luts::maxdigits_u64[]; + +template +constexpr uint64_t int_luts::min_safe_u64[]; + +template +fastfloat_really_inline +constexpr uint8_t ch_to_digit(UC c) { return int_luts<>::chdigit[c]; } + +fastfloat_really_inline +constexpr int max_digits_u64(int base) { return int_luts<>::maxdigits_u64[base - 2]; } + +// If a u64 is exactly max_digits_u64() in length, this is +// the minimum value below which it has definitely overflowed. +fastfloat_really_inline +constexpr uint64_t min_safe_u64(int base) { return int_luts<>::min_safe_u64[base - 2]; } + } // namespace fast_float #endif diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 1c8afa4..5b74261 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -133,7 +133,7 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { } // namespace detail -template +template FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars(UC const * first, UC const * last, T &value, chars_format fmt /*= chars_format::general*/) noexcept { @@ -145,11 +145,8 @@ FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars_advanced(UC const * first, UC const * last, T &value, parse_options_t options) noexcept { - static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); - static_assert (std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value , "only char, wchar_t, char16_t and char32_t are supported"); + static_assert (is_supported_float_type(), "only float and double are supported"); + static_assert (is_supported_char_type(), "only char, wchar_t, char16_t and char32_t are supported"); from_chars_result_t answer; #ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default @@ -232,6 +229,26 @@ from_chars_result_t from_chars_advanced(UC const * first, UC const * last, return answer; } + +template +from_chars_result_t from_chars(UC const* first, UC const* last, T& value, int base) noexcept +{ + static_assert (is_supported_char_type(), "only char, wchar_t, char16_t and char32_t are supported"); + + from_chars_result_t answer; +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default + while ((first != last) && fast_float::is_space(uint8_t(*first))) { + first++; + } +#endif + if (first == last) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + return parse_int_string(first, last, base); +} + } // namespace fast_float #endif From c9527c2e4f78759fa516bc122f229c07ff23c1e0 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Mon, 11 Dec 2023 04:27:22 -0500 Subject: [PATCH 2/4] Skip leading zeros --- include/fast_float/ascii_number.h | 5 +++++ include/fast_float/float_common.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index b59e549..333ada9 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -460,6 +460,11 @@ from_chars_result_t parse_int_string(UC const* p, UC const* pend, T& value, ++p; } + // skip leading zeros + while (p != pend && *p == UC('0')) { + ++p; + } + UC const* const start_digits = p; uint64_t i = 0; diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index e5d8446..f0d09cb 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -747,7 +747,7 @@ fastfloat_really_inline constexpr int max_digits_u64(int base) { return int_luts<>::maxdigits_u64[base - 2]; } // If a u64 is exactly max_digits_u64() in length, this is -// the minimum value below which it has definitely overflowed. +// the value below which it has definitely overflowed. fastfloat_really_inline constexpr uint64_t min_safe_u64(int base) { return int_luts<>::min_safe_u64[base - 2]; } From 3d446f1ebafcfe567f5350cfabe4a6728980268b Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Mon, 11 Dec 2023 04:40:08 -0500 Subject: [PATCH 3/4] Fix gcc werrors --- include/fast_float/ascii_number.h | 25 +++++++++++++++++++------ include/fast_float/fast_float.h | 1 + include/fast_float/float_common.h | 16 ++++++++-------- include/fast_float/parse_number.h | 3 ++- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 333ada9..322fba7 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -445,11 +445,11 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t parse_int_string(UC const* p, UC const* pend, T& value, int base) { from_chars_result_t answer; - answer.ec = std::errc::invalid_argument; - answer.ptr = p; bool negative = (*p == UC('-')); if (!std::is_signed::value && negative) { + answer.ec = std::errc::invalid_argument; + answer.ptr = p; return answer; } #ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default @@ -473,14 +473,16 @@ from_chars_result_t parse_int_string(UC const* p, UC const* pend, T& value, if (digit > base) { break; } - i = base * i + digit; // might overflow, check this later + i = uint64_t(base) * i + digit; // might overflow, check this later p++; } size_t digit_count = size_t(p - start_digits); + answer.ec = std::errc::result_out_of_range; + answer.ptr = p; // check u64 overflow - constexpr int max_digits = max_digits_u64(base); + size_t max_digits = max_digits_u64(base); if (digit_count == 0 || digit_count > max_digits) { return answer; } @@ -495,8 +497,19 @@ from_chars_result_t parse_int_string(UC const* p, UC const* pend, T& value, return answer; } } - - return negative ? (~i + 1) : i; + + if (negative) { + // this weird workaround is required because: + // - converting unsigned to signed when its value is greater than signed max is UB pre-C++23. + // - reinterpret_cast(~i + 1) would have worked, but it is not constexpr + // this should be optimized away. + value = -std::numeric_limits::max() - + static_cast(i - std::numeric_limits::max()); + } + else value = T(i); + + answer.ec = std::errc(); + return answer; } } // namespace fast_float diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index b59fbf2..9b1b9b4 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -40,6 +40,7 @@ from_chars_result_t from_chars_advanced(UC const * first, UC const * last, * from_chars for integer types. */ template ())> +FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars(UC const * first, UC const * last, T& value, int base = 10) noexcept; } // namespace fast_float diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index f0d09cb..0721d12 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -711,7 +711,7 @@ struct int_luts { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }; - static constexpr int maxdigits_u64[] = { + static constexpr size_t maxdigits_u64[] = { 64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, @@ -720,12 +720,12 @@ struct int_luts { }; static constexpr uint64_t min_safe_u64[] = { - 9223372036854775808, 12157665459056928801, 4611686018427387904, 7450580596923828125, 4738381338321616896, - 3909821048582988049, 9223372036854775808, 12157665459056928801, 10000000000000000000, 5559917313492231481, + 9223372036854775808ull, 12157665459056928801ull, 4611686018427387904, 7450580596923828125, 4738381338321616896, + 3909821048582988049, 9223372036854775808ull, 12157665459056928801ull, 10000000000000000000ull, 5559917313492231481, 2218611106740436992, 8650415919381337933, 2177953337809371136, 6568408355712890625, 1152921504606846976, - 2862423051509815793, 6746640616477458432, 15181127029874798299, 1638400000000000000, 3243919932521508681, - 6221821273427820544, 11592836324538749809, 876488338465357824, 1490116119384765625, 2481152873203736576, - 4052555153018976267, 6502111422497947648, 10260628712958602189, 15943230000000000000, 787662783788549761, + 2862423051509815793, 6746640616477458432, 15181127029874798299ull, 1638400000000000000, 3243919932521508681, + 6221821273427820544, 11592836324538749809ull, 876488338465357824, 1490116119384765625, 2481152873203736576, + 4052555153018976267, 6502111422497947648, 10260628712958602189ull, 15943230000000000000ull, 787662783788549761, 1152921504606846976, 1667889514952984961, 2386420683693101056, 3379220508056640625, 4738381338321616896 }; }; @@ -734,7 +734,7 @@ template constexpr uint8_t int_luts::chdigit[]; template -constexpr int int_luts::maxdigits_u64[]; +constexpr size_t int_luts::maxdigits_u64[]; template constexpr uint64_t int_luts::min_safe_u64[]; @@ -744,7 +744,7 @@ fastfloat_really_inline constexpr uint8_t ch_to_digit(UC c) { return int_luts<>::chdigit[c]; } fastfloat_really_inline -constexpr int max_digits_u64(int base) { return int_luts<>::maxdigits_u64[base - 2]; } +constexpr size_t max_digits_u64(int base) { return int_luts<>::maxdigits_u64[base - 2]; } // If a u64 is exactly max_digits_u64() in length, this is // the value below which it has definitely overflowed. diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 5b74261..4c75f5d 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -231,6 +231,7 @@ from_chars_result_t from_chars_advanced(UC const * first, UC const * last, template +FASTFLOAT_CONSTEXPR20 from_chars_result_t from_chars(UC const* first, UC const* last, T& value, int base) noexcept { static_assert (is_supported_char_type(), "only char, wchar_t, char16_t and char32_t are supported"); @@ -246,7 +247,7 @@ from_chars_result_t from_chars(UC const* first, UC const* last, T& value, in answer.ptr = first; return answer; } - return parse_int_string(first, last, base); + return parse_int_string(first, last, value, base); } } // namespace fast_float From 7a21a8d6d734c54529ec108c64a518b87fde5736 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Tue, 12 Dec 2023 02:36:18 -0500 Subject: [PATCH 4/4] Return invalid_argument in more places --- include/fast_float/ascii_number.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 322fba7..3dc8b43 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -445,11 +445,13 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t parse_int_string(UC const* p, UC const* pend, T& value, int base) { from_chars_result_t answer; + + UC const* const first = p; bool negative = (*p == UC('-')); if (!std::is_signed::value && negative) { answer.ec = std::errc::invalid_argument; - answer.ptr = p; + answer.ptr = first; return answer; } #ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default @@ -478,22 +480,31 @@ from_chars_result_t parse_int_string(UC const* p, UC const* pend, T& value, } size_t digit_count = size_t(p - start_digits); - answer.ec = std::errc::result_out_of_range; + + if (digit_count == 0) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + answer.ptr = p; // check u64 overflow size_t max_digits = max_digits_u64(base); - if (digit_count == 0 || digit_count > max_digits) { - return answer; + if (digit_count > max_digits) { + answer.ec = std::errc::result_out_of_range; + return answer; } // this check can be eliminated for all other types, but they will all require a max_digits(base) equivalent - if (digit_count == max_digits && i < min_safe_u64(base)) { + if (digit_count == max_digits && i < min_safe_u64(base)) { + answer.ec = std::errc::result_out_of_range; return answer; } // check other types overflow if (!std::is_same::value) { if (i > uint64_t(std::numeric_limits::max()) + uint64_t(negative)) { + answer.ec = std::errc::result_out_of_range; return answer; } }