From 7385c2053b141bbe9fa395b371fa29dfe6082bc8 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Mon, 27 Mar 2023 22:17:02 -0400 Subject: [PATCH 01/27] Update .gitignore --- .gitignore | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.gitignore b/.gitignore index 1566557..6bbf906 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,18 @@ build/* Testing/* .cache/ compile_commands.json + +# Visual studio +.vs/ +Debug/ +Release/ +/out/build/ +*.sln +*.vcxproj +*.vcxproj.filters +*.vcxproj.user +*.psess +*.vspx +*.vsp +*.diagsession +*.hint \ No newline at end of file From 8f94758c7862a77964864c2386c2ce425d1f4aab Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Mon, 27 Mar 2023 22:50:21 -0400 Subject: [PATCH 02/27] Expose parsed string (before computation) so it can be reused --- include/fast_float/ascii_number.h | 2 ++ include/fast_float/fast_float.h | 12 +++++++++- include/fast_float/parse_number.h | 38 +++++++++++++++++++------------ 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 72b8098..9ad754e 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -96,6 +96,7 @@ typedef span byte_span; struct parsed_number_string { int64_t exponent{0}; uint64_t mantissa{0}; + uint64_t integer_value{-1}; const char *lastmatch{nullptr}; bool negative{false}; bool valid{false}; @@ -143,6 +144,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ const char *const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); answer.integer = byte_span(start_digits, size_t(digit_count)); + answer.integer_value = i; int64_t exponent = 0; if ((p != pend) && (*p == decimal_point)) { ++p; diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index 65704da..d84405d 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -62,6 +62,16 @@ FASTFLOAT_CONSTEXPR20 from_chars_result from_chars_advanced(const char *first, const char *last, T &value, parse_options options) noexcept; -} // namespace fast_float +} + +#include "ascii_number.h" // parsed_number_string + +namespace fast_float { +template +FASTFLOAT_CONSTEXPR20 +from_chars_result from_chars_preparsed(parsed_number_string parsed, T& value) noexcept; +} + +// namespace fast_float #include "parse_number.h" #endif // FASTFLOAT_FAST_FLOAT_H diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index d16a25d..c880f1e 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -141,24 +141,12 @@ from_chars_result from_chars(const char *first, const char *last, template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_advanced(const char *first, const char *last, - T &value, parse_options options) noexcept { - +from_chars_result from_chars_preparsed(parsed_number_string pns, T& value) noexcept +{ static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); - + from_chars_result answer; -#if FASTFLOAT_SKIP_WHITE_SPACE // disabled by default - while ((first != last) && fast_float::is_space(uint8_t(*first))) { - first++; - } -#endif - if (first == last) { - answer.ec = std::errc::invalid_argument; - answer.ptr = first; - return answer; - } - parsed_number_string pns = parse_number_string(first, last, options); if (!pns.valid) { return detail::parse_infnan(first, last, value); } @@ -217,6 +205,26 @@ from_chars_result from_chars_advanced(const char *first, const char *last, return answer; } +template +FASTFLOAT_CONSTEXPR20 +from_chars_result from_chars_advanced(const char *first, const char *last, + T &value, parse_options options) noexcept { + + from_chars_result answer; +#if FASTFLOAT_SKIP_WHITE_SPACE // disabled by default + while ((first != last) && fast_float::is_space(uint8_t(*first))) { + first++; + } +#endif + if (first == last) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + answer = from_chars_preparsed(parse_number_string(first, last, options), value); + return answer; +} + } // namespace fast_float #endif From 3cafcca2ffe7112cb8d8b57cc51a218759db743b Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 29 Mar 2023 02:14:12 -0400 Subject: [PATCH 03/27] Add support for json parsing rules and integers --- include/fast_float/ascii_number.h | 36 ++++++++++++++++++++++--------- include/fast_float/fast_float.h | 19 ++++++++++++---- include/fast_float/parse_number.h | 4 ++-- 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 9ad754e..83d7f9b 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -96,10 +96,10 @@ typedef span byte_span; struct parsed_number_string { int64_t exponent{0}; uint64_t mantissa{0}; - uint64_t integer_value{-1}; const char *lastmatch{nullptr}; bool negative{false}; bool valid{false}; + bool is_64bit_uint{false}; bool too_many_digits{false}; // contains the range of the significant digits byte_span integer{}; // non-nullable @@ -111,6 +111,8 @@ struct parsed_number_string { fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept { const chars_format fmt = options.format; + const parse_rules rules = options.rules; + const bool parse_ints = options.parse_ints; const char decimal_point = options.decimal_point; parsed_number_string answer; @@ -126,9 +128,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ if (p == pend) { return answer; } - if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot - return answer; - } + // a sign must be followed by an integer or the dot + if (!is_integer(*p) && (rules == parse_rules::json_rules || *p != decimal_point)) + return answer; } const char *const start_digits = p; @@ -144,9 +146,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ const char *const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); answer.integer = byte_span(start_digits, size_t(digit_count)); - answer.integer_value = i; int64_t exponent = 0; - if ((p != pend) && (*p == decimal_point)) { + const bool has_decimal_point = (p != pend) && (*p == decimal_point); + if (has_decimal_point) { ++p; const char* before = p; // can occur at most twice without overflowing, but let it occur more, since @@ -164,8 +166,8 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ answer.fraction = byte_span(before, size_t(p - before)); digit_count -= exponent; } - // we must have encountered at least one integer! - if (digit_count == 0) { + // we must have encountered at least one integer (or two if a decimal point exists, with json rules). + if (digit_count == 0 || (rules == parse_rules::json_rules && has_decimal_point && digit_count == 1)) { return answer; } int64_t exp_number = 0; // explicit exponential part @@ -201,6 +203,11 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // If it scientific and not fixed, we have to bail out. if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } } + + // disallow leading zeros before the decimal point + if (rules == parse_rules::json_rules && start_digits[0] == '0' && digit_count >= 2 && is_integer(start_digits[1])) + return answer; + answer.lastmatch = p; answer.valid = true; @@ -219,8 +226,16 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ if(*start == '0') { digit_count --; } start++; } - if (digit_count > 19) { - answer.too_many_digits = true; + constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000}; + // maya: A 64-bit number may have up to 20 digits, not 19! + // If we're parsing ints, preserve accuracy up to 20 digits instead + // of converting them to the closest floating point value. + answer.too_many_digits = rules == parse_rules::json_rules && parse_ints ? + answer.is_integer && (digit_count > 20 || i < minimal_twenty_digit_integer) : + digit_count > 19; + + if (answer.too_many_digits) { + answer.is_64bit_uint = false; // Let us start again, this time, avoiding overflows. // We don't need to check if is_integer, since we use the // pre-tokenized spans from above. @@ -245,6 +260,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ } // We have now corrected both exponent and i, to a truncated value } + else answer.is_64bit_uint = (p == end_of_integer_part); } answer.exponent = exponent; answer.mantissa = i; diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index d84405d..2bfabdc 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -13,6 +13,10 @@ enum chars_format { general = fixed | scientific }; +enum parse_rules { + std_rules, + json_rules, +}; struct from_chars_result { const char *ptr; @@ -20,12 +24,18 @@ struct from_chars_result { }; struct parse_options { - constexpr explicit parse_options(chars_format fmt = chars_format::general, - char dot = '.') - : format(fmt), decimal_point(dot) {} + constexpr explicit parse_options( + chars_format fmt = chars_format::general, + parse_rules rules = parse_rules::std_rules, + bool parse_ints = false, char dot = '.', ) + : format(fmt), rules(rules), parse_ints(parse_ints), decimal_point(dot) {} /** Which number formats are accepted */ chars_format format; + /** Which parsing rules to use */ + parse_rules rules; + /* Whether to parse integers too, only applicable with json_rules */ + bool parse_ints; /** The character used as decimal point */ char decimal_point; }; @@ -69,7 +79,8 @@ from_chars_result from_chars_advanced(const char *first, const char *last, namespace fast_float { template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_preparsed(parsed_number_string parsed, T& value) noexcept; +from_chars_result from_chars_preparsed(parsed_number_string parsed, + const char* first, const char* last, T& value) noexcept; } // namespace fast_float diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index c880f1e..c13b641 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -141,7 +141,7 @@ from_chars_result from_chars(const char *first, const char *last, template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_preparsed(parsed_number_string pns, T& value) noexcept +from_chars_result from_chars_preparsed(parsed_number_string pns, const char* first, const char* last, T& value) noexcept { static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); @@ -221,7 +221,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last, answer.ptr = first; return answer; } - answer = from_chars_preparsed(parse_number_string(first, last, options), value); + answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value); return answer; } From b6acf38a2ea6c78a95c044bab6e134aff60aa7be Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 29 Mar 2023 02:20:36 -0400 Subject: [PATCH 04/27] Fix bugs --- include/fast_float/ascii_number.h | 13 ++++++------- include/fast_float/fast_float.h | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 83d7f9b..58305ba 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -99,7 +99,7 @@ struct parsed_number_string { const char *lastmatch{nullptr}; bool negative{false}; bool valid{false}; - bool is_64bit_uint{false}; + bool is_64bit_int{false}; bool too_many_digits{false}; // contains the range of the significant digits byte_span integer{}; // non-nullable @@ -210,6 +210,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ answer.lastmatch = p; answer.valid = true; + answer.is_64bit_int = (p == end_of_integer_part); // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead @@ -226,16 +227,15 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ if(*start == '0') { digit_count --; } start++; } - constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000}; + constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL}; // maya: A 64-bit number may have up to 20 digits, not 19! // If we're parsing ints, preserve accuracy up to 20 digits instead // of converting them to the closest floating point value. - answer.too_many_digits = rules == parse_rules::json_rules && parse_ints ? - answer.is_integer && (digit_count > 20 || i < minimal_twenty_digit_integer) : - digit_count > 19; + answer.too_many_digits = rules == parse_rules::json_rules && parse_ints && answer.is_64bit_int ? + (digit_count > 20 || i < minimal_twenty_digit_integer) : digit_count > 19; if (answer.too_many_digits) { - answer.is_64bit_uint = false; + answer.is_64bit_int = false; // Let us start again, this time, avoiding overflows. // We don't need to check if is_integer, since we use the // pre-tokenized spans from above. @@ -260,7 +260,6 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ } // We have now corrected both exponent and i, to a truncated value } - else answer.is_64bit_uint = (p == end_of_integer_part); } answer.exponent = exponent; answer.mantissa = i; diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index 2bfabdc..c11627f 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -27,7 +27,7 @@ struct parse_options { constexpr explicit parse_options( chars_format fmt = chars_format::general, parse_rules rules = parse_rules::std_rules, - bool parse_ints = false, char dot = '.', ) + bool parse_ints = false, char dot = '.') : format(fmt), rules(rules), parse_ints(parse_ints), decimal_point(dot) {} /** Which number formats are accepted */ From a699476fd2b3344af4ceca055b9e842f1d91c870 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Thu, 30 Mar 2023 04:47:42 -0400 Subject: [PATCH 05/27] ignore VS CMakeSettings file --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6bbf906..a2601ad 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,5 @@ Release/ *.vspx *.vsp *.diagsession -*.hint \ No newline at end of file +*.hint +/CMakeSettings.json From 2b118c843ab704fd437e56283961d69482ebd12a Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Thu, 30 Mar 2023 04:48:18 -0400 Subject: [PATCH 06/27] Experimental support for char_t types --- include/fast_float/ascii_number.h | 125 ++++++++++++++++++-------- include/fast_float/digit_comparison.h | 39 ++++---- include/fast_float/fast_float.h | 17 ++-- include/fast_float/float_common.h | 6 +- include/fast_float/parse_number.h | 38 ++++---- 5 files changed, 144 insertions(+), 81 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 58305ba..3d85543 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -5,15 +5,24 @@ #include #include #include +#include #include "float_common.h" +#define FASTFLOAT_SSE2 1 + +#if FASTFLOAT_SSE2 +#include +#endif + + namespace fast_float { // Next function can be micro-optimized, but compilers are entirely // able to optimize it well. -fastfloat_really_inline constexpr bool is_integer(char c) noexcept { - return c >= '0' && c <= '9'; +template +fastfloat_really_inline constexpr bool is_integer(CharT c) noexcept { + return c >= static_cast('0') && c <= static_cast('9'); } fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { @@ -28,7 +37,46 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { } fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint64_t read_u64(const char *chars) { +uint64_t fast_read_u64(const char* chars) +{ + uint64_t val; + ::memcpy(&val, chars, sizeof(uint64_t)); + return val; +} + +fastfloat_really_inline +uint64_t fast_read_u64(const char16_t* chars) +{ +#if FASTFLOAT_SSE2 + const void* const p = chars; + + static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff}; + const __m128i m_masks = _mm_loadu_si128(reinterpret_cast(masks)); + // mask hi bytes + __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), m_masks); + __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), m_masks); + + // pack into chars + __m128i packed = _mm_packus_epi16(i1, i2); + + // extract + uint64_t val; + _mm_storeu_epi64(&val, _mm_shuffle_epi32(packed, 0x8)); + return val; +#else + alignas(8) unsigned char bytes[8]; + for (int i = 0; i < 8; ++i) + bytes[i] = (unsigned char)chars[i]; + + uint64_t val; + ::memcpy(&val, bytes, sizeof(uint64_t)); + return val; +#endif +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +uint64_t read_u64(const CharT *chars) { if (cpp20_and_in_constexpr()) { uint64_t val = 0; for(int i = 0; i < 8; ++i) { @@ -37,8 +85,7 @@ uint64_t read_u64(const char *chars) { } return val; } - uint64_t val; - ::memcpy(&val, chars, sizeof(uint64_t)); + uint64_t val = fast_read_u64(chars); #if FASTFLOAT_IS_BIG_ENDIAN == 1 // Need to read as-if the number was in little-endian order. val = byteswap(val); @@ -46,6 +93,7 @@ uint64_t read_u64(const char *chars) { return val; } + fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void write_u64(uint8_t *chars, uint64_t val) { if (cpp20_and_in_constexpr()) { @@ -75,8 +123,9 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { return uint32_t(val); } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { +uint32_t parse_eight_digits_unrolled(const CharT *chars) noexcept { return parse_eight_digits_unrolled(read_u64(chars)); } @@ -86,43 +135,46 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val 0x8080808080808080)); } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_made_of_eight_digits_fast(const char *chars) noexcept { +bool is_made_of_eight_digits_fast(const CharT *chars) noexcept { return is_made_of_eight_digits_fast(read_u64(chars)); } typedef span byte_span; +template struct parsed_number_string { int64_t exponent{0}; uint64_t mantissa{0}; - const char *lastmatch{nullptr}; + const CharT *lastmatch{nullptr}; bool negative{false}; bool valid{false}; bool is_64bit_int{false}; bool too_many_digits{false}; // contains the range of the significant digits - byte_span integer{}; // non-nullable - byte_span fraction{}; // nullable + span integer{}; // non-nullable + span fraction{}; // nullable }; // Assuming that you use no more than 19 digits, this will // parse an ASCII string. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept { +parsed_number_string parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept { const chars_format fmt = options.format; const parse_rules rules = options.rules; const bool parse_ints = options.parse_ints; - const char decimal_point = options.decimal_point; + const CharT decimal_point = static_cast(options.decimal_point); - parsed_number_string answer; + parsed_number_string answer; answer.valid = false; answer.too_many_digits = false; - answer.negative = (*p == '-'); + answer.negative = (*p == static_cast('-')); #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if ((*p == '-') || (*p == '+')) { + if ((*p == static_cast('-')) || (*p == static_cast('+'))) { #else - if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + if (*p == static_cast('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here #endif ++p; if (p == pend) { @@ -132,7 +184,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ if (!is_integer(*p) && (rules == parse_rules::json_rules || *p != decimal_point)) return answer; } - const char *const start_digits = p; + const CharT *const start_digits = p; uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) @@ -140,17 +192,17 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + - uint64_t(*p - '0'); // might overflow, we will handle the overflow later + uint64_t(*p - static_cast('0')); // might overflow, we will handle the overflow later ++p; } - const char *const end_of_integer_part = p; + const CharT *const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = byte_span(start_digits, size_t(digit_count)); + answer.integer = span(start_digits, size_t(digit_count)); int64_t exponent = 0; const bool has_decimal_point = (p != pend) && (*p == decimal_point); if (has_decimal_point) { ++p; - const char* before = p; + const CharT* before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { @@ -158,12 +210,11 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ p += 8; } while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); + i = i * 10 + uint64_t(*p - static_cast('0')); // in rare cases, this will overflow, but that's ok ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok } exponent = before - p; - answer.fraction = byte_span(before, size_t(p - before)); + answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } // we must have encountered at least one integer (or two if a decimal point exists, with json rules). @@ -171,14 +222,14 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ return answer; } int64_t exp_number = 0; // explicit exponential part - if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) { - const char * location_of_e = p; + if ((fmt & chars_format::scientific) && (p != pend) && ((static_cast('e') == *p) || (static_cast('E') == *p))) { + const CharT * location_of_e = p; ++p; bool neg_exp = false; - if ((p != pend) && ('-' == *p)) { + if ((p != pend) && (static_cast('-') == *p)) { neg_exp = true; ++p; - } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + } else if ((p != pend) && (static_cast('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) ++p; } if ((p == pend) || !is_integer(*p)) { @@ -190,7 +241,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ p = location_of_e; } else { while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); + uint8_t digit = uint8_t(*p - static_cast('0')); if (exp_number < 0x10000000) { exp_number = 10 * exp_number + digit; } @@ -205,7 +256,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ } // disallow leading zeros before the decimal point - if (rules == parse_rules::json_rules && start_digits[0] == '0' && digit_count >= 2 && is_integer(start_digits[1])) + if (rules == parse_rules::json_rules && start_digits[0] == static_cast('0') && digit_count >= 2 && is_integer(start_digits[1])) return answer; answer.lastmatch = p; @@ -222,9 +273,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // We have to handle the case where we have 0.0000somenumber. // We need to be mindful of the case where we only have zeroes... // E.g., 0.000000000...000. - const char *start = start_digits; - while ((start != pend) && (*start == '0' || *start == decimal_point)) { - if(*start == '0') { digit_count --; } + const CharT *start = start_digits; + while ((start != pend) && (*start == static_cast('0') || *start == decimal_point)) { + if(*start == static_cast('0')) { digit_count --; } start++; } constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL}; @@ -241,19 +292,19 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // pre-tokenized spans from above. i = 0; p = answer.integer.ptr; - const char* int_end = p + answer.integer.len(); + const CharT* int_end = p + answer.integer.len(); const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; while((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - '0'); + i = i * 10 + uint64_t(*p - static_cast('0')); ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integers exponent = end_of_integer_part - p + exp_number; } else { // We have a value with a fractional component. p = answer.fraction.ptr; - const char* frac_end = p + answer.fraction.len(); + const CharT* frac_end = p + answer.fraction.len(); while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - '0'); + i = i * 10 + uint64_t(*p - static_cast('0')); ++p; } exponent = answer.fraction.ptr - p + exp_number; diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index 3959ba0..5ba91af 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -23,8 +23,9 @@ constexpr static uint64_t powers_of_ten_uint64[] = { // this algorithm is not even close to optimized, but it has no practical // effect on performance: in order to have a faster algorithm, we'd need // to slow down performance for faster algorithms, and this is still fast. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 -int32_t scientific_exponent(parsed_number_string& num) noexcept { +int32_t scientific_exponent(parsed_number_string& num) noexcept { uint64_t mantissa = num.mantissa; int32_t exponent = int32_t(num.exponent); while (mantissa >= 10000) { @@ -154,18 +155,19 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept { am.power2 += shift; } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void skip_zeros(const char*& first, const char* last) noexcept { +void skip_zeros(const CharT*& first, const CharT* last) noexcept { uint64_t val; while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { - ::memcpy(&val, first, sizeof(uint64_t)); + val = fast_read_u64(first); if (val != 0x3030303030303030) { break; } first += 8; } while (first != last) { - if (*first != '0') { + if (*first != static_cast('0')) { break; } first++; @@ -174,19 +176,20 @@ void skip_zeros(const char*& first, const char* last) noexcept { // determine if any non-zero digits were truncated. // all characters must be valid digits. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_truncated(const char* first, const char* last) noexcept { +bool is_truncated(const CharT* first, const CharT* last) noexcept { // do 8-bit optimizations, can just compare to 8 literal 0s. uint64_t val; while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { - ::memcpy(&val, first, sizeof(uint64_t)); + val = fast_read_u64(first); if (val != 0x3030303030303030) { return true; } first += 8; } while (first != last) { - if (*first != '0') { + if (*first != static_cast('0')) { return true; } first++; @@ -194,22 +197,25 @@ bool is_truncated(const char* first, const char* last) noexcept { return false; } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_truncated(byte_span s) noexcept { +bool is_truncated(span s) noexcept { return is_truncated(s.ptr, s.ptr + s.len()); } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { +void parse_eight_digits(const CharT*& p, limb& value, size_t& counter, size_t& count) noexcept { value = value * 100000000 + parse_eight_digits_unrolled(p); p += 8; counter += 8; count += 8; } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 -void parse_one_digit(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { - value = value * 10 + limb(*p - '0'); +void parse_one_digit(const CharT*& p, limb& value, size_t& counter, size_t& count) noexcept { + value = value * 10 + limb(*p - static_cast('0')); p++; counter++; count++; @@ -230,8 +236,9 @@ void round_up_bigint(bigint& big, size_t& count) noexcept { } // parse the significant digits into a big integer +template inline FASTFLOAT_CONSTEXPR20 -void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits, size_t& digits) noexcept { +void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits, size_t& digits) noexcept { // try to minimize the number of big integer and scalar multiplication. // therefore, try to parse 8 digits at a time, and multiply by the largest // scalar value (9 or 19 digits) for each step. @@ -245,8 +252,8 @@ void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits #endif // process all integer digits. - const char* p = num.integer.ptr; - const char* pend = p + num.integer.len(); + const CharT* p = num.integer.ptr; + const CharT* pend = p + num.integer.len(); skip_zeros(p, pend); // process all digits, in increments of step per loop while (p != pend) { @@ -395,9 +402,9 @@ adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int // `b` as a big-integer type, scaled to the same binary exponent as // the actual digits. we then compare the big integer representations // of both, and use that to direct rounding. -template +template inline FASTFLOAT_CONSTEXPR20 -adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa am) noexcept { +adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa am) noexcept { // remove the invalid exponent bias am.power2 -= invalid_am_bias; diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index c11627f..d4648e6 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -18,8 +18,9 @@ enum parse_rules { json_rules, }; +template struct from_chars_result { - const char *ptr; + const CharT *ptr; std::errc ec; }; @@ -59,17 +60,17 @@ struct parse_options { * to determine whether we allow the fixed point and scientific notation respectively. * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. */ -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars(const char *first, const char *last, +from_chars_result from_chars(const CharT *first, const CharT *last, T &value, chars_format fmt = chars_format::general) noexcept; /** * Like from_chars, but accepts an `options` argument to govern number parsing. */ -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_advanced(const char *first, const char *last, +from_chars_result from_chars_advanced(const CharT *first, const CharT *last, T &value, parse_options options) noexcept; } @@ -77,10 +78,10 @@ from_chars_result from_chars_advanced(const char *first, const char *last, #include "ascii_number.h" // parsed_number_string namespace fast_float { -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_preparsed(parsed_number_string parsed, - const char* first, const char* last, T& value) noexcept; +from_chars_result from_chars_preparsed(parsed_number_string parsed, + const CharT* first, const CharT* last, T& value) noexcept; } // namespace fast_float diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index c878486..eaa6e73 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -106,11 +106,13 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { } // Compares two ASCII strings in a case insensitive manner. +// maya: for now, keep input2 ASCII only +template inline FASTFLOAT_CONSTEXPR14 bool -fastfloat_strncasecmp(const char *input1, const char *input2, size_t length) { +fastfloat_strncasecmp(const CharT *input1, const char *input2, size_t length) { char running_diff{0}; for (size_t i = 0; i < length; i++) { - running_diff |= (input1[i] ^ input2[i]); + running_diff |= (static_cast(input1[i]) ^ input2[i]); } return (running_diff == 0) || (running_diff == 32); } diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index c13b641..2c2d2c3 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -19,19 +19,19 @@ namespace detail { * The case comparisons could be made much faster given that we know that the * strings a null-free and fixed. **/ -template -from_chars_result FASTFLOAT_CONSTEXPR14 -parse_infnan(const char *first, const char *last, T &value) noexcept { - from_chars_result answer{}; +template +from_chars_result FASTFLOAT_CONSTEXPR14 +parse_infnan(const CharT *first, const CharT *last, T &value) noexcept { + from_chars_result answer{}; answer.ptr = first; answer.ec = std::errc(); // be optimistic bool minusSign = false; - if (*first == '-') { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here + if (*first == static_cast('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here minusSign = true; ++first; } #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if (*first == '+') { + if (*first == static_cast('+')) { ++first; } #endif @@ -40,13 +40,15 @@ parse_infnan(const char *first, const char *last, T &value) noexcept { answer.ptr = (first += 3); value = minusSign ? -std::numeric_limits::quiet_NaN() : std::numeric_limits::quiet_NaN(); // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). - if(first != last && *first == '(') { - for(const char* ptr = first + 1; ptr != last; ++ptr) { - if (*ptr == ')') { + if(first != last && *first == static_cast('(')) { + for(const CharT* ptr = first + 1; ptr != last; ++ptr) { + if (*ptr == static_cast(')')) { answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) break; } - else if(!(('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z') || ('0' <= *ptr && *ptr <= '9') || *ptr == '_')) + else if(!((static_cast('a') <= *ptr && *ptr <= static_cast('z')) || + (static_cast('A') <= *ptr && *ptr <= static_cast('Z')) || + (static_cast('0') <= *ptr && *ptr <= static_cast('9')) || *ptr == static_cast('_'))) break; // forbidden char, not nan(n-char-seq-opt) } } @@ -132,21 +134,21 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { } // namespace detail -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars(const char *first, const char *last, +from_chars_result from_chars(const CharT *first, const CharT *last, T &value, chars_format fmt /*= chars_format::general*/) noexcept { return from_chars_advanced(first, last, value, parse_options{fmt}); } -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_preparsed(parsed_number_string pns, const char* first, const char* last, T& value) noexcept +from_chars_result from_chars_preparsed(parsed_number_string pns, const CharT* first, const CharT* last, T& value) noexcept { static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); - from_chars_result answer; + from_chars_result answer; if (!pns.valid) { return detail::parse_infnan(first, last, value); } @@ -205,12 +207,12 @@ from_chars_result from_chars_preparsed(parsed_number_string pns, const char* fir return answer; } -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_advanced(const char *first, const char *last, +from_chars_result from_chars_advanced(const CharT *first, const CharT *last, T &value, parse_options options) noexcept { - from_chars_result answer; + from_chars_result answer; #if FASTFLOAT_SKIP_WHITE_SPACE // disabled by default while ((first != last) && fast_float::is_space(uint8_t(*first))) { first++; From 20f3870361f9f799b528dad3466f3ec5d38374d4 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Thu, 30 Mar 2023 04:51:27 -0400 Subject: [PATCH 07/27] Fixes --- include/fast_float/ascii_number.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 3d85543..2676182 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -36,7 +36,7 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { | (val & 0x00000000000000FF) << 56; } -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +fastfloat_really_inline uint64_t fast_read_u64(const char* chars) { uint64_t val; @@ -48,7 +48,7 @@ fastfloat_really_inline uint64_t fast_read_u64(const char16_t* chars) { #if FASTFLOAT_SSE2 - const void* const p = chars; + const unsigned char* const p = reinterpret_cast(chars); static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff}; const __m128i m_masks = _mm_loadu_si128(reinterpret_cast(masks)); @@ -61,7 +61,7 @@ uint64_t fast_read_u64(const char16_t* chars) // extract uint64_t val; - _mm_storeu_epi64(&val, _mm_shuffle_epi32(packed, 0x8)); + _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8)); return val; #else alignas(8) unsigned char bytes[8]; From f59f73c4dac05e115e186a3b5a4401bd808c5781 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Sat, 1 Apr 2023 04:09:00 -0400 Subject: [PATCH 08/27] Disable simd-related warnings --- include/fast_float/ascii_number.h | 24 +++++++++++------------- include/fast_float/fast_float.h | 7 ++----- include/fast_float/float_common.h | 18 ++++++++++++++++++ 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 2676182..9c2ed8a 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -9,8 +9,6 @@ #include "float_common.h" -#define FASTFLOAT_SSE2 1 - #if FASTFLOAT_SSE2 #include #endif @@ -44,25 +42,26 @@ uint64_t fast_read_u64(const char* chars) return val; } +// https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg fastfloat_really_inline uint64_t fast_read_u64(const char16_t* chars) { #if FASTFLOAT_SSE2 - const unsigned char* const p = reinterpret_cast(chars); - +FASTFLOAT_SIMD_DISABLE_WARNINGS static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff}; const __m128i m_masks = _mm_loadu_si128(reinterpret_cast(masks)); - // mask hi bytes + + // mask hi bytes and pack + const char* const p = reinterpret_cast(chars); __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), m_masks); __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), m_masks); - - // pack into chars __m128i packed = _mm_packus_epi16(i1, i2); // extract uint64_t val; _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8)); return val; +FASTFLOAT_SIMD_RESTORE_WARNINGS #else alignas(8) unsigned char bytes[8]; for (int i = 0; i < 8; ++i) @@ -143,7 +142,7 @@ bool is_made_of_eight_digits_fast(const CharT *chars) noexcept { typedef span byte_span; -template +template struct parsed_number_string { int64_t exponent{0}; uint64_t mantissa{0}; @@ -161,10 +160,9 @@ struct parsed_number_string { // parse an ASCII string. template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -parsed_number_string parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept { +parsed_number_string parse_number_string(const CharT *p, const CharT *pend, parse_options options, const bool parse_ints = false) noexcept { const chars_format fmt = options.format; const parse_rules rules = options.rules; - const bool parse_ints = options.parse_ints; const CharT decimal_point = static_cast(options.decimal_point); parsed_number_string answer; @@ -279,9 +277,9 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen start++; } constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL}; - // maya: A 64-bit number may have up to 20 digits, not 19! - // If we're parsing ints, preserve accuracy up to 20 digits instead - // of converting them to the closest floating point value. + // maya: A 64-bit number may have up to 20 digits! + // If we're parsing ints, preserve accuracy up to 20 digits + // instead of rounding them to a floating point value. answer.too_many_digits = rules == parse_rules::json_rules && parse_ints && answer.is_64bit_int ? (digit_count > 20 || i < minimal_twenty_digit_integer) : digit_count > 19; diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index d4648e6..91870a7 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -27,16 +27,13 @@ struct from_chars_result { struct parse_options { constexpr explicit parse_options( chars_format fmt = chars_format::general, - parse_rules rules = parse_rules::std_rules, - bool parse_ints = false, char dot = '.') - : format(fmt), rules(rules), parse_ints(parse_ints), decimal_point(dot) {} + parse_rules rules = parse_rules::std_rules, char dot = '.') + : format(fmt), rules(rules), decimal_point(dot) {} /** Which number formats are accepted */ chars_format format; /** Which parsing rules to use */ parse_rules rules; - /* Whether to parse integers too, only applicable with json_rules */ - bool parse_ints; /** The character used as decimal point */ char decimal_point; }; diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index eaa6e73..7ca3284 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -78,6 +78,24 @@ #endif #endif + +#if defined(__GNUC__) +#define FASTFLOAT_SIMD_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-align=strict\"") +#else +#define FASTFLOAT_SIMD_DISABLE_WARNINGS +#endif + +#if defined(__GNUC__) +#define FASTFLOAT_SIMD_RESTORE_WARNINGS \ + _Pragma("GCC diagnostic pop") +#else +#define FASTFLOAT_SIMD_RESTORE_WARNINGS +#endif + + + #ifdef FASTFLOAT_VISUAL_STUDIO #define fastfloat_really_inline __forceinline #else From 8a9a9d538a2721ad6d8143230a08104c30032a37 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Sat, 1 Apr 2023 22:43:00 -0400 Subject: [PATCH 09/27] SIMD optimization to parse 8 char16_t at a time --- include/fast_float/ascii_number.h | 77 +++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 9c2ed8a..3b192dc 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -35,8 +35,7 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { } fastfloat_really_inline -uint64_t fast_read_u64(const char* chars) -{ +uint64_t fast_read_u64(const char* chars) { uint64_t val; ::memcpy(&val, chars, sizeof(uint64_t)); return val; @@ -44,8 +43,7 @@ uint64_t fast_read_u64(const char* chars) // https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg fastfloat_really_inline -uint64_t fast_read_u64(const char16_t* chars) -{ +uint64_t fast_read_u64(const char16_t* chars) { #if FASTFLOAT_SSE2 FASTFLOAT_SIMD_DISABLE_WARNINGS static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff}; @@ -63,7 +61,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS return val; FASTFLOAT_SIMD_RESTORE_WARNINGS #else - alignas(8) unsigned char bytes[8]; + unsigned char bytes[8]; for (int i = 0; i < 8; ++i) bytes[i] = (unsigned char)chars[i]; @@ -122,24 +120,74 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { return uint32_t(val); } -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const CharT *chars) noexcept { - return parse_eight_digits_unrolled(read_u64(chars)); -} - // credit @aqrit fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080)); } -template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_made_of_eight_digits_fast(const CharT *chars) noexcept { +uint32_t parse_eight_digits_unrolled(const char* chars) noexcept { + return parse_eight_digits_unrolled(read_u64(chars)); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool is_made_of_eight_digits_fast(const char *chars) noexcept { return is_made_of_eight_digits_fast(read_u64(chars)); } +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcept { + const bool all = is_made_of_eight_digits_fast(chars); + if (all) i = i * 100000000 * parse_eight_digits_unrolled(chars); + return all; +} + +// http://0x80.pl/articles/simd-parsing-int-sequences.html +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noexcept { + if (cpp20_and_in_constexpr() || !FASTFLOAT_SSE2) { + for (int i = 0; i < 8; ++i) { + if (chars[i] < u'0' || chars[i] > u'9') + return false; + } + i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars)); + return true; + } +#if FASTFLOAT_SSE2 +FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); + + // (x - '0') <= 9 + const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80)); + const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119)); + const bool is_digits = _mm_movemask_epi8(t1) == 0; + + if (is_digits) { + // x - '0' + const __m128i s1digits16 = _mm_sub_epi16(data, _mm_set1_epi16('0')); + // 10 * x(b) + x(b-1) -> 2 digit numbers + const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1); + const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32); + // 100 * x(b) + x(b-1) -> 4 digit numbers + const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1)); + const __m128i s4digits16 = _mm_packus_epi16(s4digits32, s4digits32); + // 10000 * x(b) + x(b-1) -> 8 digit number + const __m128i s8digits32 = _mm_madd_epi16(s4digits16, _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1)); + + uint32_t value; + _mm_storeu_si32(&value, s8digits32); + + i = i * 100000000 + value; + return true; + } + else return false; +FASTFLOAT_SIMD_RESTORE_WARNINGS +#endif +} + + + typedef span byte_span; template @@ -203,8 +251,7 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen const CharT* before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. - while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok + while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p)) { // in rare cases, this will overflow, but that's ok p += 8; } while ((p != pend) && is_integer(*p)) { From 2d57c09530445f1c4c98d6b128ef953767207e8e Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Sat, 1 Apr 2023 22:46:43 -0400 Subject: [PATCH 10/27] Fixes --- include/fast_float/ascii_number.h | 17 ++++++++++------- include/fast_float/float_common.h | 10 +++++++++- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 3b192dc..6acd3da 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -126,8 +126,9 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val 0x8080808080808080)); } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const char* chars) noexcept { +uint32_t parse_eight_digits_unrolled(const CharT* chars) noexcept { return parse_eight_digits_unrolled(read_u64(chars)); } @@ -139,14 +140,14 @@ bool is_made_of_eight_digits_fast(const char *chars) noexcept { fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcept { const bool all = is_made_of_eight_digits_fast(chars); - if (all) i = i * 100000000 * parse_eight_digits_unrolled(chars); + if (all) i = i * 100000000 + parse_eight_digits_unrolled(chars); return all; } // http://0x80.pl/articles/simd-parsing-int-sequences.html fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noexcept { - if (cpp20_and_in_constexpr() || !FASTFLOAT_SSE2) { + if (cpp20_and_in_constexpr() || !has_simd()) { for (int i = 0; i < 8; ++i) { if (chars[i] < u'0' || chars[i] > u'9') return false; @@ -154,9 +155,11 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars)); return true; } -#if FASTFLOAT_SSE2 +#if !FASTFLOAT_SSE2 + return false; // never reaches here, satisfy compiler +#else FASTFLOAT_SIMD_DISABLE_WARNINGS - const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); + const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); // (x - '0') <= 9 const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80)); @@ -167,7 +170,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS // x - '0' const __m128i s1digits16 = _mm_sub_epi16(data, _mm_set1_epi16('0')); // 10 * x(b) + x(b-1) -> 2 digit numbers - const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1); + const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1)); const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32); // 100 * x(b) + x(b-1) -> 4 digit numbers const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1)); @@ -251,7 +254,7 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen const CharT* before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. - while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p)) { // in rare cases, this will overflow, but that's ok + while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok p += 8; } while ((p != pend) && is_integer(*p)) { diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 7ca3284..fe64126 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -82,7 +82,7 @@ #if defined(__GNUC__) #define FASTFLOAT_SIMD_DISABLE_WARNINGS \ _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wcast-align=strict\"") + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") #else #define FASTFLOAT_SIMD_DISABLE_WARNINGS #endif @@ -123,6 +123,14 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { #endif } +fastfloat_really_inline constexpr bool has_simd() { +#if FASTFLOAT_SSE2 + return true; +#else + return false; +#endif +} + // Compares two ASCII strings in a case insensitive manner. // maya: for now, keep input2 ASCII only template From cda25408bcb1473a3684ae9de1d63036c3579c21 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Sun, 2 Apr 2023 00:32:30 -0400 Subject: [PATCH 11/27] Optimize char16_t parsing for digit_comparison.h --- include/fast_float/ascii_number.h | 59 +++++++++++++++++++------------ 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 6acd3da..aef072d 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -120,27 +120,56 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { return uint32_t(val); } +// http://0x80.pl/articles/simd-parsing-int-sequences.html +#if FASTFLOAT_SSE2 +fastfloat_really_inline +uint32_t parse_eight_digits_unrolled_c16(const __m128i val) { + // x - '0' + const __m128i s1digits16 = _mm_sub_epi16(val, _mm_set1_epi16('0')); + // 10 * x(b) + x(b-1) -> 2 digit numbers + const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1)); + const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32); + // 100 * x(b) + x(b-1) -> 4 digit numbers + const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1)); + const __m128i s4digits16 = _mm_packus_epi16(s4digits32, s4digits32); + // 10000 * x(b) + x(b-1) -> 8 digit number + const __m128i s8digits32 = _mm_madd_epi16(s4digits16, _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1)); + + uint32_t value; + _mm_storeu_si32(&value, s8digits32); + return value; +} +#endif + // credit @aqrit fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080)); } -template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const CharT* chars) noexcept { +uint32_t parse_eight_digits_unrolled(const char* chars) noexcept { return parse_eight_digits_unrolled(read_u64(chars)); } fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_made_of_eight_digits_fast(const char *chars) noexcept { - return is_made_of_eight_digits_fast(read_u64(chars)); +uint32_t parse_eight_digits_unrolled(const char16_t* chars) noexcept { + if (cpp20_and_in_constexpr() || !has_simd()) { + return parse_eight_digits_unrolled(read_u64(chars)); + } +#if !FASTFLOAT_SSE2 + return 0; // never reaches here, satisfy compiler +#else +FASTFLOAT_SIMD_DISABLE_WARNINGS + return parse_eight_digits_unrolled_c16(_mm_loadu_si128(reinterpret_cast(chars))); +FASTFLOAT_SIMD_RESTORE_WARNINGS +#endif } fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcept { - const bool all = is_made_of_eight_digits_fast(chars); - if (all) i = i * 100000000 + parse_eight_digits_unrolled(chars); + const bool all = is_made_of_eight_digits_fast(read_u64(chars)); + if (all) i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars)); return all; } @@ -160,28 +189,13 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe #else FASTFLOAT_SIMD_DISABLE_WARNINGS const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); - // (x - '0') <= 9 const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80)); const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119)); const bool is_digits = _mm_movemask_epi8(t1) == 0; if (is_digits) { - // x - '0' - const __m128i s1digits16 = _mm_sub_epi16(data, _mm_set1_epi16('0')); - // 10 * x(b) + x(b-1) -> 2 digit numbers - const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1)); - const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32); - // 100 * x(b) + x(b-1) -> 4 digit numbers - const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1)); - const __m128i s4digits16 = _mm_packus_epi16(s4digits32, s4digits32); - // 10000 * x(b) + x(b-1) -> 8 digit number - const __m128i s8digits32 = _mm_madd_epi16(s4digits16, _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1)); - - uint32_t value; - _mm_storeu_si32(&value, s8digits32); - - i = i * 100000000 + value; + i = i * 100000000 + parse_eight_digits_unrolled_c16(data); return true; } else return false; @@ -190,7 +204,6 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS } - typedef span byte_span; template From c849b7a8ff0520d2d337e23ef9bcca5d4f274619 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Sat, 15 Apr 2023 23:16:01 -0400 Subject: [PATCH 12/27] Option to forbid nan/inf, refactor --- include/fast_float/ascii_number.h | 80 ++++++++++++++++--------------- include/fast_float/fast_float.h | 24 ++++++++-- include/fast_float/float_common.h | 5 +- include/fast_float/parse_number.h | 9 ++-- 4 files changed, 72 insertions(+), 46 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index aef072d..9fbe9ac 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -157,7 +157,7 @@ uint32_t parse_eight_digits_unrolled(const char16_t* chars) noexcept { if (cpp20_and_in_constexpr() || !has_simd()) { return parse_eight_digits_unrolled(read_u64(chars)); } -#if !FASTFLOAT_SSE2 +#if !FASTFLOAT_HAS_SIMD return 0; // never reaches here, satisfy compiler #else FASTFLOAT_SIMD_DISABLE_WARNINGS @@ -184,7 +184,7 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars)); return true; } -#if !FASTFLOAT_SSE2 +#if !FASTFLOAT_HAS_SIMD return false; // never reaches here, satisfy compiler #else FASTFLOAT_SIMD_DISABLE_WARNINGS @@ -210,10 +210,10 @@ template struct parsed_number_string { int64_t exponent{0}; uint64_t mantissa{0}; + int64_t exp_number{0}; const CharT *lastmatch{nullptr}; bool negative{false}; bool valid{false}; - bool is_64bit_int{false}; bool too_many_digits{false}; // contains the range of the significant digits span integer{}; // non-nullable @@ -224,7 +224,7 @@ struct parsed_number_string { // parse an ASCII string. template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -parsed_number_string parse_number_string(const CharT *p, const CharT *pend, parse_options options, const bool parse_ints = false) noexcept { +parsed_number_string parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept { const chars_format fmt = options.format; const parse_rules rules = options.rules; const CharT decimal_point = static_cast(options.decimal_point); @@ -322,7 +322,7 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen answer.lastmatch = p; answer.valid = true; - answer.is_64bit_int = (p == end_of_integer_part); + answer.exp_number = exp_number; // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead @@ -339,45 +339,49 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen if(*start == static_cast('0')) { digit_count --; } start++; } - constexpr uint64_t minimal_twenty_digit_integer{10000000000000000000ULL}; - // maya: A 64-bit number may have up to 20 digits! - // If we're parsing ints, preserve accuracy up to 20 digits - // instead of rounding them to a floating point value. - answer.too_many_digits = rules == parse_rules::json_rules && parse_ints && answer.is_64bit_int ? - (digit_count > 20 || i < minimal_twenty_digit_integer) : digit_count > 19; - - if (answer.too_many_digits) { - answer.is_64bit_int = false; - // Let us start again, this time, avoiding overflows. - // We don't need to check if is_integer, since we use the - // pre-tokenized spans from above. - i = 0; - p = answer.integer.ptr; - const CharT* int_end = p + answer.integer.len(); - const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; - while((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - static_cast('0')); - ++p; - } - if (i >= minimal_nineteen_digit_integer) { // We have a big integers - exponent = end_of_integer_part - p + exp_number; - } else { // We have a value with a fractional component. - p = answer.fraction.ptr; - const CharT* frac_end = p + answer.fraction.len(); - while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - static_cast('0')); - ++p; - } - exponent = answer.fraction.ptr - p + exp_number; - } - // We have now corrected both exponent and i, to a truncated value - } + + // exponent/mantissa must be truncated later + answer.too_many_digits = digit_count > 19; } answer.exponent = exponent; answer.mantissa = i; return answer; } +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void truncate_exponent_mantissa(parsed_number_string& ps) +{ + // Let us start again, this time, avoiding overflows. + // We don't need to check if is_integer, since we use the + // pre-tokenized spans. + uint64_t i = 0; + int64_t exponent = 0; + const CharT* p = ps.integer.ptr; + const CharT* const int_end = p + ps.integer.len(); + const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; + while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { + i = i * 10 + uint64_t(*p - static_cast('0')); + ++p; + } + if (i >= minimal_nineteen_digit_integer) { // We have a big integers + exponent = int_end - p + ps.exp_number; + } + else { // We have a value with a fractional component. + p = ps.fraction.ptr; + const CharT* const frac_end = p + ps.fraction.len(); + while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - static_cast('0')); + ++p; + } + exponent = ps.fraction.ptr - p + ps.exp_number; + } + // We have now corrected both exponent and i, to a truncated value + + ps.exponent = exponent; + ps.mantissa = i; +} + } // namespace fast_float #endif diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index 91870a7..470d05a 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -26,18 +26,34 @@ struct from_chars_result { struct parse_options { constexpr explicit parse_options( - chars_format fmt = chars_format::general, - parse_rules rules = parse_rules::std_rules, char dot = '.') - : format(fmt), rules(rules), decimal_point(dot) {} + chars_format fmt = chars_format::general, + parse_rules rules = parse_rules::std_rules, + char dot = '.', bool allow_inf_nan = true) + : format(fmt), rules(rules), allow_inf_nan(allow_inf_nan), decimal_point(dot) {} /** Which number formats are accepted */ chars_format format; /** Which parsing rules to use */ parse_rules rules; + /** Whether to allow inf and nan */ + bool allow_inf_nan; /** The character used as decimal point */ char decimal_point; }; +struct preparsed_parse_options { + constexpr explicit preparsed_parse_options( + bool allow_inf_nan = true) + : allow_inf_nan(allow_inf_nan) {} + + constexpr preparsed_parse_options( + const parse_options& options) + : allow_inf_nan(options.allow_inf_nan) {} + + /** Whether to allow inf and nan */ + bool allow_inf_nan; +}; + /** * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. @@ -78,7 +94,7 @@ namespace fast_float { template FASTFLOAT_CONSTEXPR20 from_chars_result from_chars_preparsed(parsed_number_string parsed, - const CharT* first, const CharT* last, T& value) noexcept; + const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept; } // namespace fast_float diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index fe64126..4fb03fc 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -78,6 +78,9 @@ #endif #endif +#if FASTFLOAT_SSE2 +#define FASTFLOAT_HAS_SIMD (1) +#endif #if defined(__GNUC__) #define FASTFLOAT_SIMD_DISABLE_WARNINGS \ @@ -124,7 +127,7 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { } fastfloat_really_inline constexpr bool has_simd() { -#if FASTFLOAT_SSE2 +#if FASTFLOAT_HAS_SIMD return true; #else return false; diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 2c2d2c3..2f30e35 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -143,15 +143,18 @@ from_chars_result from_chars(const CharT *first, const CharT *last, template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_preparsed(parsed_number_string pns, const CharT* first, const CharT* last, T& value) noexcept +from_chars_result from_chars_preparsed(parsed_number_string pns, const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept { static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); - + from_chars_result answer; if (!pns.valid) { - return detail::parse_infnan(first, last, value); + return options.allow_inf_nan ? detail::parse_infnan(first, last, value) : answer; } + if (pns.too_many_digits) + truncate_exponent_mantissa(pns); + answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; // The implementation of the Clinger's fast path is convoluted because From 653790b5f3401abaa0bc9d13ede552b15b63a042 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Sun, 16 Apr 2023 00:36:52 -0400 Subject: [PATCH 13/27] fixes --- include/fast_float/parse_number.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 2f30e35..d5b266d 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -150,7 +150,13 @@ from_chars_result from_chars_preparsed(parsed_number_string pns, c from_chars_result answer; if (!pns.valid) { - return options.allow_inf_nan ? detail::parse_infnan(first, last, value) : answer; + if (options.allow_inf_nan) + return detail::parse_infnan(first, last, value); + else { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } } if (pns.too_many_digits) truncate_exponent_mantissa(pns); @@ -226,7 +232,7 @@ from_chars_result from_chars_advanced(const CharT *first, const CharT *la answer.ptr = first; return answer; } - answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value); + answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value, options); return answer; } From 89fc24007a5aaca3ef7c165625b67a96d14c040f Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 26 Apr 2023 16:25:41 -0400 Subject: [PATCH 14/27] Clean up --- include/fast_float/ascii_number.h | 60 +++++++++++++++------------ include/fast_float/digit_comparison.h | 36 +++++++++------- include/fast_float/float_common.h | 5 ++- include/fast_float/parse_number.h | 16 +++---- 4 files changed, 64 insertions(+), 53 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 9fbe9ac..c8a051f 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -9,7 +9,7 @@ #include "float_common.h" -#if FASTFLOAT_SSE2 +#ifdef FASTFLOAT_SSE2 #include #endif @@ -20,7 +20,7 @@ namespace fast_float { // able to optimize it well. template fastfloat_really_inline constexpr bool is_integer(CharT c) noexcept { - return c >= static_cast('0') && c <= static_cast('9'); + return c >= CharT('0') && c <= CharT('9'); } fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { @@ -42,9 +42,10 @@ uint64_t fast_read_u64(const char* chars) { } // https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg +// todo: add support for char32_t fastfloat_really_inline uint64_t fast_read_u64(const char16_t* chars) { -#if FASTFLOAT_SSE2 +#ifdef FASTFLOAT_SSE2 FASTFLOAT_SIMD_DISABLE_WARNINGS static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff}; const __m128i m_masks = _mm_loadu_si128(reinterpret_cast(masks)); @@ -65,6 +66,7 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS for (int i = 0; i < 8; ++i) bytes[i] = (unsigned char)chars[i]; + // bit-cast uint64_t val; ::memcpy(&val, bytes, sizeof(uint64_t)); return val; @@ -77,7 +79,7 @@ uint64_t read_u64(const CharT *chars) { if (cpp20_and_in_constexpr()) { uint64_t val = 0; for(int i = 0; i < 8; ++i) { - val |= uint64_t(*chars) << (i*8); + val |= uint64_t(char(*chars)) << (i*8); ++chars; } return val; @@ -121,7 +123,7 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { } // http://0x80.pl/articles/simd-parsing-int-sequences.html -#if FASTFLOAT_SSE2 +#ifdef FASTFLOAT_SSE2 fastfloat_really_inline uint32_t parse_eight_digits_unrolled_c16(const __m128i val) { // x - '0' @@ -152,13 +154,15 @@ uint32_t parse_eight_digits_unrolled(const char* chars) noexcept { return parse_eight_digits_unrolled(read_u64(chars)); } +// Call this if you know chars are only digits +//todo: add support for char32_t fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t parse_eight_digits_unrolled(const char16_t* chars) noexcept { if (cpp20_and_in_constexpr() || !has_simd()) { return parse_eight_digits_unrolled(read_u64(chars)); } -#if !FASTFLOAT_HAS_SIMD - return 0; // never reaches here, satisfy compiler +#ifndef FASTFLOAT_HAS_SIMD + return 0; // never reaches here, remove warning #else FASTFLOAT_SIMD_DISABLE_WARNINGS return parse_eight_digits_unrolled_c16(_mm_loadu_si128(reinterpret_cast(chars))); @@ -173,7 +177,9 @@ bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcep return all; } +// Call this if you don't know whether chars are only digits // http://0x80.pl/articles/simd-parsing-int-sequences.html +//todo: add support for char32_t fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noexcept { if (cpp20_and_in_constexpr() || !has_simd()) { @@ -184,17 +190,16 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noe i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars)); return true; } -#if !FASTFLOAT_HAS_SIMD - return false; // never reaches here, satisfy compiler +#ifndef FASTFLOAT_HAS_SIMD + return false; // never reaches here, remove warning #else FASTFLOAT_SIMD_DISABLE_WARNINGS const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); // (x - '0') <= 9 const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80)); const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119)); - const bool is_digits = _mm_movemask_epi8(t1) == 0; - if (is_digits) { + if (_mm_movemask_epi8(t1) == 0) { i = i * 100000000 + parse_eight_digits_unrolled_c16(data); return true; } @@ -227,16 +232,16 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept { const chars_format fmt = options.format; const parse_rules rules = options.rules; - const CharT decimal_point = static_cast(options.decimal_point); + const CharT decimal_point = CharT(options.decimal_point); parsed_number_string answer; answer.valid = false; answer.too_many_digits = false; - answer.negative = (*p == static_cast('-')); + answer.negative = (*p == CharT('-')); #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if ((*p == static_cast('-')) || (*p == static_cast('+'))) { + if ((*p == CharT('-')) || (*p == CharT('+'))) { #else - if (*p == static_cast('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + if (*p == CharT('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here #endif ++p; if (p == pend) { @@ -254,7 +259,7 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + - uint64_t(*p - static_cast('0')); // might overflow, we will handle the overflow later + uint64_t(*p - CharT('0')); // might overflow, we will handle the overflow later ++p; } const CharT *const end_of_integer_part = p; @@ -271,7 +276,7 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen p += 8; } while ((p != pend) && is_integer(*p)) { - i = i * 10 + uint64_t(*p - static_cast('0')); // in rare cases, this will overflow, but that's ok + i = i * 10 + uint64_t(*p - CharT('0')); // in rare cases, this will overflow, but that's ok ++p; } exponent = before - p; @@ -283,14 +288,14 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen return answer; } int64_t exp_number = 0; // explicit exponential part - if ((fmt & chars_format::scientific) && (p != pend) && ((static_cast('e') == *p) || (static_cast('E') == *p))) { + if ((fmt & chars_format::scientific) && (p != pend) && ((CharT('e') == *p) || (CharT('E') == *p))) { const CharT * location_of_e = p; ++p; bool neg_exp = false; - if ((p != pend) && (static_cast('-') == *p)) { + if ((p != pend) && (CharT('-') == *p)) { neg_exp = true; ++p; - } else if ((p != pend) && (static_cast('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + } else if ((p != pend) && (CharT('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) ++p; } if ((p == pend) || !is_integer(*p)) { @@ -302,7 +307,7 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen p = location_of_e; } else { while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - static_cast('0')); + uint8_t digit = uint8_t(*p - CharT('0')); if (exp_number < 0x10000000) { exp_number = 10 * exp_number + digit; } @@ -317,7 +322,7 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen } // disallow leading zeros before the decimal point - if (rules == parse_rules::json_rules && start_digits[0] == static_cast('0') && digit_count >= 2 && is_integer(start_digits[1])) + if (rules == parse_rules::json_rules && start_digits[0] == CharT('0') && digit_count >= 2 && is_integer(start_digits[1])) return answer; answer.lastmatch = p; @@ -335,12 +340,13 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen // We need to be mindful of the case where we only have zeroes... // E.g., 0.000000000...000. const CharT *start = start_digits; - while ((start != pend) && (*start == static_cast('0') || *start == decimal_point)) { - if(*start == static_cast('0')) { digit_count --; } + while ((start != pend) && (*start == CharT('0') || *start == decimal_point)) { + if(*start == CharT('0')) { digit_count --; } start++; } // exponent/mantissa must be truncated later + // this is unlikely, so don't inline truncation code with the rest of parse_number_string() answer.too_many_digits = digit_count > 19; } answer.exponent = exponent; @@ -350,7 +356,7 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void truncate_exponent_mantissa(parsed_number_string& ps) +void parse_truncated_number_string(parsed_number_string& ps) { // Let us start again, this time, avoiding overflows. // We don't need to check if is_integer, since we use the @@ -361,7 +367,7 @@ void truncate_exponent_mantissa(parsed_number_string& ps) const CharT* const int_end = p + ps.integer.len(); const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - static_cast('0')); + i = i * 10 + uint64_t(*p - CharT('0')); ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integers @@ -371,7 +377,7 @@ void truncate_exponent_mantissa(parsed_number_string& ps) p = ps.fraction.ptr; const CharT* const frac_end = p + ps.fraction.len(); while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - static_cast('0')); + i = i * 10 + uint64_t(*p - CharT('0')); ++p; } exponent = ps.fraction.ptr - p + ps.exp_number; diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index 5ba91af..73d6732 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -158,16 +158,18 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept { template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void skip_zeros(const CharT*& first, const CharT* last) noexcept { - uint64_t val; - while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { - val = fast_read_u64(first); - if (val != 0x3030303030303030) { - break; + if (std::is_same::value || has_simd()) { + uint64_t val; + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { + val = fast_read_u64(first); + if (val != 0x3030303030303030) { + break; + } + first += 8; } - first += 8; } while (first != last) { - if (*first != static_cast('0')) { + if (*first != CharT('0')) { break; } first++; @@ -179,17 +181,19 @@ void skip_zeros(const CharT*& first, const CharT* last) noexcept { template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool is_truncated(const CharT* first, const CharT* last) noexcept { - // do 8-bit optimizations, can just compare to 8 literal 0s. - uint64_t val; - while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { - val = fast_read_u64(first); - if (val != 0x3030303030303030) { - return true; + if (std::is_same::value || has_simd()) { + // do 8-bit optimizations, can just compare to 8 literal 0s. + uint64_t val; + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { + val = fast_read_u64(first); + if (val != 0x3030303030303030) { + return true; + } + first += 8; } - first += 8; } while (first != last) { - if (*first != static_cast('0')) { + if (*first != CharT('0')) { return true; } first++; @@ -215,7 +219,7 @@ void parse_eight_digits(const CharT*& p, limb& value, size_t& counter, size_t& c template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void parse_one_digit(const CharT*& p, limb& value, size_t& counter, size_t& count) noexcept { - value = value * 10 + limb(*p - static_cast('0')); + value = value * 10 + limb(*p - CharT('0')); p++; counter++; count++; diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 4fb03fc..020ae6b 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -78,11 +78,12 @@ #endif #endif -#if FASTFLOAT_SSE2 +#ifdef FASTFLOAT_SSE2 #define FASTFLOAT_HAS_SIMD (1) #endif #if defined(__GNUC__) +// disable -Wcast-align=strict (GCC only) #define FASTFLOAT_SIMD_DISABLE_WARNINGS \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wcast-align\"") @@ -141,7 +142,7 @@ inline FASTFLOAT_CONSTEXPR14 bool fastfloat_strncasecmp(const CharT *input1, const char *input2, size_t length) { char running_diff{0}; for (size_t i = 0; i < length; i++) { - running_diff |= (static_cast(input1[i]) ^ input2[i]); + running_diff |= (char(input1[i]) ^ input2[i]); } return (running_diff == 0) || (running_diff == 32); } diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index d5b266d..a407bfc 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -26,12 +26,12 @@ parse_infnan(const CharT *first, const CharT *last, T &value) noexcept { answer.ptr = first; answer.ec = std::errc(); // be optimistic bool minusSign = false; - if (*first == static_cast('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here + if (*first == CharT('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here minusSign = true; ++first; } #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if (*first == static_cast('+')) { + if (*first == CharT('+')) { ++first; } #endif @@ -40,15 +40,15 @@ parse_infnan(const CharT *first, const CharT *last, T &value) noexcept { answer.ptr = (first += 3); value = minusSign ? -std::numeric_limits::quiet_NaN() : std::numeric_limits::quiet_NaN(); // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). - if(first != last && *first == static_cast('(')) { + if(first != last && *first == CharT('(')) { for(const CharT* ptr = first + 1; ptr != last; ++ptr) { - if (*ptr == static_cast(')')) { + if (*ptr == CharT(')')) { answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) break; } - else if(!((static_cast('a') <= *ptr && *ptr <= static_cast('z')) || - (static_cast('A') <= *ptr && *ptr <= static_cast('Z')) || - (static_cast('0') <= *ptr && *ptr <= static_cast('9')) || *ptr == static_cast('_'))) + else if(!((CharT('a') <= *ptr && *ptr <= CharT('z')) || + (CharT('A') <= *ptr && *ptr <= CharT('Z')) || + (CharT('0') <= *ptr && *ptr <= CharT('9')) || *ptr == CharT('_'))) break; // forbidden char, not nan(n-char-seq-opt) } } @@ -159,7 +159,7 @@ from_chars_result from_chars_preparsed(parsed_number_string pns, c } } if (pns.too_many_digits) - truncate_exponent_mantissa(pns); + parse_truncated_number_string(pns); answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; From 091458d192d129336116df4b330983740b2c32b1 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Sun, 30 Apr 2023 02:20:24 -0400 Subject: [PATCH 15/27] Add basic support for char32_t (unoptimized) --- include/fast_float/ascii_number.h | 160 +++++++++++++------------- include/fast_float/digit_comparison.h | 8 +- 2 files changed, 81 insertions(+), 87 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index c72e210..cc9619c 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -34,49 +34,47 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { | (val & 0x00000000000000FF) << 56; } + +#ifdef FASTFLOAT_SSE2 + fastfloat_really_inline -uint64_t fast_read_u64(const char* chars) { - uint64_t val; - ::memcpy(&val, chars, sizeof(uint64_t)); - return val; +__m128i load_packus_masks_c16(void) noexcept { +FASTFLOAT_SIMD_DISABLE_WARNINGS + static const char16_t masks[] = { 0xff, 0xff, 0xff, 0xff }; + return _mm_loadu_si128(reinterpret_cast(masks)); +FASTFLOAT_SIMD_RESTORE_WARNINGS } -// https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg -// todo: add support for char32_t +// packus_masks is an argument only so its value may be preloaded. +// it should always come from load_packus_masks_c16(). fastfloat_really_inline -uint64_t fast_read_u64(const char16_t* chars) { -#ifdef FASTFLOAT_SSE2 +uint64_t simd_read8_to_u64(const char16_t* chars, const __m128i packus_masks) { FASTFLOAT_SIMD_DISABLE_WARNINGS - static const char16_t masks[] = {0xff, 0xff, 0xff, 0xff}; - const __m128i m_masks = _mm_loadu_si128(reinterpret_cast(masks)); - - // mask hi bytes and pack - const char* const p = reinterpret_cast(chars); - __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), m_masks); - __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), m_masks); + // process 4 and 4 chars simultaneously (loadu_si64 has high latency) + // with AVX512BW + AVX512VL, masking is not required as we have cvtepi16_epi8 + const char* const p = reinterpret_cast(chars); + __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), packus_masks); + __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), packus_masks); __m128i packed = _mm_packus_epi16(i1, i2); - // extract uint64_t val; _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8)); return val; FASTFLOAT_SIMD_RESTORE_WARNINGS -#else - unsigned char bytes[8]; - for (int i = 0; i < 8; ++i) - bytes[i] = (unsigned char)chars[i]; - - // bit-cast - uint64_t val; - ::memcpy(&val, bytes, sizeof(uint64_t)); - return val; -#endif } +// https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg +fastfloat_really_inline +uint64_t simd_read8_to_u64(const char16_t* chars) { + return simd_read8_to_u64(chars, load_packus_masks_c16()); +} +#endif + +// Read 8 CharT into a u64. Truncates CharT if != char. template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint64_t read_u64(const CharT *chars) { - if (cpp20_and_in_constexpr()) { +uint64_t read8_to_u64(const CharT *chars) { + if (cpp20_and_in_constexpr() || !std::is_same::value) { uint64_t val = 0; for(int i = 0; i < 8; ++i) { val |= uint64_t(char(*chars)) << (i*8); @@ -84,7 +82,8 @@ uint64_t read_u64(const CharT *chars) { } return val; } - uint64_t val = fast_read_u64(chars); + uint64_t val; + ::memcpy(&val, chars, sizeof(uint64_t)); #if FASTFLOAT_IS_BIG_ENDIAN == 1 // Need to read as-if the number was in little-endian order. val = byteswap(val); @@ -121,92 +120,87 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { return uint32_t(val); } -// http://0x80.pl/articles/simd-parsing-int-sequences.html -#ifdef FASTFLOAT_SSE2 -fastfloat_really_inline -uint32_t parse_eight_digits_unrolled_c16(const __m128i val) { - // x - '0' - const __m128i s1digits16 = _mm_sub_epi16(val, _mm_set1_epi16('0')); - // 10 * x(b) + x(b-1) -> 2 digit numbers - const __m128i s2digits32 = _mm_madd_epi16(s1digits16, _mm_setr_epi16(10, 1, 10, 1, 10, 1, 10, 1)); - const __m128i s2digits16 = _mm_packus_epi16(s2digits32, s2digits32); - // 100 * x(b) + x(b-1) -> 4 digit numbers - const __m128i s4digits32 = _mm_madd_epi16(s2digits16, _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1)); - const __m128i s4digits16 = _mm_packus_epi16(s4digits32, s4digits32); - // 10000 * x(b) + x(b-1) -> 8 digit number - const __m128i s8digits32 = _mm_madd_epi16(s4digits16, _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1)); - - uint32_t value; - _mm_storeu_si32(&value, s8digits32); - return value; -} -#endif - -// credit @aqrit -fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { - return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & - 0x8080808080808080)); -} +// Call this if chars are definitely 8 digits. fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t parse_eight_digits_unrolled(const char* chars) noexcept { - return parse_eight_digits_unrolled(read_u64(chars)); + return parse_eight_digits_unrolled(read8_to_u64(chars)); } -// Call this if you know chars are only digits -//todo: add support for char32_t fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t parse_eight_digits_unrolled(const char16_t* chars) noexcept { if (cpp20_and_in_constexpr() || !has_simd()) { - return parse_eight_digits_unrolled(read_u64(chars)); + return parse_eight_digits_unrolled(read8_to_u64(chars)); } -#ifndef FASTFLOAT_HAS_SIMD - return 0; // never reaches here, remove warning +#ifdef FASTFLOAT_HAS_SIMD + return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); #else -FASTFLOAT_SIMD_DISABLE_WARNINGS - return parse_eight_digits_unrolled_c16(_mm_loadu_si128(reinterpret_cast(chars))); -FASTFLOAT_SIMD_RESTORE_WARNINGS + // never reaches here, remove warning + return 0; #endif } +// todo, no simd optimization yet fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool parse_if_eight_digits_unrolled(const char* chars, std::uint64_t& i) noexcept { - const bool all = is_made_of_eight_digits_fast(read_u64(chars)); - if (all) i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars)); - return all; +uint32_t parse_eight_digits_unrolled(const char32_t* chars) noexcept { + return parse_eight_digits_unrolled(read8_to_u64(chars)); +} + + +// credit @aqrit +fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { + return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + 0x8080808080808080)); } -// Call this if you don't know whether chars are only digits -// http://0x80.pl/articles/simd-parsing-int-sequences.html -//todo: add support for char32_t fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool parse_if_eight_digits_unrolled(const char16_t* chars, std::uint64_t& i) noexcept { - if (cpp20_and_in_constexpr() || !has_simd()) { - for (int i = 0; i < 8; ++i) { - if (chars[i] < u'0' || chars[i] > u'9') - return false; - } - i = i * 100000000 + parse_eight_digits_unrolled(read_u64(chars)); - return true; +bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept { + const bool is_digits = is_made_of_eight_digits_fast(read8_to_u64(chars)); + if (is_digits) { + i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(chars)); } -#ifndef FASTFLOAT_HAS_SIMD - return false; // never reaches here, remove warning -#else + return is_digits; +} + +// Call this if chars might not be 8 digits. +// Using this (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled()) +// ensures we don't load SIMD registers twice. +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept { +#ifdef FASTFLOAT_SSE2 + if (cpp20_and_in_constexpr()) { + return false; + } FASTFLOAT_SIMD_DISABLE_WARNINGS const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); + const __m128i packus_masks = load_packus_masks_c16(); // be optimistic, preload + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80)); const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119)); if (_mm_movemask_epi8(t1) == 0) { - i = i * 100000000 + parse_eight_digits_unrolled_c16(data); + uint64_t digits = simd_read8_to_u64(chars, packus_masks); + i = i * 100000000 + parse_eight_digits_unrolled(digits); return true; } else return false; FASTFLOAT_SIMD_RESTORE_WARNINGS + +#else // No SIMD available + return false; #endif } +// todo, no simd optimization yet +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool parse_if_eight_digits_unrolled(const char32_t*, uint64_t&) noexcept { + return false; +} + + + typedef span byte_span; template diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index 73d6732..b9601c3 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -158,10 +158,10 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept { template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void skip_zeros(const CharT*& first, const CharT* last) noexcept { - if (std::is_same::value || has_simd()) { + if (std::is_same::value) { uint64_t val; while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { - val = fast_read_u64(first); + ::memcpy(&val, first, sizeof(uint64_t)); if (val != 0x3030303030303030) { break; } @@ -181,11 +181,11 @@ void skip_zeros(const CharT*& first, const CharT* last) noexcept { template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool is_truncated(const CharT* first, const CharT* last) noexcept { - if (std::is_same::value || has_simd()) { + if (std::is_same::value) { // do 8-bit optimizations, can just compare to 8 literal 0s. uint64_t val; while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { - val = fast_read_u64(first); + ::memcpy(&val, first, sizeof(uint64_t)); if (val != 0x3030303030303030) { return true; } From e08c55c38051883e5e9cad82957cc749a1fbda35 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Mon, 1 May 2023 19:45:50 -0400 Subject: [PATCH 16/27] Remove json parse rules/allow inf_nan --- .gitignore | 2 +- include/fast_float/ascii_number.h | 16 ++++------- include/fast_float/fast_float.h | 37 ++---------------------- include/fast_float/parse_number.h | 47 +++++++++++-------------------- 4 files changed, 25 insertions(+), 77 deletions(-) diff --git a/.gitignore b/.gitignore index a2601ad..5cc2dd6 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ compile_commands.json .vs/ Debug/ Release/ -/out/build/ +/out/ *.sln *.vcxproj *.vcxproj.filters diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index cc9619c..f583c2b 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -51,7 +51,7 @@ fastfloat_really_inline uint64_t simd_read8_to_u64(const char16_t* chars, const __m128i packus_masks) { FASTFLOAT_SIMD_DISABLE_WARNINGS // process 4 and 4 chars simultaneously (loadu_si64 has high latency) - // with AVX512BW + AVX512VL, masking is not required as we have cvtepi16_epi8 + // with AVX512BW + AVX512VL, masking is not required as we can use cvtepi16_epi8 const char* const p = reinterpret_cast(chars); __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), packus_masks); __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), packus_masks); @@ -223,8 +223,7 @@ template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string parse_number_string(const CharT *p, const CharT *pend, parse_options options) noexcept { const chars_format fmt = options.format; - const parse_rules rules = options.rules; - const CharT decimal_point = CharT(options.decimal_point); + const CharT decimal_point = options.decimal_point; parsed_number_string answer; answer.valid = false; @@ -240,7 +239,7 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen return answer; } // a sign must be followed by an integer or the dot - if (!is_integer(*p) && (rules == parse_rules::json_rules || *p != decimal_point)) + if (!is_integer(*p) && *p != decimal_point) return answer; } const CharT *const start_digits = p; @@ -275,8 +274,8 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } - // we must have encountered at least one integer (or two if a decimal point exists, with json rules). - if (digit_count == 0 || (rules == parse_rules::json_rules && has_decimal_point && digit_count == 1)) { + // we must have encountered at least one integer + if (digit_count == 0) { return answer; } int64_t exp_number = 0; // explicit exponential part @@ -312,11 +311,6 @@ parsed_number_string parse_number_string(const CharT *p, const CharT *pen // If it scientific and not fixed, we have to bail out. if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } } - - // disallow leading zeros before the decimal point - if (rules == parse_rules::json_rules && start_digits[0] == CharT('0') && digit_count >= 2 && is_integer(start_digits[1])) - return answer; - answer.lastmatch = p; answer.valid = true; answer.exp_number = exp_number; diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index 470d05a..57d39b3 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -13,11 +13,6 @@ enum chars_format { general = fixed | scientific }; -enum parse_rules { - std_rules, - json_rules, -}; - template struct from_chars_result { const CharT *ptr; @@ -26,34 +21,15 @@ struct from_chars_result { struct parse_options { constexpr explicit parse_options( - chars_format fmt = chars_format::general, - parse_rules rules = parse_rules::std_rules, - char dot = '.', bool allow_inf_nan = true) - : format(fmt), rules(rules), allow_inf_nan(allow_inf_nan), decimal_point(dot) {} + chars_format fmt = chars_format::general, char dot = '.') + : format(fmt), decimal_point(dot) {} /** Which number formats are accepted */ chars_format format; - /** Which parsing rules to use */ - parse_rules rules; - /** Whether to allow inf and nan */ - bool allow_inf_nan; /** The character used as decimal point */ char decimal_point; }; -struct preparsed_parse_options { - constexpr explicit preparsed_parse_options( - bool allow_inf_nan = true) - : allow_inf_nan(allow_inf_nan) {} - - constexpr preparsed_parse_options( - const parse_options& options) - : allow_inf_nan(options.allow_inf_nan) {} - - /** Whether to allow inf and nan */ - bool allow_inf_nan; -}; - /** * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. @@ -86,15 +62,6 @@ FASTFLOAT_CONSTEXPR20 from_chars_result from_chars_advanced(const CharT *first, const CharT *last, T &value, parse_options options) noexcept; -} - -#include "ascii_number.h" // parsed_number_string - -namespace fast_float { -template -FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_preparsed(parsed_number_string parsed, - const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept; } // namespace fast_float diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 1cc220b..99ccd49 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -143,23 +143,30 @@ from_chars_result from_chars(const CharT *first, const CharT *last, template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_preparsed(parsed_number_string pns, const CharT* first, const CharT* last, T& value, preparsed_parse_options options) noexcept +from_chars_result from_chars_advanced(const CharT *first, const CharT *last, + T &value, parse_options options) noexcept { { static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); from_chars_result answer; - if (!pns.valid) { - if (options.allow_inf_nan) - return detail::parse_infnan(first, last, value); - else { - answer.ec = std::errc::invalid_argument; - answer.ptr = first; - return answer; - } +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default + while ((first != last) && fast_float::is_space(uint8_t(*first))) { + first++; } - if (pns.too_many_digits) +#endif + if (first == last) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + parsed_number_string pns = parse_number_string(first, last, options); + if (!pns.valid) { + return detail::parse_infnan(first, last, value); + } + if (pns.too_many_digits) { parse_truncated_number_string(pns); + } answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; @@ -220,26 +227,6 @@ from_chars_result from_chars_preparsed(parsed_number_string pns, c return answer; } -template -FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_advanced(const CharT *first, const CharT *last, - T &value, parse_options options) noexcept { - - from_chars_result answer; -#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default - while ((first != last) && fast_float::is_space(uint8_t(*first))) { - first++; - } -#endif - if (first == last) { - answer.ec = std::errc::invalid_argument; - answer.ptr = first; - return answer; - } - answer = from_chars_preparsed(parse_number_string(first, last, options), first, last, value, options); - return answer; -} - } // namespace fast_float #endif From 5136b181bab4f9268c70a4c3a61ab00a897381bf Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Tue, 2 May 2023 01:41:49 -0400 Subject: [PATCH 17/27] Fixes and cleanup --- include/fast_float/ascii_number.h | 129 +++++++++++++++--------------- include/fast_float/float_common.h | 3 + 2 files changed, 66 insertions(+), 66 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 82a06b1..a15c2ef 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -9,6 +9,8 @@ #include "float_common.h" +#define FASTFLOAT_SSE2 + #ifdef FASTFLOAT_SSE2 #include #endif @@ -16,6 +18,15 @@ namespace fast_float { +template +fastfloat_really_inline constexpr bool has_simd_opts() { +#ifdef FASTFLOAT_HAS_SIMD + return std::is_same::value; +#else + return false; +#endif +} + // Next function can be micro-optimized, but compilers are entirely // able to optimize it well. template @@ -34,42 +45,6 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { | (val & 0x00000000000000FF) << 56; } - -#ifdef FASTFLOAT_SSE2 - -fastfloat_really_inline -__m128i load_packus_masks_c16(void) noexcept { -FASTFLOAT_SIMD_DISABLE_WARNINGS - static const char16_t masks[] = { 0xff, 0xff, 0xff, 0xff }; - return _mm_loadu_si128(reinterpret_cast(masks)); -FASTFLOAT_SIMD_RESTORE_WARNINGS -} - -// packus_masks is an argument only so its value may be preloaded. -// it should always come from load_packus_masks_c16(). -fastfloat_really_inline -uint64_t simd_read8_to_u64(const char16_t* chars, const __m128i packus_masks) { -FASTFLOAT_SIMD_DISABLE_WARNINGS - // process 4 and 4 chars simultaneously (loadu_si64 has high latency) - // with AVX512BW + AVX512VL, masking is not required as we can use cvtepi16_epi8 - const char* const p = reinterpret_cast(chars); - __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), packus_masks); - __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), packus_masks); - __m128i packed = _mm_packus_epi16(i1, i2); - - uint64_t val; - _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8)); - return val; -FASTFLOAT_SIMD_RESTORE_WARNINGS -} - -// https://quick-bench.com/q/fk6Y07KDGu8XZ9iUtQD8QJTc3Hg -fastfloat_really_inline -uint64_t simd_read8_to_u64(const char16_t* chars) { - return simd_read8_to_u64(chars, load_packus_masks_c16()); -} -#endif - // Read 8 UC into a u64. Truncates UC if not char. template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 @@ -77,7 +52,7 @@ uint64_t read8_to_u64(const UC *chars) { if (cpp20_and_in_constexpr() || !std::is_same::value) { uint64_t val = 0; for(int i = 0; i < 8; ++i) { - val |= uint64_t(char(*chars)) << (i*8); + val |= uint64_t(uint8_t(*chars)) << (i*8); ++chars; } return val; @@ -91,6 +66,35 @@ uint64_t read8_to_u64(const UC *chars) { return val; } +#ifdef FASTFLOAT_SSE2 + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const char16_t* chars) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + static const char16_t kmasks[] = { 0xff, 0xff, 0xff, 0xff }; + const __m128i masks = _mm_loadu_si128(reinterpret_cast(kmasks)); + + // pipeline 4 and 4 chars at the same time (since loadu_si64 has high latency) + // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead + const char* const p = reinterpret_cast(chars); + __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), masks); + __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), masks); + __m128i packed = _mm_packus_epi16(i1, i2); + + uint64_t val; + _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8)); + return val; +FASTFLOAT_SIMD_RESTORE_WARNINGS +} +#endif + +// dummy for compile +template ())> +uint64_t simd_read8_to_u64(UC const*) { + return 0; +} + + fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void write_u64(uint8_t *chars, uint64_t val) { if (cpp20_and_in_constexpr()) { @@ -122,28 +126,13 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { // Call this if chars are definitely 8 digits. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const char* chars) noexcept { - return parse_eight_digits_unrolled(read8_to_u64(chars)); -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const char16_t* chars) noexcept { - if (cpp20_and_in_constexpr() || !has_simd()) { - return parse_eight_digits_unrolled(read8_to_u64(chars)); +uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept { + if (cpp20_and_in_constexpr() || !has_simd_opts()) { + return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay } -#ifdef FASTFLOAT_HAS_SIMD return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); -#else - // never reaches here, removes warning - return 0; -#endif -} - -// todo, no simd optimization yet -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const char32_t* chars) noexcept { - return parse_eight_digits_unrolled(read8_to_u64(chars)); } @@ -163,8 +152,12 @@ bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept { } // Call this if chars might not be 8 digits. -// Using this (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled()) +// Using this (instead of is_made_of_eight_digits_fast() and parse_eight_digits_unrolled()) // ensures we don't load SIMD registers twice. +// +// Benchmark: +// https://quick-bench.com/q/Bbn0B4WmZsdgS3qDZWpggAY-jgs +// fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept { #ifdef FASTFLOAT_SSE2 @@ -173,7 +166,6 @@ bool parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept } FASTFLOAT_SIMD_DISABLE_WARNINGS const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); - const __m128i packus_masks = load_packus_masks_c16(); // be optimistic, preload // (x - '0') <= 9 // http://0x80.pl/articles/simd-parsing-int-sequences.html @@ -181,7 +173,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119)); if (_mm_movemask_epi8(t1) == 0) { - uint64_t digits = simd_read8_to_u64(chars, packus_masks); + uint64_t digits = simd_read8_to_u64(chars); i = i * 100000000 + parse_eight_digits_unrolled(digits); return true; } @@ -189,6 +181,8 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS FASTFLOAT_SIMD_RESTORE_WARNINGS #else // No SIMD available + + (void)chars; (void)i; // unused return false; #endif } @@ -212,8 +206,10 @@ struct parsed_number_string_t { span integer{}; // non-nullable span fraction{}; // nullable }; + using byte_span = span; using parsed_number_string = parsed_number_string_t; + // Assuming that you use no more than 19 digits, this will // parse an ASCII string. template @@ -265,6 +261,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par while ((p != pend) && is_integer(*p)) { uint8_t digit = uint8_t(*p - UC('0')); ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok } exponent = before - p; answer.fraction = span(before, size_t(p - before)); @@ -336,20 +333,20 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par return answer; } -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_truncated_number_string(parsed_number_string& ps) +void parse_truncated_number_string(parsed_number_string_t& ps) { // Let us start again, this time, avoiding overflows. // We don't need to check if is_integer, since we use the // pre-tokenized spans. uint64_t i = 0; int64_t exponent = 0; - const CharT* p = ps.integer.ptr; - const CharT* const int_end = p + ps.integer.len(); + const UC* p = ps.integer.ptr; + const UC* const int_end = p + ps.integer.len(); const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - CharT('0')); + i = i * 10 + uint64_t(*p - UC('0')); ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integers @@ -357,9 +354,9 @@ void parse_truncated_number_string(parsed_number_string& ps) } else { // We have a value with a fractional component. p = ps.fraction.ptr; - const CharT* const frac_end = p + ps.fraction.len(); + const UC* const frac_end = p + ps.fraction.len(); while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - CharT('0')); + i = i * 10 + uint64_t(*p - UC('0')); ++p; } exponent = ps.fraction.ptr - p + ps.exp_number; diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index f63a090..175389f 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -117,6 +117,9 @@ // rust style `try!()` macro, or `?` operator #define FASTFLOAT_TRY(x) { if (!(x)) return false; } +#define FASTFLOAT_ENABLE_IF(test) typename std::enable_if<(test), int>::type = 0 + + namespace fast_float { fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { From c811b027eaf481b97d5c139e8efb6915fbef44aa Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Tue, 2 May 2023 01:52:00 -0400 Subject: [PATCH 18/27] Remove testing macro --- include/fast_float/ascii_number.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index a15c2ef..59318f2 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -9,8 +9,6 @@ #include "float_common.h" -#define FASTFLOAT_SSE2 - #ifdef FASTFLOAT_SSE2 #include #endif From 4cb09b5f5939a6b7d12a73e692e5dba9edde4651 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Tue, 2 May 2023 13:05:57 -0400 Subject: [PATCH 19/27] Automatically detect SSE2 --- include/fast_float/ascii_number.h | 6 +++--- include/fast_float/float_common.h | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 59318f2..15fc1cf 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -73,7 +73,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS const __m128i masks = _mm_loadu_si128(reinterpret_cast(kmasks)); // pipeline 4 and 4 chars at the same time (since loadu_si64 has high latency) - // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead + // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead of masking + pack const char* const p = reinterpret_cast(chars); __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), masks); __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), masks); @@ -150,8 +150,8 @@ bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept { } // Call this if chars might not be 8 digits. -// Using this (instead of is_made_of_eight_digits_fast() and parse_eight_digits_unrolled()) -// ensures we don't load SIMD registers twice. +// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled()) +// ensures we don't load SIMD registers twice if we don't have to. // // Benchmark: // https://quick-bench.com/q/Bbn0B4WmZsdgS3qDZWpggAY-jgs diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 175389f..5a5942d 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -78,8 +78,14 @@ #endif #endif +#if defined(__SSE2__) || \ + (defined(FASTFLOAT_VISUAL_STUDIO) && \ + (defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))) +#define FASTFLOAT_SSE2 1 +#endif + #ifdef FASTFLOAT_SSE2 -#define FASTFLOAT_HAS_SIMD (1) +#define FASTFLOAT_HAS_SIMD 1 #endif #if defined(__GNUC__) From 53b065f38dcd5fdfdcf92139c68344e235b12475 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Sun, 7 May 2023 17:38:32 -0400 Subject: [PATCH 20/27] Avoid redundant load in SSE2 code --- include/fast_float/ascii_number.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 15fc1cf..f17aea6 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -67,23 +67,28 @@ uint64_t read8_to_u64(const UC *chars) { #ifdef FASTFLOAT_SSE2 fastfloat_really_inline -uint64_t simd_read8_to_u64(const char16_t* chars) { +uint64_t simd_read8_to_u64(const __m128i data) { FASTFLOAT_SIMD_DISABLE_WARNINGS - static const char16_t kmasks[] = { 0xff, 0xff, 0xff, 0xff }; + static const char16_t kmasks[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; const __m128i masks = _mm_loadu_si128(reinterpret_cast(kmasks)); - // pipeline 4 and 4 chars at the same time (since loadu_si64 has high latency) // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead of masking + pack - const char* const p = reinterpret_cast(chars); - __m128i i1 = _mm_and_si128(_mm_loadu_si64(p), masks); - __m128i i2 = _mm_and_si128(_mm_loadu_si64(p + 8), masks); - __m128i packed = _mm_packus_epi16(i1, i2); + __m128i masked = _mm_and_si128(data, masks); + __m128i packed = _mm_packus_epi16(masked, masked); uint64_t val; - _mm_storeu_si64(&val, _mm_shuffle_epi32(packed, 0x8)); + _mm_storeu_si64(&val, packed); return val; FASTFLOAT_SIMD_RESTORE_WARNINGS } + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const char16_t* chars) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast(chars))); +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + #endif // dummy for compile @@ -142,11 +147,11 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept { - const bool is_digits = is_made_of_eight_digits_fast(read8_to_u64(chars)); - if (is_digits) { + if (is_made_of_eight_digits_fast(read8_to_u64(chars))) { i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(chars)); + return true; } - return is_digits; + return false; } // Call this if chars might not be 8 digits. @@ -171,8 +176,7 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119)); if (_mm_movemask_epi8(t1) == 0) { - uint64_t digits = simd_read8_to_u64(chars); - i = i * 100000000 + parse_eight_digits_unrolled(digits); + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); return true; } else return false; From 6ede03878977d0a3469d3fc239168333dcf8a3f2 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Tue, 9 May 2023 22:19:23 -0400 Subject: [PATCH 21/27] Apply changes from benchmarked version - Move parse_truncated_number_string back inside parse_number_string --- include/fast_float/ascii_number.h | 68 +++++++++++++------------------ include/fast_float/parse_number.h | 3 -- 2 files changed, 28 insertions(+), 43 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index f17aea6..39f2a07 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -151,7 +151,7 @@ bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept { i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(chars)); return true; } - return false; + else return false; } // Call this if chars might not be 8 digits. @@ -199,7 +199,6 @@ template struct parsed_number_string_t { int64_t exponent{0}; uint64_t mantissa{0}; - int64_t exp_number{0}; UC const * lastmatch{nullptr}; bool negative{false}; bool valid{false}; @@ -308,7 +307,6 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par } answer.lastmatch = p; answer.valid = true; - answer.exp_number = exp_number; // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead @@ -326,49 +324,39 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par start++; } - // exponent/mantissa must be truncated later! - // this is unlikely, so don't inline truncation code with the rest of parse_number_string() - answer.too_many_digits = digit_count > 19; + if (digit_count > 19) { + answer.too_many_digits = true; + // Let us start again, this time, avoiding overflows. + // We don't need to check if is_integer, since we use the + // pre-tokenized spans from above. + i = 0; + p = answer.integer.ptr; + UC const* int_end = p + answer.integer.len(); + const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 }; + while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + if (i >= minimal_nineteen_digit_integer) { // We have a big integers + exponent = end_of_integer_part - p + exp_number; + } + else { // We have a value with a fractional component. + p = answer.fraction.ptr; + UC const* frac_end = p + answer.fraction.len(); + while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + exponent = answer.fraction.ptr - p + exp_number; + } + // We have now corrected both exponent and i, to a truncated value + } } answer.exponent = exponent; answer.mantissa = i; return answer; } -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_truncated_number_string(parsed_number_string_t& ps) -{ - // Let us start again, this time, avoiding overflows. - // We don't need to check if is_integer, since we use the - // pre-tokenized spans. - uint64_t i = 0; - int64_t exponent = 0; - const UC* p = ps.integer.ptr; - const UC* const int_end = p + ps.integer.len(); - const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; - while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - UC('0')); - ++p; - } - if (i >= minimal_nineteen_digit_integer) { // We have a big integers - exponent = int_end - p + ps.exp_number; - } - else { // We have a value with a fractional component. - p = ps.fraction.ptr; - const UC* const frac_end = p + ps.fraction.len(); - while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - UC('0')); - ++p; - } - exponent = ps.fraction.ptr - p + ps.exp_number; - } - // We have now corrected both exponent and i, to a truncated value - - ps.exponent = exponent; - ps.mantissa = i; -} - } // namespace fast_float #endif diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index cea24ea..e077b9d 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -166,9 +166,6 @@ from_chars_result_t from_chars_advanced(UC const * first, UC const * last, if (!pns.valid) { return detail::parse_infnan(first, last, value); } - if (pns.too_many_digits) { - parse_truncated_number_string(pns); - } answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; From 38613a39f9eb3763a68fe56f8acf22474ed9c5ed Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 17 May 2023 01:34:33 -0400 Subject: [PATCH 22/27] Fix perf decrease when UC = char --- include/fast_float/ascii_number.h | 80 +++++++++++++++---------------- include/fast_float/float_common.h | 2 +- 2 files changed, 39 insertions(+), 43 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 39f2a07..cc0af11 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -17,7 +17,7 @@ namespace fast_float { template -fastfloat_really_inline constexpr bool has_simd_opts() { +fastfloat_really_inline constexpr bool has_simd_opt() { #ifdef FASTFLOAT_HAS_SIMD return std::is_same::value; #else @@ -68,18 +68,7 @@ uint64_t read8_to_u64(const UC *chars) { fastfloat_really_inline uint64_t simd_read8_to_u64(const __m128i data) { -FASTFLOAT_SIMD_DISABLE_WARNINGS - static const char16_t kmasks[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - const __m128i masks = _mm_loadu_si128(reinterpret_cast(kmasks)); - - // todo: with AVX512BW + AVX512VL, can use cvtepi16_epi8 instead of masking + pack - __m128i masked = _mm_and_si128(data, masks); - __m128i packed = _mm_packus_epi16(masked, masked); - - uint64_t val; - _mm_storeu_si64(&val, packed); - return val; -FASTFLOAT_SIMD_RESTORE_WARNINGS + return _mm_cvtsi128_si64x(_mm_packus_epi16(data, data)); } fastfloat_really_inline @@ -92,7 +81,7 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS #endif // dummy for compile -template ())> +template ())> uint64_t simd_read8_to_u64(UC const*) { return 0; } @@ -132,7 +121,7 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept { - if (cpp20_and_in_constexpr() || !has_simd_opts()) { + if (cpp20_and_in_constexpr() || !has_simd_opt()) { return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay } return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); @@ -145,28 +134,18 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val 0x8080808080808080)); } -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool parse_if_eight_digits_unrolled(const char* chars, uint64_t& i) noexcept { - if (is_made_of_eight_digits_fast(read8_to_u64(chars))) { - i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(chars)); - return true; - } - else return false; -} + +#ifdef FASTFLOAT_HAS_SIMD // Call this if chars might not be 8 digits. // Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled()) -// ensures we don't load SIMD registers twice if we don't have to. -// -// Benchmark: -// https://quick-bench.com/q/Bbn0B4WmZsdgS3qDZWpggAY-jgs -// +// ensures we don't load SIMD registers twice. fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept { -#ifdef FASTFLOAT_SSE2 +bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept { if (cpp20_and_in_constexpr()) { return false; - } + } +#ifdef FASTFLOAT_SSE2 FASTFLOAT_SIMD_DISABLE_WARNINGS const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); @@ -181,18 +160,36 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS } else return false; FASTFLOAT_SIMD_RESTORE_WARNINGS - -#else // No SIMD available - - (void)chars; (void)i; // unused - return false; #endif } -// todo, no simd optimization yet +#endif + +// dummy for compile +template ())> +uint64_t simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) { + return 0; +} + + +template ::value)> fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool parse_if_eight_digits_unrolled(const char32_t*, uint64_t&) noexcept { - return false; +void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) { + if (!has_simd_opt()) { + return; + } + while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok + p += 8; + } +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) { + // optimizes better than parse_if_eight_digits_unrolled() for UC = char. + while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) { + i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok + p += 8; + } } template @@ -256,9 +253,8 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par UC const * before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. - while ((std::distance(p, pend) >= 8) && parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok - p += 8; - } + loop_parse_if_eight_digits(p, pend, i); + while ((p != pend) && is_integer(*p)) { uint8_t digit = uint8_t(*p - UC('0')); ++p; diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 201e72f..80b022e 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -157,7 +157,7 @@ using parse_options = parse_options_t; // rust style `try!()` macro, or `?` operator #define FASTFLOAT_TRY(x) { if (!(x)) return false; } -#define FASTFLOAT_ENABLE_IF(test) typename std::enable_if<(test), int>::type = 0 +#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0 namespace fast_float { From 064d2b832d705fdb7b81c8066a512bb65ed4362a Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 17 May 2023 02:03:07 -0400 Subject: [PATCH 23/27] Fix Werrors --- include/fast_float/ascii_number.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index cc0af11..e39fafc 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -68,7 +68,9 @@ uint64_t read8_to_u64(const UC *chars) { fastfloat_really_inline uint64_t simd_read8_to_u64(const __m128i data) { - return _mm_cvtsi128_si64x(_mm_packus_epi16(data, data)); +FASTFLOAT_SIMD_DISABLE_WARNINGS + return uint64_t(_mm_cvtsi128_si64x(_mm_packus_epi16(data, data))); +FASTFLOAT_SIMD_RESTORE_WARNINGS } fastfloat_really_inline From 1c9a3088bf9e3af9b838437c9efb00b5d877eec9 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 17 May 2023 02:35:16 -0400 Subject: [PATCH 24/27] Fix for VS 32-bit --- include/fast_float/ascii_number.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index e39fafc..ba6b0ef 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -69,7 +69,15 @@ uint64_t read8_to_u64(const UC *chars) { fastfloat_really_inline uint64_t simd_read8_to_u64(const __m128i data) { FASTFLOAT_SIMD_DISABLE_WARNINGS - return uint64_t(_mm_cvtsi128_si64x(_mm_packus_epi16(data, data))); + const __m128i packed = _mm_packus_epi16(data, data); +#ifdef FASTFLOAT_64BIT + return uint64_t(_mm_cvtsi128_si64x(packed)); +#else + uint64_t value; + // Visual Studio + older versions of GCC don't support _mm_storeu_si64 + _mm_storel_epi64(reinterpret_cast<__m128i*>(&value), packed); + return value; +#endif FASTFLOAT_SIMD_RESTORE_WARNINGS } From cb397ef446c3b59c73f24147386e0851ca2b93c1 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 17 May 2023 05:40:29 -0400 Subject: [PATCH 25/27] Fix for clang (missing _mm_cvtsi128_si64x) --- include/fast_float/ascii_number.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index ba6b0ef..5327a79 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -71,7 +71,7 @@ uint64_t simd_read8_to_u64(const __m128i data) { FASTFLOAT_SIMD_DISABLE_WARNINGS const __m128i packed = _mm_packus_epi16(data, data); #ifdef FASTFLOAT_64BIT - return uint64_t(_mm_cvtsi128_si64x(packed)); + return uint64_t(_mm_cvtsi128_si64(packed)); #else uint64_t value; // Visual Studio + older versions of GCC don't support _mm_storeu_si64 From a5632d5b5748e4673b00e1b6d2a89f049031d3b1 Mon Sep 17 00:00:00 2001 From: Maya Warrier Date: Wed, 17 May 2023 22:54:46 -0400 Subject: [PATCH 26/27] Fix digit comparison, cleanup --- include/fast_float/ascii_number.h | 4 ++-- include/fast_float/float_common.h | 8 -------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 5327a79..481b91d 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -161,8 +161,8 @@ FASTFLOAT_SIMD_DISABLE_WARNINGS // (x - '0') <= 9 // http://0x80.pl/articles/simd-parsing-int-sequences.html - const __m128i t0 = _mm_sub_epi16(data, _mm_set1_epi16(80)); - const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-119)); + const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720)); + const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759)); if (_mm_movemask_epi8(t1) == 0) { i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 80b022e..0418149 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -170,14 +170,6 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { #endif } -fastfloat_really_inline constexpr bool has_simd() { -#if FASTFLOAT_HAS_SIMD - return true; -#else - return false; -#endif -} - // Compares two ASCII strings in a case insensitive manner. template inline FASTFLOAT_CONSTEXPR14 bool From b7119471b161ff4ddcae383d55e3c4f060e3eea6 Mon Sep 17 00:00:00 2001 From: Maya Warrier <34803055+mayawarrier@users.noreply.github.com> Date: Mon, 22 May 2023 13:22:46 -0400 Subject: [PATCH 27/27] Update CONTRIBUTORS --- CONTRIBUTORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 03e02b4..e339869 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -5,4 +5,5 @@ Neal Richardson Tim Paine Fabio Pellacini Lénárd Szolnoki -Jan Pharago \ No newline at end of file +Jan Pharago +Maya Warrier \ No newline at end of file