From 6a3fabc342d4d149722fc2316babdcf24fdf8fb9 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 6 Mar 2023 21:02:19 -0500 Subject: [PATCH 01/11] Preparing release --- CMakeLists.txt | 2 +- tests/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 819df4f..4884c02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.9) -project(fast_float VERSION 3.2.0 LANGUAGES CXX) +project(fast_float VERSION 3.10.1 LANGUAGES CXX) option(FASTFLOAT_TEST "Enable tests" OFF) set(CMAKE_CXX_STANDARD 11 CACHE STRING "C++ standard to be used") set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9704aa2..d093b4d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,7 +9,7 @@ option(SYSTEM_DOCTEST "Use system copy of doctest" OFF) if (NOT SYSTEM_DOCTEST) FetchContent_Declare(doctest GIT_REPOSITORY https://github.com/onqtam/doctest.git - GIT_TAG 2.4.6) + GIT_TAG v2.4.10) endif() FetchContent_Declare(supplemental_test_files GIT_REPOSITORY https://github.com/fastfloat/supplemental_test_files.git From bc77f956e29dc8cc52d3a31cb33fc36af29bba29 Mon Sep 17 00:00:00 2001 From: Pharago Date: Sun, 2 Apr 2023 22:58:01 +0200 Subject: [PATCH 02/11] Initial Unicode release Added support for the other char types --- include/fast_float/ascii_number.h | 89 ++++++++++++++------------- include/fast_float/digit_comparison.h | 51 ++++++++------- include/fast_float/fast_float.h | 27 ++++---- include/fast_float/float_common.h | 74 +++++++++++++++++++++- include/fast_float/parse_number.h | 47 +++++++------- 5 files changed, 185 insertions(+), 103 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 72b8098..360ee56 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -12,8 +12,9 @@ namespace fast_float { // Next function can be micro-optimized, but compilers are entirely // able to optimize it well. -fastfloat_really_inline constexpr bool is_integer(char c) noexcept { - return c >= '0' && c <= '9'; +template +fastfloat_really_inline constexpr bool is_integer(TCH c) noexcept { + return !(c > TCH('9') || c < TCH('0')); } fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { @@ -26,13 +27,13 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { | (val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56; } - +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint64_t read_u64(const char *chars) { - if (cpp20_and_in_constexpr()) { - uint64_t val = 0; +uint64_t read_u64(TCH const * chars) { + if (cpp20_and_in_constexpr() || sizeof(TCH) > 1) { + uint64_t val{}; for(int i = 0; i < 8; ++i) { - val |= uint64_t(*chars) << (i*8); + val |= uint64_t(char(*chars)) << (i * 8); ++chars; } return val; @@ -74,9 +75,9 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; return uint32_t(val); } - +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { +uint32_t parse_eight_digits_unrolled(TCH const * chars) noexcept { return parse_eight_digits_unrolled(read_u64(chars)); } @@ -86,40 +87,42 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val 0x8080808080808080)); } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_made_of_eight_digits_fast(const char *chars) noexcept { +bool is_made_of_eight_digits_fast(TCH const * chars) noexcept { return is_made_of_eight_digits_fast(read_u64(chars)); } -typedef span byte_span; - -struct parsed_number_string { +template +struct parsed_number_string_t { int64_t exponent{0}; uint64_t mantissa{0}; - const char *lastmatch{nullptr}; + TCH const * lastmatch{nullptr}; bool negative{false}; bool valid{false}; bool too_many_digits{false}; // contains the range of the significant digits - byte_span integer{}; // non-nullable - byte_span fraction{}; // nullable + span integer{}; // non-nullable + span fraction{}; // nullable }; - +using byte_span = span; +//using parsed_number_string = parsed_number_string_t; // Assuming that you use no more than 19 digits, this will // parse an ASCII string. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept { - const chars_format fmt = options.format; - const char decimal_point = options.decimal_point; +parsed_number_string_t parse_number_string(TCH const *p, TCH const * pend, parse_options_t options) noexcept { + chars_format const fmt = options.format; + TCH const decimal_point = options.decimal_point; - parsed_number_string answer; + parsed_number_string_t answer; answer.valid = false; answer.too_many_digits = false; - answer.negative = (*p == '-'); + answer.negative = (*p == TCH('-')); #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if ((*p == '-') || (*p == '+')) { + if ((*p == TCH('-')) || (*p == TCH('+'))) { #else - if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + if (*p == TCH('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here #endif ++p; if (p == pend) { @@ -129,7 +132,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ return answer; } } - const char *const start_digits = p; + TCH const * const start_digits = p; uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) @@ -137,16 +140,16 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + - uint64_t(*p - '0'); // might overflow, we will handle the overflow later + uint64_t(*p - TCH('0')); // might overflow, we will handle the overflow later ++p; } - const char *const end_of_integer_part = p; + TCH const * const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = byte_span(start_digits, size_t(digit_count)); + answer.integer = span(start_digits, size_t(digit_count)); int64_t exponent = 0; if ((p != pend) && (*p == decimal_point)) { ++p; - const char* before = p; + TCH const * before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { @@ -154,12 +157,12 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ p += 8; } while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); + uint8_t digit = uint8_t(*p - TCH('0')); ++p; i = i * 10 + digit; // in rare cases, this will overflow, but that's ok } exponent = before - p; - answer.fraction = byte_span(before, size_t(p - before)); + answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } // we must have encountered at least one integer! @@ -167,14 +170,14 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ return answer; } int64_t exp_number = 0; // explicit exponential part - if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) { - const char * location_of_e = p; + if ((fmt & chars_format::scientific) && (p != pend) && ((TCH('e') == *p) || (TCH('E') == *p))) { + TCH const * location_of_e = p; ++p; bool neg_exp = false; - if ((p != pend) && ('-' == *p)) { + if ((p != pend) && (TCH('-') == *p)) { neg_exp = true; ++p; - } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + } else if ((p != pend) && (TCH('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) ++p; } if ((p == pend) || !is_integer(*p)) { @@ -186,7 +189,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ p = location_of_e; } else { while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); + uint8_t digit = uint8_t(*p - TCH('0')); if (exp_number < 0x10000000) { exp_number = 10 * exp_number + digit; } @@ -212,9 +215,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // We have to handle the case where we have 0.0000somenumber. // We need to be mindful of the case where we only have zeroes... // E.g., 0.000000000...000. - const char *start = start_digits; - while ((start != pend) && (*start == '0' || *start == decimal_point)) { - if(*start == '0') { digit_count --; } + TCH const * start = start_digits; + while ((start != pend) && (*start == TCH('0') || *start == decimal_point)) { + if(*start == TCH('0')) { digit_count --; } start++; } if (digit_count > 19) { @@ -224,19 +227,19 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // pre-tokenized spans from above. i = 0; p = answer.integer.ptr; - const char* int_end = p + answer.integer.len(); + TCH const * int_end = p + answer.integer.len(); const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; while((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - '0'); + i = i * 10 + uint64_t(*p - TCH('0')); ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integers exponent = end_of_integer_part - p + exp_number; } else { // We have a value with a fractional component. p = answer.fraction.ptr; - const char* frac_end = p + answer.fraction.len(); + TCH const * frac_end = p + answer.fraction.len(); while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - '0'); + i = i * 10 + uint64_t(*p - TCH('0')); ++p; } exponent = answer.fraction.ptr - p + exp_number; diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index 3959ba0..81b9882 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -23,8 +23,9 @@ constexpr static uint64_t powers_of_ten_uint64[] = { // this algorithm is not even close to optimized, but it has no practical // effect on performance: in order to have a faster algorithm, we'd need // to slow down performance for faster algorithms, and this is still fast. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 -int32_t scientific_exponent(parsed_number_string& num) noexcept { +int32_t scientific_exponent(parsed_number_string_t & num) noexcept { uint64_t mantissa = num.mantissa; int32_t exponent = int32_t(num.exponent); while (mantissa >= 10000) { @@ -153,19 +154,19 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept { } am.power2 += shift; } - +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void skip_zeros(const char*& first, const char* last) noexcept { +void skip_zeros(TCH const * & first, TCH const * last) noexcept { uint64_t val; - while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { ::memcpy(&val, first, sizeof(uint64_t)); - if (val != 0x3030303030303030) { + if (val != int_cmp_zeros()) { break; } - first += 8; + first += int_cmp_len(); } while (first != last) { - if (*first != '0') { + if (*first != TCH('0')) { break; } first++; @@ -174,42 +175,45 @@ void skip_zeros(const char*& first, const char* last) noexcept { // determine if any non-zero digits were truncated. // all characters must be valid digits. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_truncated(const char* first, const char* last) noexcept { +bool is_truncated(TCH const * first, TCH const * last) noexcept { // do 8-bit optimizations, can just compare to 8 literal 0s. uint64_t val; - while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { ::memcpy(&val, first, sizeof(uint64_t)); - if (val != 0x3030303030303030) { + if (val != int_cmp_zeros()) { return true; } - first += 8; + first += int_cmp_len(); } while (first != last) { - if (*first != '0') { + if (*first != TCH('0')) { return true; } - first++; + ++first; } return false; } - +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_truncated(byte_span s) noexcept { +bool is_truncated(span s) noexcept { return is_truncated(s.ptr, s.ptr + s.len()); } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { +void parse_eight_digits(TCH const *& p, limb& value, size_t& counter, size_t& count) noexcept { value = value * 100000000 + parse_eight_digits_unrolled(p); p += 8; counter += 8; count += 8; } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 -void parse_one_digit(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { - value = value * 10 + limb(*p - '0'); +void parse_one_digit(TCH const *& p, limb& value, size_t& counter, size_t& count) noexcept { + value = value * 10 + limb(*p - TCH('0')); p++; counter++; count++; @@ -230,8 +234,9 @@ void round_up_bigint(bigint& big, size_t& count) noexcept { } // parse the significant digits into a big integer +template inline FASTFLOAT_CONSTEXPR20 -void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits, size_t& digits) noexcept { +void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_digits, size_t& digits) noexcept { // try to minimize the number of big integer and scalar multiplication. // therefore, try to parse 8 digits at a time, and multiply by the largest // scalar value (9 or 19 digits) for each step. @@ -245,8 +250,8 @@ void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits #endif // process all integer digits. - const char* p = num.integer.ptr; - const char* pend = p + num.integer.len(); + TCH const * p = num.integer.ptr; + TCH const * pend = p + num.integer.len(); skip_zeros(p, pend); // process all digits, in increments of step per loop while (p != pend) { @@ -395,9 +400,9 @@ adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int // `b` as a big-integer type, scaled to the same binary exponent as // the actual digits. we then compare the big integer representations // of both, and use that to direct rounding. -template +template inline FASTFLOAT_CONSTEXPR20 -adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa am) noexcept { +adjusted_mantissa digit_comp(parsed_number_string_t& num, adjusted_mantissa am) noexcept { // remove the invalid exponent bias am.power2 -= invalid_am_bias; diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index 65704da..9686260 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -13,22 +13,25 @@ enum chars_format { general = fixed | scientific }; - -struct from_chars_result { - const char *ptr; +template +struct from_chars_result_t { + TCH const * ptr; std::errc ec; }; +using from_chars_result = from_chars_result_t; -struct parse_options { - constexpr explicit parse_options(chars_format fmt = chars_format::general, - char dot = '.') +template +struct parse_options_t { + constexpr explicit parse_options_t(chars_format fmt = chars_format::general, + TCH dot = TCH('.')) : format(fmt), decimal_point(dot) {} /** Which number formats are accepted */ chars_format format; /** The character used as decimal point */ - char decimal_point; + TCH decimal_point; }; +using parse_options = parse_options_t; /** * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting @@ -49,18 +52,18 @@ struct parse_options { * to determine whether we allow the fixed point and scientific notation respectively. * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. */ -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars(const char *first, const char *last, +from_chars_result_t from_chars(TCH const * first, TCH const * last, T &value, chars_format fmt = chars_format::general) noexcept; /** * Like from_chars, but accepts an `options` argument to govern number parsing. */ -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_advanced(const char *first, const char *last, - T &value, parse_options options) noexcept; +from_chars_result_t from_chars_advanced(TCH const * first, TCH const * last, + T &value, parse_options_t options) noexcept; } // namespace fast_float #include "parse_number.h" diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index c878486..704954d 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -106,11 +106,12 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { } // Compares two ASCII strings in a case insensitive manner. +template inline FASTFLOAT_CONSTEXPR14 bool -fastfloat_strncasecmp(const char *input1, const char *input2, size_t length) { +fastfloat_strncasecmp(TCH const * input1, TCH const * input2, size_t length) { char running_diff{0}; - for (size_t i = 0; i < length; i++) { - running_diff |= (input1[i] ^ input2[i]); + for (size_t i = 0; i < length; ++i) { + running_diff |= (char(input1[i]) ^ char(input2[i])); } return (running_diff == 0) || (running_diff == 32); } @@ -503,6 +504,73 @@ constexpr bool space_lut::value[]; inline constexpr bool is_space(uint8_t c) { return space_lut<>::value[c]; } #endif + +template +static constexpr uint64_t int_cmp_zeros() +{ + switch(sizeof(TCH)) + { + case 1: return 0x3030303030303030; + case 2: return (uint64_t(TCH('0')) << 48 | uint64_t(TCH('0')) << 32 | uint64_t(TCH('0')) << 16 | TCH('0')); + case 4: return (uint64_t(TCH('0')) << 32 | TCH('0')); + } + return 0; +} +template +static constexpr int int_cmp_len() +{ + return sizeof(uint64_t) / sizeof(TCH); +} +template +static constexpr TCH const * str_const_nan() +{ + return nullptr; +} +template<> +static constexpr char const * str_const_nan() +{ + return "nan"; +} +template<> +static constexpr wchar_t const * str_const_nan() +{ + return L"nan"; +} +template<> +static constexpr char16_t const * str_const_nan() +{ + return u"nan"; +} +template<> +static constexpr char32_t const * str_const_nan() +{ + return U"nan"; +} +template +static constexpr TCH const * str_const_inf() +{ + return nullptr; +} +template<> +static constexpr char const * str_const_inf() +{ + return "infinity"; +} +template<> +static constexpr wchar_t const * str_const_inf() +{ + return L"infinity"; +} +template<> +static constexpr char16_t const * str_const_inf() +{ + return u"infinity"; +} +template<> +static constexpr char32_t const * str_const_inf() +{ + return U"infinity"; +} } // namespace fast_float #endif diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 6e4f6eb..da16235 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -19,41 +19,41 @@ namespace detail { * The case comparisons could be made much faster given that we know that the * strings a null-free and fixed. **/ -template -from_chars_result FASTFLOAT_CONSTEXPR14 -parse_infnan(const char *first, const char *last, T &value) noexcept { - from_chars_result answer{}; +template +from_chars_result_t FASTFLOAT_CONSTEXPR14 +parse_infnan(TCH const * first, TCH const * last, T &value) noexcept { + from_chars_result_t answer{}; answer.ptr = first; answer.ec = std::errc(); // be optimistic bool minusSign = false; - if (*first == '-') { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here + if (*first == TCH('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here minusSign = true; ++first; } #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if (*first == '+') { + if (*first == TCH('+')) { ++first; } #endif if (last - first >= 3) { - if (fastfloat_strncasecmp(first, "nan", 3)) { + if (fastfloat_strncasecmp(first, str_const_nan(), 3)) { answer.ptr = (first += 3); value = minusSign ? -std::numeric_limits::quiet_NaN() : std::numeric_limits::quiet_NaN(); // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). - if(first != last && *first == '(') { - for(const char* ptr = first + 1; ptr != last; ++ptr) { - if (*ptr == ')') { + if(first != last && *first == TCH('(')) { + for(TCH const * ptr = first + 1; ptr != last; ++ptr) { + if (*ptr == TCH(')')) { answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) break; } - else if(!(('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z') || ('0' <= *ptr && *ptr <= '9') || *ptr == '_')) + else if(!((TCH('a') <= *ptr && *ptr <= TCH('z')) || (TCH('A') <= *ptr && *ptr <= TCH('Z')) || (TCH('0') <= *ptr && *ptr <= TCH('9')) || *ptr == TCH('_'))) break; // forbidden char, not nan(n-char-seq-opt) } } return answer; } - if (fastfloat_strncasecmp(first, "inf", 3)) { - if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, "inity", 5)) { + if (fastfloat_strncasecmp(first, str_const_inf(), 3)) { + if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, str_const_inf() + 3, 5)) { answer.ptr = first + 8; } else { answer.ptr = first + 3; @@ -132,22 +132,25 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { } // namespace detail -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars(const char *first, const char *last, +from_chars_result_t from_chars(TCH const * first, TCH const * last, T &value, chars_format fmt /*= chars_format::general*/) noexcept { - return from_chars_advanced(first, last, value, parse_options{fmt}); + return from_chars_advanced(first, last, value, parse_options_t{fmt}); } -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_advanced(const char *first, const char *last, - T &value, parse_options options) noexcept { +from_chars_result_t from_chars_advanced(TCH const * first, TCH const * last, + T &value, parse_options_t options) noexcept { static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); + static_assert (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value , "only char, wchar_t, char16_t and char32_t are supported"); - - from_chars_result answer; + from_chars_result_t answer; #if FASTFLOAT_SKIP_WHITE_SPACE // disabled by default while ((first != last) && fast_float::is_space(uint8_t(*first))) { first++; @@ -158,7 +161,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last, answer.ptr = first; return answer; } - parsed_number_string pns = parse_number_string(first, last, options); + parsed_number_string_t pns = parse_number_string(first, last, options); if (!pns.valid) { return detail::parse_infnan(first, last, value); } From d833148d7ebc51c3549adb834937759040dfcc15 Mon Sep 17 00:00:00 2001 From: Pharago Date: Sun, 2 Apr 2023 23:08:14 +0200 Subject: [PATCH 03/11] Update CONTRIBUTORS --- CONTRIBUTORS | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 3c428e6..311fc9c 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -5,3 +5,4 @@ Neal Richardson Tim Paine Fabio Pellacini Lénárd Szolnoki +Jan Pharago From 764341c1d9c8a88c55e58bcb62e1fdc42bc6acdb Mon Sep 17 00:00:00 2001 From: Pharago Date: Thu, 6 Apr 2023 00:41:11 +0200 Subject: [PATCH 04/11] Revert "Update CONTRIBUTORS" This reverts commit d833148d7ebc51c3549adb834937759040dfcc15. --- CONTRIBUTORS | 1 - 1 file changed, 1 deletion(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 311fc9c..3c428e6 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -5,4 +5,3 @@ Neal Richardson Tim Paine Fabio Pellacini Lénárd Szolnoki -Jan Pharago From 2bfbe4ca96a344caf1e072e090d80f46a14cc558 Mon Sep 17 00:00:00 2001 From: Pharago Date: Thu, 6 Apr 2023 00:58:34 +0200 Subject: [PATCH 05/11] cosmetic changes --- include/fast_float/ascii_number.h | 82 +++++++++++++-------------- include/fast_float/digit_comparison.h | 54 +++++++++--------- include/fast_float/fast_float.h | 20 +++---- include/fast_float/float_common.h | 24 ++++---- include/fast_float/parse_number.h | 50 ++++++++-------- 5 files changed, 115 insertions(+), 115 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index c2a7ea2..0751969 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -12,9 +12,9 @@ namespace fast_float { // Next function can be micro-optimized, but compilers are entirely // able to optimize it well. -template -fastfloat_really_inline constexpr bool is_integer(TCH c) noexcept { - return !(c > TCH('9') || c < TCH('0')); +template +fastfloat_really_inline constexpr bool is_integer(UC c) noexcept { + return !(c > UC('9') || c < UC('0')); } fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { @@ -27,10 +27,10 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { | (val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56; } -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint64_t read_u64(TCH const * chars) { - if (cpp20_and_in_constexpr() || sizeof(TCH) > 1) { +uint64_t read_u64(UC const * chars) { + if (cpp20_and_in_constexpr() || sizeof(UC) > 1) { uint64_t val{}; for(int i = 0; i < 8; ++i) { val |= uint64_t(char(*chars)) << (i * 8); @@ -75,9 +75,9 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; return uint32_t(val); } -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(TCH const * chars) noexcept { +uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept { return parse_eight_digits_unrolled(read_u64(chars)); } @@ -87,42 +87,42 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val 0x8080808080808080)); } -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_made_of_eight_digits_fast(TCH const * chars) noexcept { +bool is_made_of_eight_digits_fast(UC const * chars) noexcept { return is_made_of_eight_digits_fast(read_u64(chars)); } -template +template struct parsed_number_string_t { int64_t exponent{0}; uint64_t mantissa{0}; - TCH const * lastmatch{nullptr}; + UC const * lastmatch{nullptr}; bool negative{false}; bool valid{false}; bool too_many_digits{false}; // contains the range of the significant digits - span integer{}; // non-nullable - span fraction{}; // nullable + span integer{}; // non-nullable + span fraction{}; // nullable }; using byte_span = span; -//using parsed_number_string = parsed_number_string_t; +using parsed_number_string = parsed_number_string_t; // Assuming that you use no more than 19 digits, this will // parse an ASCII string. -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -parsed_number_string_t parse_number_string(TCH const *p, TCH const * pend, parse_options_t options) noexcept { +parsed_number_string_t parse_number_string(UC const *p, UC const * pend, parse_options_t options) noexcept { chars_format const fmt = options.format; - TCH const decimal_point = options.decimal_point; + UC const decimal_point = options.decimal_point; - parsed_number_string_t answer; + parsed_number_string_t answer; answer.valid = false; answer.too_many_digits = false; - answer.negative = (*p == TCH('-')); + answer.negative = (*p == UC('-')); #ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if ((*p == TCH('-')) || (*p == TCH('+'))) { + if ((*p == UC('-')) || (*p == UC('+'))) { #else - if (*p == TCH('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here #endif ++p; if (p == pend) { @@ -132,7 +132,7 @@ parsed_number_string_t parse_number_string(TCH const *p, TCH const * pend, return answer; } } - TCH const * const start_digits = p; + UC const * const start_digits = p; uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) @@ -140,16 +140,16 @@ parsed_number_string_t parse_number_string(TCH const *p, TCH const * pend, // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + - uint64_t(*p - TCH('0')); // might overflow, we will handle the overflow later + uint64_t(*p - UC('0')); // might overflow, we will handle the overflow later ++p; } - TCH const * const end_of_integer_part = p; + UC const * const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = span(start_digits, size_t(digit_count)); + answer.integer = span(start_digits, size_t(digit_count)); int64_t exponent = 0; if ((p != pend) && (*p == decimal_point)) { ++p; - TCH const * before = p; + UC const * before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { @@ -157,12 +157,12 @@ parsed_number_string_t parse_number_string(TCH const *p, TCH const * pend, p += 8; } while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - TCH('0')); + uint8_t digit = uint8_t(*p - UC('0')); ++p; i = i * 10 + digit; // in rare cases, this will overflow, but that's ok } exponent = before - p; - answer.fraction = span(before, size_t(p - before)); + answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } // we must have encountered at least one integer! @@ -170,14 +170,14 @@ parsed_number_string_t parse_number_string(TCH const *p, TCH const * pend, return answer; } int64_t exp_number = 0; // explicit exponential part - if ((fmt & chars_format::scientific) && (p != pend) && ((TCH('e') == *p) || (TCH('E') == *p))) { - TCH const * location_of_e = p; + if ((fmt & chars_format::scientific) && (p != pend) && ((UC('e') == *p) || (UC('E') == *p))) { + UC const * location_of_e = p; ++p; bool neg_exp = false; - if ((p != pend) && (TCH('-') == *p)) { + if ((p != pend) && (UC('-') == *p)) { neg_exp = true; ++p; - } else if ((p != pend) && (TCH('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + } else if ((p != pend) && (UC('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) ++p; } if ((p == pend) || !is_integer(*p)) { @@ -189,7 +189,7 @@ parsed_number_string_t parse_number_string(TCH const *p, TCH const * pend, p = location_of_e; } else { while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - TCH('0')); + uint8_t digit = uint8_t(*p - UC('0')); if (exp_number < 0x10000000) { exp_number = 10 * exp_number + digit; } @@ -215,9 +215,9 @@ parsed_number_string_t parse_number_string(TCH const *p, TCH const * pend, // We have to handle the case where we have 0.0000somenumber. // We need to be mindful of the case where we only have zeroes... // E.g., 0.000000000...000. - TCH const * start = start_digits; - while ((start != pend) && (*start == TCH('0') || *start == decimal_point)) { - if(*start == TCH('0')) { digit_count --; } + UC const * start = start_digits; + while ((start != pend) && (*start == UC('0') || *start == decimal_point)) { + if(*start == UC('0')) { digit_count --; } start++; } if (digit_count > 19) { @@ -227,19 +227,19 @@ parsed_number_string_t parse_number_string(TCH const *p, TCH const * pend, // pre-tokenized spans from above. i = 0; p = answer.integer.ptr; - TCH const * int_end = p + answer.integer.len(); + UC const * int_end = p + answer.integer.len(); const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; while((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - TCH('0')); + i = i * 10 + uint64_t(*p - UC('0')); ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integers exponent = end_of_integer_part - p + exp_number; } else { // We have a value with a fractional component. p = answer.fraction.ptr; - TCH const * frac_end = p + answer.fraction.len(); + UC const * frac_end = p + answer.fraction.len(); while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - TCH('0')); + i = i * 10 + uint64_t(*p - UC('0')); ++p; } exponent = answer.fraction.ptr - p + exp_number; diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index 81b9882..c85be59 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -23,9 +23,9 @@ constexpr static uint64_t powers_of_ten_uint64[] = { // this algorithm is not even close to optimized, but it has no practical // effect on performance: in order to have a faster algorithm, we'd need // to slow down performance for faster algorithms, and this is still fast. -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 -int32_t scientific_exponent(parsed_number_string_t & num) noexcept { +int32_t scientific_exponent(parsed_number_string_t & num) noexcept { uint64_t mantissa = num.mantissa; int32_t exponent = int32_t(num.exponent); while (mantissa >= 10000) { @@ -154,19 +154,19 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept { } am.power2 += shift; } -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void skip_zeros(TCH const * & first, TCH const * last) noexcept { +void skip_zeros(UC const * & first, UC const * last) noexcept { uint64_t val; - while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { ::memcpy(&val, first, sizeof(uint64_t)); - if (val != int_cmp_zeros()) { + if (val != int_cmp_zeros()) { break; } - first += int_cmp_len(); + first += int_cmp_len(); } while (first != last) { - if (*first != TCH('0')) { + if (*first != UC('0')) { break; } first++; @@ -175,45 +175,45 @@ void skip_zeros(TCH const * & first, TCH const * last) noexcept { // determine if any non-zero digits were truncated. // all characters must be valid digits. -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_truncated(TCH const * first, TCH const * last) noexcept { +bool is_truncated(UC const * first, UC const * last) noexcept { // do 8-bit optimizations, can just compare to 8 literal 0s. uint64_t val; - while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { ::memcpy(&val, first, sizeof(uint64_t)); - if (val != int_cmp_zeros()) { + if (val != int_cmp_zeros()) { return true; } - first += int_cmp_len(); + first += int_cmp_len(); } while (first != last) { - if (*first != TCH('0')) { + if (*first != UC('0')) { return true; } ++first; } return false; } -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_truncated(span s) noexcept { +bool is_truncated(span s) noexcept { return is_truncated(s.ptr, s.ptr + s.len()); } -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(TCH const *& p, limb& value, size_t& counter, size_t& count) noexcept { +void parse_eight_digits(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept { value = value * 100000000 + parse_eight_digits_unrolled(p); p += 8; counter += 8; count += 8; } -template +template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 -void parse_one_digit(TCH const *& p, limb& value, size_t& counter, size_t& count) noexcept { - value = value * 10 + limb(*p - TCH('0')); +void parse_one_digit(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept { + value = value * 10 + limb(*p - UC('0')); p++; counter++; count++; @@ -234,9 +234,9 @@ void round_up_bigint(bigint& big, size_t& count) noexcept { } // parse the significant digits into a big integer -template +template inline FASTFLOAT_CONSTEXPR20 -void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_digits, size_t& digits) noexcept { +void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_digits, size_t& digits) noexcept { // try to minimize the number of big integer and scalar multiplication. // therefore, try to parse 8 digits at a time, and multiply by the largest // scalar value (9 or 19 digits) for each step. @@ -250,8 +250,8 @@ void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max #endif // process all integer digits. - TCH const * p = num.integer.ptr; - TCH const * pend = p + num.integer.len(); + UC const * p = num.integer.ptr; + UC const * pend = p + num.integer.len(); skip_zeros(p, pend); // process all digits, in increments of step per loop while (p != pend) { @@ -400,9 +400,9 @@ adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int // `b` as a big-integer type, scaled to the same binary exponent as // the actual digits. we then compare the big integer representations // of both, and use that to direct rounding. -template +template inline FASTFLOAT_CONSTEXPR20 -adjusted_mantissa digit_comp(parsed_number_string_t& num, adjusted_mantissa am) noexcept { +adjusted_mantissa digit_comp(parsed_number_string_t& num, adjusted_mantissa am) noexcept { // remove the invalid exponent bias am.power2 -= invalid_am_bias; diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index 9686260..1cc25f4 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -13,23 +13,23 @@ enum chars_format { general = fixed | scientific }; -template +template struct from_chars_result_t { - TCH const * ptr; + UC const * ptr; std::errc ec; }; using from_chars_result = from_chars_result_t; -template +template struct parse_options_t { constexpr explicit parse_options_t(chars_format fmt = chars_format::general, - TCH dot = TCH('.')) + UC dot = UC('.')) : format(fmt), decimal_point(dot) {} /** Which number formats are accepted */ chars_format format; /** The character used as decimal point */ - TCH decimal_point; + UC decimal_point; }; using parse_options = parse_options_t; @@ -52,18 +52,18 @@ using parse_options = parse_options_t; * to determine whether we allow the fixed point and scientific notation respectively. * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. */ -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result_t from_chars(TCH const * first, TCH const * last, +from_chars_result_t from_chars(UC const * first, UC const * last, T &value, chars_format fmt = chars_format::general) noexcept; /** * Like from_chars, but accepts an `options` argument to govern number parsing. */ -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result_t from_chars_advanced(TCH const * first, TCH const * last, - T &value, parse_options_t options) noexcept; +from_chars_result_t from_chars_advanced(UC const * first, UC const * last, + T &value, parse_options_t options) noexcept; } // namespace fast_float #include "parse_number.h" diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index bb45338..d0467f3 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -106,9 +106,9 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { } // Compares two ASCII strings in a case insensitive manner. -template +template inline FASTFLOAT_CONSTEXPR14 bool -fastfloat_strncasecmp(TCH const * input1, TCH const * input2, size_t length) { +fastfloat_strncasecmp(UC const * input1, UC const * input2, size_t length) { char running_diff{0}; for (size_t i = 0; i < length; ++i) { running_diff |= (char(input1[i]) ^ char(input2[i])); @@ -505,24 +505,24 @@ constexpr bool space_lut::value[]; inline constexpr bool is_space(uint8_t c) { return space_lut<>::value[c]; } #endif -template +template static constexpr uint64_t int_cmp_zeros() { - switch(sizeof(TCH)) + switch(sizeof(UC)) { case 1: return 0x3030303030303030; - case 2: return (uint64_t(TCH('0')) << 48 | uint64_t(TCH('0')) << 32 | uint64_t(TCH('0')) << 16 | TCH('0')); - case 4: return (uint64_t(TCH('0')) << 32 | TCH('0')); + case 2: return (uint64_t(UC('0')) << 48 | uint64_t(UC('0')) << 32 | uint64_t(UC('0')) << 16 | UC('0')); + case 4: return (uint64_t(UC('0')) << 32 | UC('0')); } return 0; } -template +template static constexpr int int_cmp_len() { - return sizeof(uint64_t) / sizeof(TCH); + return sizeof(uint64_t) / sizeof(UC); } -template -static constexpr TCH const * str_const_nan() +template +static constexpr UC const * str_const_nan() { return nullptr; } @@ -546,8 +546,8 @@ static constexpr char32_t const * str_const_nan() { return U"nan"; } -template -static constexpr TCH const * str_const_inf() +template +static constexpr UC const * str_const_inf() { return nullptr; } diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index c074c59..726d761 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -19,41 +19,41 @@ namespace detail { * The case comparisons could be made much faster given that we know that the * strings a null-free and fixed. **/ -template -from_chars_result_t FASTFLOAT_CONSTEXPR14 -parse_infnan(TCH const * first, TCH const * last, T &value) noexcept { - from_chars_result_t answer{}; +template +from_chars_result_t FASTFLOAT_CONSTEXPR14 +parse_infnan(UC const * first, UC const * last, T &value) noexcept { + from_chars_result_t answer{}; answer.ptr = first; answer.ec = std::errc(); // be optimistic bool minusSign = false; - if (*first == TCH('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here + if (*first == UC('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here minusSign = true; ++first; } #ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if (*first == TCH('+')) { + if (*first == UC('+')) { ++first; } #endif if (last - first >= 3) { - if (fastfloat_strncasecmp(first, str_const_nan(), 3)) { + if (fastfloat_strncasecmp(first, str_const_nan(), 3)) { answer.ptr = (first += 3); value = minusSign ? -std::numeric_limits::quiet_NaN() : std::numeric_limits::quiet_NaN(); // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). - if(first != last && *first == TCH('(')) { - for(TCH const * ptr = first + 1; ptr != last; ++ptr) { - if (*ptr == TCH(')')) { + if(first != last && *first == UC('(')) { + for(UC const * ptr = first + 1; ptr != last; ++ptr) { + if (*ptr == UC(')')) { answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) break; } - else if(!((TCH('a') <= *ptr && *ptr <= TCH('z')) || (TCH('A') <= *ptr && *ptr <= TCH('Z')) || (TCH('0') <= *ptr && *ptr <= TCH('9')) || *ptr == TCH('_'))) + else if(!((UC('a') <= *ptr && *ptr <= UC('z')) || (UC('A') <= *ptr && *ptr <= UC('Z')) || (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_'))) break; // forbidden char, not nan(n-char-seq-opt) } } return answer; } - if (fastfloat_strncasecmp(first, str_const_inf(), 3)) { - if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, str_const_inf() + 3, 5)) { + if (fastfloat_strncasecmp(first, str_const_inf(), 3)) { + if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, str_const_inf() + 3, 5)) { answer.ptr = first + 8; } else { answer.ptr = first + 3; @@ -132,25 +132,25 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { } // namespace detail -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result_t from_chars(TCH const * first, TCH const * last, +from_chars_result_t from_chars(UC const * first, UC const * last, T &value, chars_format fmt /*= chars_format::general*/) noexcept { - return from_chars_advanced(first, last, value, parse_options_t{fmt}); + return from_chars_advanced(first, last, value, parse_options_t{fmt}); } -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result_t from_chars_advanced(TCH const * first, TCH const * last, - T &value, parse_options_t options) noexcept { +from_chars_result_t from_chars_advanced(UC const * first, UC const * last, + T &value, parse_options_t options) noexcept { static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); - static_assert (std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value , "only char, wchar_t, char16_t and char32_t are supported"); + static_assert (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value , "only char, wchar_t, char16_t and char32_t are supported"); - from_chars_result_t answer; + from_chars_result_t answer; #ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default while ((first != last) && fast_float::is_space(uint8_t(*first))) { first++; @@ -161,7 +161,7 @@ from_chars_result_t from_chars_advanced(TCH const * first, TCH const * last answer.ptr = first; return answer; } - parsed_number_string_t pns = parse_number_string(first, last, options); + parsed_number_string_t pns = parse_number_string(first, last, options); if (!pns.valid) { return detail::parse_infnan(first, last, value); } From ece3b3886b95de6c766d7746de8bda7cc2c4b1f3 Mon Sep 17 00:00:00 2001 From: Pharago Date: Thu, 6 Apr 2023 20:58:31 +0200 Subject: [PATCH 06/11] Fix storage class errors on non msvc builds Removed storage class declarations from explicit template specializations of string constants --- include/fast_float/float_common.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index d0467f3..00471d9 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -527,22 +527,22 @@ static constexpr UC const * str_const_nan() return nullptr; } template<> -static constexpr char const * str_const_nan() +constexpr char const * str_const_nan() { return "nan"; } template<> -static constexpr wchar_t const * str_const_nan() +constexpr wchar_t const * str_const_nan() { return L"nan"; } template<> -static constexpr char16_t const * str_const_nan() +constexpr char16_t const * str_const_nan() { return u"nan"; } template<> -static constexpr char32_t const * str_const_nan() +constexpr char32_t const * str_const_nan() { return U"nan"; } @@ -552,22 +552,22 @@ static constexpr UC const * str_const_inf() return nullptr; } template<> -static constexpr char const * str_const_inf() +constexpr char const * str_const_inf() { return "infinity"; } template<> -static constexpr wchar_t const * str_const_inf() +constexpr wchar_t const * str_const_inf() { return L"infinity"; } template<> -static constexpr char16_t const * str_const_inf() +constexpr char16_t const * str_const_inf() { return u"infinity"; } template<> -static constexpr char32_t const * str_const_inf() +constexpr char32_t const * str_const_inf() { return U"infinity"; } From acce5b4158ed89a050255404d48d12e871163079 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 26 Apr 2023 16:46:09 -0400 Subject: [PATCH 07/11] Documentation. --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 1327f5a..fe1c1ca 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,24 @@ constexpr double constexptest() { } ``` +## Non-ASCII Inputs + +We also support UTF-16 and UTF-32 inputs, as well as ASCII/UTF-8, as in the following example: + +``` C++ +#include "fast_float/fast_float.h" +#include + +int main() { + const std::u16string input = u"3.1416 xyz "; + double result; + auto answer = fast_float::from_chars(input.data(), input.data()+input.size(), result); + if(answer.ec != std::errc()) { std::cerr << "parsing failure\n"; return EXIT_FAILURE; } + std::cout << "parsed the number " << result << std::endl; + return EXIT_SUCCESS; +} +``` + ## Using commas as decimal separator From 545c1845969c4b4427d40a79c25222d4fc2cd0da Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 26 Apr 2023 16:53:44 -0400 Subject: [PATCH 08/11] Stack!!! --- tests/basictest.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/basictest.cpp b/tests/basictest.cpp index 09fa077..3d7f753 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -702,32 +702,33 @@ constexpr void check_basic_test_result(stringtype str, #undef FASTFLOAT_CHECK_EQ } -// We give plenty of memory: 2048 characters. -const size_t global_string_capacity = 2048; -std::u16string u16(global_string_capacity, '\0'); -std::u32string u32(global_string_capacity, '\0'); template constexpr void basic_test(std::string_view str, T expected, std::errc expected_ec = std::errc()) { T actual; auto result = fast_float::from_chars(str.data(), str.data() + str.size(), actual); check_basic_test_result(str, result, actual, expected, expected_ec); + constexpr size_t global_string_capacity = 2048; + if(str.size() > global_string_capacity) { return; } + // We give plenty of memory: 2048 characters. + char16_t u16[global_string_capacity]{}; for (size_t i = 0; i < str.size(); i++) { u16[i] = char16_t(str[i]); } - auto result16 = fast_float::from_chars(u16.data(), u16.data() + str.size(), actual); - check_basic_test_result(std::u16string_view(u16.data(), str.size()), result16, actual, expected, expected_ec); + auto result16 = fast_float::from_chars(u16, u16 + str.size(), actual); + check_basic_test_result(std::u16string_view(u16, str.size()), result16, actual, expected, expected_ec); + char32_t u32[global_string_capacity]{}; for (size_t i = 0; i < str.size(); i++) { u32[i] = char32_t(str[i]); } - auto result32 = fast_float::from_chars(u32.data(), u32.data() + str.size(), actual); - check_basic_test_result(std::u32string_view(u32.data(), str.size()), result32, actual, expected, expected_ec); + auto result32 = fast_float::from_chars(u32, u32 + str.size(), actual); + check_basic_test_result(std::u32string_view(u32, str.size()), result32, actual, expected, expected_ec); } template From 5223d7a460276fe61f63807cf09a1e3a8bfcc3e6 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 26 Apr 2023 18:25:27 -0400 Subject: [PATCH 09/11] address issues raised by @mayawarrier --- include/fast_float/ascii_number.h | 33 ++++++++++++++------------- include/fast_float/digit_comparison.h | 17 ++++++++------ 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 0751969..98365e2 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -27,13 +27,13 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { | (val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56; } -template + fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint64_t read_u64(UC const * chars) { - if (cpp20_and_in_constexpr() || sizeof(UC) > 1) { - uint64_t val{}; +uint64_t read_u64(const char *chars) { + if (cpp20_and_in_constexpr()) { + uint64_t val = 0; for(int i = 0; i < 8; ++i) { - val |= uint64_t(char(*chars)) << (i * 8); + val |= uint64_t(*chars) << (i*8); ++chars; } return val; @@ -75,9 +75,9 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; return uint32_t(val); } -template + fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept { +uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { return parse_eight_digits_unrolled(read_u64(chars)); } @@ -87,9 +87,8 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val 0x8080808080808080)); } -template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_made_of_eight_digits_fast(UC const * chars) noexcept { +bool is_made_of_eight_digits_fast(const char *chars) noexcept { return is_made_of_eight_digits_fast(read_u64(chars)); } @@ -102,8 +101,8 @@ struct parsed_number_string_t { bool valid{false}; bool too_many_digits{false}; // contains the range of the significant digits - span integer{}; // non-nullable - span fraction{}; // nullable + span integer{}; // non-nullable + span fraction{}; // nullable }; using byte_span = span; using parsed_number_string = parsed_number_string_t; @@ -145,16 +144,18 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par } UC const * const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = span(start_digits, size_t(digit_count)); + answer.integer = span(start_digits, size_t(digit_count)); int64_t exponent = 0; if ((p != pend) && (*p == decimal_point)) { ++p; UC const * before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. - while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok - p += 8; + if (std::is_same::value) { + while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast((const char *&)p)) { + i = i * 100000000 + parse_eight_digits_unrolled((const char *&)p); // in rare cases, this will overflow, but that's ok + p += 8; + } } while ((p != pend) && is_integer(*p)) { uint8_t digit = uint8_t(*p - UC('0')); @@ -162,7 +163,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par i = i * 10 + digit; // in rare cases, this will overflow, but that's ok } exponent = before - p; - answer.fraction = span(before, size_t(p - before)); + answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } // we must have encountered at least one integer! diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index c85be59..b2c8495 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -197,13 +197,12 @@ bool is_truncated(UC const * first, UC const * last) noexcept { } template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_truncated(span s) noexcept { +bool is_truncated(span s) noexcept { return is_truncated(s.ptr, s.ptr + s.len()); } -template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept { +void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { value = value * 100000000 + parse_eight_digits_unrolled(p); p += 8; counter += 8; @@ -255,8 +254,10 @@ void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_ skip_zeros(p, pend); // process all digits, in increments of step per loop while (p != pend) { - while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { - parse_eight_digits(p, value, counter, digits); + if (std::is_same::value) { + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { + parse_eight_digits((const char *&)p, value, counter, digits); + } } while (counter < step && p != pend && digits < max_digits) { parse_one_digit(p, value, counter, digits); @@ -288,8 +289,10 @@ void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_ } // process all digits, in increments of step per loop while (p != pend) { - while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { - parse_eight_digits(p, value, counter, digits); + if (std::is_same::value) { + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { + parse_eight_digits((const char *&)p, value, counter, digits); + } } while (counter < step && p != pend && digits < max_digits) { parse_one_digit(p, value, counter, digits); From 8199baeb7075bf7642834c388ffd1a3d5a980011 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 26 Apr 2023 18:46:19 -0400 Subject: [PATCH 10/11] Slightly less ugly code. --- include/fast_float/ascii_number.h | 24 ++++++++++++++++++++++-- include/fast_float/digit_comparison.h | 14 ++++++++++++-- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 98365e2..d506326 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -76,6 +76,16 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { return uint32_t(val); } +fastfloat_really_inline constexpr +uint32_t parse_eight_digits_unrolled(const char16_t *) noexcept { + return 0; +} + +fastfloat_really_inline constexpr +uint32_t parse_eight_digits_unrolled(const char32_t *) noexcept { + return 0; +} + fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { return parse_eight_digits_unrolled(read_u64(chars)); @@ -87,6 +97,16 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val 0x8080808080808080)); } +fastfloat_really_inline constexpr +bool is_made_of_eight_digits_fast(const char16_t *) noexcept { + return false; +} + +fastfloat_really_inline constexpr +bool is_made_of_eight_digits_fast(const char32_t *) noexcept { + return false; +} + fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool is_made_of_eight_digits_fast(const char *chars) noexcept { return is_made_of_eight_digits_fast(read_u64(chars)); @@ -152,8 +172,8 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. if (std::is_same::value) { - while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast((const char *&)p)) { - i = i * 100000000 + parse_eight_digits_unrolled((const char *&)p); // in rare cases, this will overflow, but that's ok + while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok p += 8; } } diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index b2c8495..f469f6b 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -201,6 +201,16 @@ bool is_truncated(span s) noexcept { return is_truncated(s.ptr, s.ptr + s.len()); } +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void parse_eight_digits(const char16_t*& , limb& , size_t& , size_t& ) noexcept { + // currently unused +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void parse_eight_digits(const char32_t*& , limb& , size_t& , size_t& ) noexcept { + // currently unused +} + fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { value = value * 100000000 + parse_eight_digits_unrolled(p); @@ -256,7 +266,7 @@ void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_ while (p != pend) { if (std::is_same::value) { while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { - parse_eight_digits((const char *&)p, value, counter, digits); + parse_eight_digits(p, value, counter, digits); } } while (counter < step && p != pend && digits < max_digits) { @@ -291,7 +301,7 @@ void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_ while (p != pend) { if (std::is_same::value) { while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { - parse_eight_digits((const char *&)p, value, counter, digits); + parse_eight_digits(p, value, counter, digits); } } while (counter < step && p != pend && digits < max_digits) { From 1aba54bac203023d444173219ee50679a63c0e44 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 26 Apr 2023 19:46:06 -0400 Subject: [PATCH 11/11] Fixing version regression. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e2769a8..727d135 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.9) -project(fast_float VERSION 3.10.1 LANGUAGES CXX) +project(fast_float VERSION 4.0.0 LANGUAGES CXX) option(FASTFLOAT_TEST "Enable tests" OFF) if(FASTFLOAT_TEST) enable_testing()