From bc77f956e29dc8cc52d3a31cb33fc36af29bba29 Mon Sep 17 00:00:00 2001 From: Pharago Date: Sun, 2 Apr 2023 22:58:01 +0200 Subject: [PATCH] Initial Unicode release Added support for the other char types --- include/fast_float/ascii_number.h | 89 ++++++++++++++------------- include/fast_float/digit_comparison.h | 51 ++++++++------- include/fast_float/fast_float.h | 27 ++++---- include/fast_float/float_common.h | 74 +++++++++++++++++++++- include/fast_float/parse_number.h | 47 +++++++------- 5 files changed, 185 insertions(+), 103 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 72b8098..360ee56 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -12,8 +12,9 @@ namespace fast_float { // Next function can be micro-optimized, but compilers are entirely // able to optimize it well. -fastfloat_really_inline constexpr bool is_integer(char c) noexcept { - return c >= '0' && c <= '9'; +template +fastfloat_really_inline constexpr bool is_integer(TCH c) noexcept { + return !(c > TCH('9') || c < TCH('0')); } fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { @@ -26,13 +27,13 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { | (val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56; } - +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint64_t read_u64(const char *chars) { - if (cpp20_and_in_constexpr()) { - uint64_t val = 0; +uint64_t read_u64(TCH const * chars) { + if (cpp20_and_in_constexpr() || sizeof(TCH) > 1) { + uint64_t val{}; for(int i = 0; i < 8; ++i) { - val |= uint64_t(*chars) << (i*8); + val |= uint64_t(char(*chars)) << (i * 8); ++chars; } return val; @@ -74,9 +75,9 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; return uint32_t(val); } - +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { +uint32_t parse_eight_digits_unrolled(TCH const * chars) noexcept { return parse_eight_digits_unrolled(read_u64(chars)); } @@ -86,40 +87,42 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val 0x8080808080808080)); } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_made_of_eight_digits_fast(const char *chars) noexcept { +bool is_made_of_eight_digits_fast(TCH const * chars) noexcept { return is_made_of_eight_digits_fast(read_u64(chars)); } -typedef span byte_span; - -struct parsed_number_string { +template +struct parsed_number_string_t { int64_t exponent{0}; uint64_t mantissa{0}; - const char *lastmatch{nullptr}; + TCH const * lastmatch{nullptr}; bool negative{false}; bool valid{false}; bool too_many_digits{false}; // contains the range of the significant digits - byte_span integer{}; // non-nullable - byte_span fraction{}; // nullable + span integer{}; // non-nullable + span fraction{}; // nullable }; - +using byte_span = span; +//using parsed_number_string = parsed_number_string_t; // Assuming that you use no more than 19 digits, this will // parse an ASCII string. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept { - const chars_format fmt = options.format; - const char decimal_point = options.decimal_point; +parsed_number_string_t parse_number_string(TCH const *p, TCH const * pend, parse_options_t options) noexcept { + chars_format const fmt = options.format; + TCH const decimal_point = options.decimal_point; - parsed_number_string answer; + parsed_number_string_t answer; answer.valid = false; answer.too_many_digits = false; - answer.negative = (*p == '-'); + answer.negative = (*p == TCH('-')); #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if ((*p == '-') || (*p == '+')) { + if ((*p == TCH('-')) || (*p == TCH('+'))) { #else - if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + if (*p == TCH('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here #endif ++p; if (p == pend) { @@ -129,7 +132,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ return answer; } } - const char *const start_digits = p; + TCH const * const start_digits = p; uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) @@ -137,16 +140,16 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + - uint64_t(*p - '0'); // might overflow, we will handle the overflow later + uint64_t(*p - TCH('0')); // might overflow, we will handle the overflow later ++p; } - const char *const end_of_integer_part = p; + TCH const * const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = byte_span(start_digits, size_t(digit_count)); + answer.integer = span(start_digits, size_t(digit_count)); int64_t exponent = 0; if ((p != pend) && (*p == decimal_point)) { ++p; - const char* before = p; + TCH const * before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { @@ -154,12 +157,12 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ p += 8; } while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); + uint8_t digit = uint8_t(*p - TCH('0')); ++p; i = i * 10 + digit; // in rare cases, this will overflow, but that's ok } exponent = before - p; - answer.fraction = byte_span(before, size_t(p - before)); + answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } // we must have encountered at least one integer! @@ -167,14 +170,14 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ return answer; } int64_t exp_number = 0; // explicit exponential part - if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) { - const char * location_of_e = p; + if ((fmt & chars_format::scientific) && (p != pend) && ((TCH('e') == *p) || (TCH('E') == *p))) { + TCH const * location_of_e = p; ++p; bool neg_exp = false; - if ((p != pend) && ('-' == *p)) { + if ((p != pend) && (TCH('-') == *p)) { neg_exp = true; ++p; - } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + } else if ((p != pend) && (TCH('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) ++p; } if ((p == pend) || !is_integer(*p)) { @@ -186,7 +189,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ p = location_of_e; } else { while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); + uint8_t digit = uint8_t(*p - TCH('0')); if (exp_number < 0x10000000) { exp_number = 10 * exp_number + digit; } @@ -212,9 +215,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // We have to handle the case where we have 0.0000somenumber. // We need to be mindful of the case where we only have zeroes... // E.g., 0.000000000...000. - const char *start = start_digits; - while ((start != pend) && (*start == '0' || *start == decimal_point)) { - if(*start == '0') { digit_count --; } + TCH const * start = start_digits; + while ((start != pend) && (*start == TCH('0') || *start == decimal_point)) { + if(*start == TCH('0')) { digit_count --; } start++; } if (digit_count > 19) { @@ -224,19 +227,19 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ // pre-tokenized spans from above. i = 0; p = answer.integer.ptr; - const char* int_end = p + answer.integer.len(); + TCH const * int_end = p + answer.integer.len(); const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; while((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - '0'); + i = i * 10 + uint64_t(*p - TCH('0')); ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integers exponent = end_of_integer_part - p + exp_number; } else { // We have a value with a fractional component. p = answer.fraction.ptr; - const char* frac_end = p + answer.fraction.len(); + TCH const * frac_end = p + answer.fraction.len(); while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - '0'); + i = i * 10 + uint64_t(*p - TCH('0')); ++p; } exponent = answer.fraction.ptr - p + exp_number; diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index 3959ba0..81b9882 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -23,8 +23,9 @@ constexpr static uint64_t powers_of_ten_uint64[] = { // this algorithm is not even close to optimized, but it has no practical // effect on performance: in order to have a faster algorithm, we'd need // to slow down performance for faster algorithms, and this is still fast. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 -int32_t scientific_exponent(parsed_number_string& num) noexcept { +int32_t scientific_exponent(parsed_number_string_t & num) noexcept { uint64_t mantissa = num.mantissa; int32_t exponent = int32_t(num.exponent); while (mantissa >= 10000) { @@ -153,19 +154,19 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept { } am.power2 += shift; } - +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void skip_zeros(const char*& first, const char* last) noexcept { +void skip_zeros(TCH const * & first, TCH const * last) noexcept { uint64_t val; - while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { ::memcpy(&val, first, sizeof(uint64_t)); - if (val != 0x3030303030303030) { + if (val != int_cmp_zeros()) { break; } - first += 8; + first += int_cmp_len(); } while (first != last) { - if (*first != '0') { + if (*first != TCH('0')) { break; } first++; @@ -174,42 +175,45 @@ void skip_zeros(const char*& first, const char* last) noexcept { // determine if any non-zero digits were truncated. // all characters must be valid digits. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_truncated(const char* first, const char* last) noexcept { +bool is_truncated(TCH const * first, TCH const * last) noexcept { // do 8-bit optimizations, can just compare to 8 literal 0s. uint64_t val; - while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) { + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { ::memcpy(&val, first, sizeof(uint64_t)); - if (val != 0x3030303030303030) { + if (val != int_cmp_zeros()) { return true; } - first += 8; + first += int_cmp_len(); } while (first != last) { - if (*first != '0') { + if (*first != TCH('0')) { return true; } - first++; + ++first; } return false; } - +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_truncated(byte_span s) noexcept { +bool is_truncated(span s) noexcept { return is_truncated(s.ptr, s.ptr + s.len()); } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { +void parse_eight_digits(TCH const *& p, limb& value, size_t& counter, size_t& count) noexcept { value = value * 100000000 + parse_eight_digits_unrolled(p); p += 8; counter += 8; count += 8; } +template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 -void parse_one_digit(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { - value = value * 10 + limb(*p - '0'); +void parse_one_digit(TCH const *& p, limb& value, size_t& counter, size_t& count) noexcept { + value = value * 10 + limb(*p - TCH('0')); p++; counter++; count++; @@ -230,8 +234,9 @@ void round_up_bigint(bigint& big, size_t& count) noexcept { } // parse the significant digits into a big integer +template inline FASTFLOAT_CONSTEXPR20 -void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits, size_t& digits) noexcept { +void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_digits, size_t& digits) noexcept { // try to minimize the number of big integer and scalar multiplication. // therefore, try to parse 8 digits at a time, and multiply by the largest // scalar value (9 or 19 digits) for each step. @@ -245,8 +250,8 @@ void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits #endif // process all integer digits. - const char* p = num.integer.ptr; - const char* pend = p + num.integer.len(); + TCH const * p = num.integer.ptr; + TCH const * pend = p + num.integer.len(); skip_zeros(p, pend); // process all digits, in increments of step per loop while (p != pend) { @@ -395,9 +400,9 @@ adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int // `b` as a big-integer type, scaled to the same binary exponent as // the actual digits. we then compare the big integer representations // of both, and use that to direct rounding. -template +template inline FASTFLOAT_CONSTEXPR20 -adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa am) noexcept { +adjusted_mantissa digit_comp(parsed_number_string_t& num, adjusted_mantissa am) noexcept { // remove the invalid exponent bias am.power2 -= invalid_am_bias; diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index 65704da..9686260 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -13,22 +13,25 @@ enum chars_format { general = fixed | scientific }; - -struct from_chars_result { - const char *ptr; +template +struct from_chars_result_t { + TCH const * ptr; std::errc ec; }; +using from_chars_result = from_chars_result_t; -struct parse_options { - constexpr explicit parse_options(chars_format fmt = chars_format::general, - char dot = '.') +template +struct parse_options_t { + constexpr explicit parse_options_t(chars_format fmt = chars_format::general, + TCH dot = TCH('.')) : format(fmt), decimal_point(dot) {} /** Which number formats are accepted */ chars_format format; /** The character used as decimal point */ - char decimal_point; + TCH decimal_point; }; +using parse_options = parse_options_t; /** * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting @@ -49,18 +52,18 @@ struct parse_options { * to determine whether we allow the fixed point and scientific notation respectively. * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. */ -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars(const char *first, const char *last, +from_chars_result_t from_chars(TCH const * first, TCH const * last, T &value, chars_format fmt = chars_format::general) noexcept; /** * Like from_chars, but accepts an `options` argument to govern number parsing. */ -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_advanced(const char *first, const char *last, - T &value, parse_options options) noexcept; +from_chars_result_t from_chars_advanced(TCH const * first, TCH const * last, + T &value, parse_options_t options) noexcept; } // namespace fast_float #include "parse_number.h" diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index c878486..704954d 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -106,11 +106,12 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { } // Compares two ASCII strings in a case insensitive manner. +template inline FASTFLOAT_CONSTEXPR14 bool -fastfloat_strncasecmp(const char *input1, const char *input2, size_t length) { +fastfloat_strncasecmp(TCH const * input1, TCH const * input2, size_t length) { char running_diff{0}; - for (size_t i = 0; i < length; i++) { - running_diff |= (input1[i] ^ input2[i]); + for (size_t i = 0; i < length; ++i) { + running_diff |= (char(input1[i]) ^ char(input2[i])); } return (running_diff == 0) || (running_diff == 32); } @@ -503,6 +504,73 @@ constexpr bool space_lut::value[]; inline constexpr bool is_space(uint8_t c) { return space_lut<>::value[c]; } #endif + +template +static constexpr uint64_t int_cmp_zeros() +{ + switch(sizeof(TCH)) + { + case 1: return 0x3030303030303030; + case 2: return (uint64_t(TCH('0')) << 48 | uint64_t(TCH('0')) << 32 | uint64_t(TCH('0')) << 16 | TCH('0')); + case 4: return (uint64_t(TCH('0')) << 32 | TCH('0')); + } + return 0; +} +template +static constexpr int int_cmp_len() +{ + return sizeof(uint64_t) / sizeof(TCH); +} +template +static constexpr TCH const * str_const_nan() +{ + return nullptr; +} +template<> +static constexpr char const * str_const_nan() +{ + return "nan"; +} +template<> +static constexpr wchar_t const * str_const_nan() +{ + return L"nan"; +} +template<> +static constexpr char16_t const * str_const_nan() +{ + return u"nan"; +} +template<> +static constexpr char32_t const * str_const_nan() +{ + return U"nan"; +} +template +static constexpr TCH const * str_const_inf() +{ + return nullptr; +} +template<> +static constexpr char const * str_const_inf() +{ + return "infinity"; +} +template<> +static constexpr wchar_t const * str_const_inf() +{ + return L"infinity"; +} +template<> +static constexpr char16_t const * str_const_inf() +{ + return u"infinity"; +} +template<> +static constexpr char32_t const * str_const_inf() +{ + return U"infinity"; +} } // namespace fast_float #endif diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 6e4f6eb..da16235 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -19,41 +19,41 @@ namespace detail { * The case comparisons could be made much faster given that we know that the * strings a null-free and fixed. **/ -template -from_chars_result FASTFLOAT_CONSTEXPR14 -parse_infnan(const char *first, const char *last, T &value) noexcept { - from_chars_result answer{}; +template +from_chars_result_t FASTFLOAT_CONSTEXPR14 +parse_infnan(TCH const * first, TCH const * last, T &value) noexcept { + from_chars_result_t answer{}; answer.ptr = first; answer.ec = std::errc(); // be optimistic bool minusSign = false; - if (*first == '-') { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here + if (*first == TCH('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here minusSign = true; ++first; } #if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if (*first == '+') { + if (*first == TCH('+')) { ++first; } #endif if (last - first >= 3) { - if (fastfloat_strncasecmp(first, "nan", 3)) { + if (fastfloat_strncasecmp(first, str_const_nan(), 3)) { answer.ptr = (first += 3); value = minusSign ? -std::numeric_limits::quiet_NaN() : std::numeric_limits::quiet_NaN(); // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). - if(first != last && *first == '(') { - for(const char* ptr = first + 1; ptr != last; ++ptr) { - if (*ptr == ')') { + if(first != last && *first == TCH('(')) { + for(TCH const * ptr = first + 1; ptr != last; ++ptr) { + if (*ptr == TCH(')')) { answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) break; } - else if(!(('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z') || ('0' <= *ptr && *ptr <= '9') || *ptr == '_')) + else if(!((TCH('a') <= *ptr && *ptr <= TCH('z')) || (TCH('A') <= *ptr && *ptr <= TCH('Z')) || (TCH('0') <= *ptr && *ptr <= TCH('9')) || *ptr == TCH('_'))) break; // forbidden char, not nan(n-char-seq-opt) } } return answer; } - if (fastfloat_strncasecmp(first, "inf", 3)) { - if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, "inity", 5)) { + if (fastfloat_strncasecmp(first, str_const_inf(), 3)) { + if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, str_const_inf() + 3, 5)) { answer.ptr = first + 8; } else { answer.ptr = first + 3; @@ -132,22 +132,25 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { } // namespace detail -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars(const char *first, const char *last, +from_chars_result_t from_chars(TCH const * first, TCH const * last, T &value, chars_format fmt /*= chars_format::general*/) noexcept { - return from_chars_advanced(first, last, value, parse_options{fmt}); + return from_chars_advanced(first, last, value, parse_options_t{fmt}); } -template +template FASTFLOAT_CONSTEXPR20 -from_chars_result from_chars_advanced(const char *first, const char *last, - T &value, parse_options options) noexcept { +from_chars_result_t from_chars_advanced(TCH const * first, TCH const * last, + T &value, parse_options_t options) noexcept { static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); + static_assert (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value , "only char, wchar_t, char16_t and char32_t are supported"); - - from_chars_result answer; + from_chars_result_t answer; #if FASTFLOAT_SKIP_WHITE_SPACE // disabled by default while ((first != last) && fast_float::is_space(uint8_t(*first))) { first++; @@ -158,7 +161,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last, answer.ptr = first; return answer; } - parsed_number_string pns = parse_number_string(first, last, options); + parsed_number_string_t pns = parse_number_string(first, last, options); if (!pns.valid) { return detail::parse_infnan(first, last, value); }