Initial Unicode release

Added support for the other char types
This commit is contained in:
Pharago 2023-04-02 22:58:01 +02:00
parent fbd5bd712e
commit bc77f956e2
5 changed files with 185 additions and 103 deletions

View File

@ -12,8 +12,9 @@ namespace fast_float {
// Next function can be micro-optimized, but compilers are entirely
// able to optimize it well.
// Returns true when `c` lies in the closed range '0'..'9'.
// Diff rendering: the char-only signature and body come first, followed by
// their replacement, which is templated on the character type TCH.
fastfloat_really_inline constexpr bool is_integer(char c) noexcept {
return c >= '0' && c <= '9';
template <typename TCH>
fastfloat_really_inline constexpr bool is_integer(TCH c) noexcept {
// Same range test, expressed as the negation of "c is outside '0'..'9'".
return !(c > TCH('9') || c < TCH('0'));
}
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
@ -26,13 +27,13 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
| (val & 0x000000000000FF00) << 40
| (val & 0x00000000000000FF) << 56;
}
template <typename TCH>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
uint64_t read_u64(const char *chars) {
if (cpp20_and_in_constexpr()) {
uint64_t val = 0;
uint64_t read_u64(TCH const * chars) {
if (cpp20_and_in_constexpr() || sizeof(TCH) > 1) {
uint64_t val{};
for(int i = 0; i < 8; ++i) {
val |= uint64_t(*chars) << (i*8);
val |= uint64_t(char(*chars)) << (i * 8);
++chars;
}
return val;
@ -74,9 +75,9 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
return uint32_t(val);
}
// Pointer overload: packs the characters at `chars` into a uint64_t with
// read_u64 and forwards the result to the uint64_t overload.
// NOTE(review): presumably requires at least 8 readable characters at
// `chars` -- confirm against callers.
template <typename TCH>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
uint32_t parse_eight_digits_unrolled(TCH const * chars) noexcept {
return parse_eight_digits_unrolled(read_u64(chars));
}
@ -86,40 +87,42 @@ fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val
0x8080808080808080));
}
// Pointer overload: packs the characters at `chars` into a uint64_t with
// read_u64 and forwards the result to the uint64_t overload.
// NOTE(review): presumably requires at least 8 readable characters at
// `chars` -- confirm against callers.
template <typename TCH>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
bool is_made_of_eight_digits_fast(const char *chars) noexcept {
bool is_made_of_eight_digits_fast(TCH const * chars) noexcept {
return is_made_of_eight_digits_fast(read_u64(chars));
}
// Result record filled in by parse_number_string.
// Diff rendering: the char-only declarations appear next to their
// replacements, which are templated on the character type TCH.
// NOTE(review): byte_span changes from span<const char> to span<char> below;
// confirm that span stores a pointer-to-const so the constness is not lost.
typedef span<const char> byte_span;
struct parsed_number_string {
template <typename TCH>
struct parsed_number_string_t {
int64_t exponent{0};
uint64_t mantissa{0};
const char *lastmatch{nullptr};
TCH const * lastmatch{nullptr};
// true when the input started with a minus sign
bool negative{false};
bool valid{false};
bool too_many_digits{false};
// contains the range of the significant digits
byte_span integer{}; // non-nullable
byte_span fraction{}; // nullable
span<TCH> integer{}; // non-nullable
span<TCH> fraction{}; // nullable
};
using byte_span = span<char>;
//using parsed_number_string = parsed_number_string_t<char>;
// Assuming that you use no more than 19 digits, this will
// parse an ASCII string.
template <typename TCH>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept {
const chars_format fmt = options.format;
const char decimal_point = options.decimal_point;
parsed_number_string_t<TCH> parse_number_string(TCH const *p, TCH const * pend, parse_options_t<TCH> options) noexcept {
chars_format const fmt = options.format;
TCH const decimal_point = options.decimal_point;
parsed_number_string answer;
parsed_number_string_t<TCH> answer;
answer.valid = false;
answer.too_many_digits = false;
answer.negative = (*p == '-');
answer.negative = (*p == TCH('-'));
#if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
if ((*p == '-') || (*p == '+')) {
if ((*p == TCH('-')) || (*p == TCH('+'))) {
#else
if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
if (*p == TCH('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
#endif
++p;
if (p == pend) {
@ -129,7 +132,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
return answer;
}
}
const char *const start_digits = p;
TCH const * const start_digits = p;
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
@ -137,16 +140,16 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
// a multiplication by 10 is cheaper than an arbitrary integer
// multiplication
i = 10 * i +
uint64_t(*p - '0'); // might overflow, we will handle the overflow later
uint64_t(*p - TCH('0')); // might overflow, we will handle the overflow later
++p;
}
const char *const end_of_integer_part = p;
TCH const * const end_of_integer_part = p;
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
answer.integer = byte_span(start_digits, size_t(digit_count));
answer.integer = span<TCH>(start_digits, size_t(digit_count));
int64_t exponent = 0;
if ((p != pend) && (*p == decimal_point)) {
++p;
const char* before = p;
TCH const * before = p;
// can occur at most twice without overflowing, but let it occur more, since
// for integers with many digits, digit parsing is the primary bottleneck.
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
@ -154,12 +157,12 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
p += 8;
}
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0');
uint8_t digit = uint8_t(*p - TCH('0'));
++p;
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
}
exponent = before - p;
answer.fraction = byte_span(before, size_t(p - before));
answer.fraction = span<TCH>(before, size_t(p - before));
digit_count -= exponent;
}
// we must have encountered at least one integer!
@ -167,14 +170,14 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
return answer;
}
int64_t exp_number = 0; // explicit exponential part
if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
const char * location_of_e = p;
if ((fmt & chars_format::scientific) && (p != pend) && ((TCH('e') == *p) || (TCH('E') == *p))) {
TCH const * location_of_e = p;
++p;
bool neg_exp = false;
if ((p != pend) && ('-' == *p)) {
if ((p != pend) && (TCH('-') == *p)) {
neg_exp = true;
++p;
} else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
} else if ((p != pend) && (TCH('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
++p;
}
if ((p == pend) || !is_integer(*p)) {
@ -186,7 +189,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
p = location_of_e;
} else {
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0');
uint8_t digit = uint8_t(*p - TCH('0'));
if (exp_number < 0x10000000) {
exp_number = 10 * exp_number + digit;
}
@ -212,9 +215,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
// We have to handle the case where we have 0.0000somenumber.
// We need to be mindful of the case where we only have zeroes...
// E.g., 0.000000000...000.
const char *start = start_digits;
while ((start != pend) && (*start == '0' || *start == decimal_point)) {
if(*start == '0') { digit_count --; }
TCH const * start = start_digits;
while ((start != pend) && (*start == TCH('0') || *start == decimal_point)) {
if(*start == TCH('0')) { digit_count --; }
start++;
}
if (digit_count > 19) {
@ -224,19 +227,19 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
// pre-tokenized spans from above.
i = 0;
p = answer.integer.ptr;
const char* int_end = p + answer.integer.len();
TCH const * int_end = p + answer.integer.len();
const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
i = i * 10 + uint64_t(*p - '0');
i = i * 10 + uint64_t(*p - TCH('0'));
++p;
}
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
exponent = end_of_integer_part - p + exp_number;
} else { // We have a value with a fractional component.
p = answer.fraction.ptr;
const char* frac_end = p + answer.fraction.len();
TCH const * frac_end = p + answer.fraction.len();
while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
i = i * 10 + uint64_t(*p - '0');
i = i * 10 + uint64_t(*p - TCH('0'));
++p;
}
exponent = answer.fraction.ptr - p + exp_number;

View File

@ -23,8 +23,9 @@ constexpr static uint64_t powers_of_ten_uint64[] = {
// this algorithm is not even close to optimized, but it has no practical
// effect on performance: in order to have a faster algorithm, we'd need
// to slow down performance for faster algorithms, and this is still fast.
template <typename TCH>
fastfloat_really_inline FASTFLOAT_CONSTEXPR14
int32_t scientific_exponent(parsed_number_string& num) noexcept {
int32_t scientific_exponent(parsed_number_string_t<TCH> & num) noexcept {
uint64_t mantissa = num.mantissa;
int32_t exponent = int32_t(num.exponent);
while (mantissa >= 10000) {
@ -153,19 +154,19 @@ void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
}
am.power2 += shift;
}
template <typename TCH>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
void skip_zeros(const char*& first, const char* last) noexcept {
void skip_zeros(TCH const * & first, TCH const * last) noexcept {
uint64_t val;
while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) {
while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<TCH>()) {
::memcpy(&val, first, sizeof(uint64_t));
if (val != 0x3030303030303030) {
if (val != int_cmp_zeros<TCH>()) {
break;
}
first += 8;
first += int_cmp_len<TCH>();
}
while (first != last) {
if (*first != '0') {
if (*first != TCH('0')) {
break;
}
first++;
@ -174,42 +175,45 @@ void skip_zeros(const char*& first, const char* last) noexcept {
// determine if any non-zero digits were truncated.
// all characters must be valid digits.
// Returns true as soon as a character other than '0' is found in
// [first, last), and false when the whole range consists of '0'.
template <typename TCH>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
bool is_truncated(const char* first, const char* last) noexcept {
bool is_truncated(TCH const * first, TCH const * last) noexcept {
// do 8-bit optimizations, can just compare to 8 literal 0s.
uint64_t val;
// Fast path (skipped during constant evaluation): copy one uint64_t worth of
// characters and compare it against a word in which every character is '0'.
while (!cpp20_and_in_constexpr() && std::distance(first, last) >= 8) {
while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<TCH>()) {
::memcpy(&val, first, sizeof(uint64_t));
if (val != 0x3030303030303030) {
if (val != int_cmp_zeros<TCH>()) {
return true;
}
// advance by the number of characters covered by the word just compared
first += 8;
first += int_cmp_len<TCH>();
}
// Tail (and constant-evaluation) path: check the remaining characters one by one.
while (first != last) {
if (*first != '0') {
if (*first != TCH('0')) {
return true;
}
first++;
++first;
}
return false;
}
// Span overload: forwards the range [s.ptr, s.ptr + s.len()) to the
// pointer-range overload of is_truncated.
template <typename TCH>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
bool is_truncated(byte_span s) noexcept {
bool is_truncated(span<TCH> s) noexcept {
return is_truncated(s.ptr, s.ptr + s.len());
}
// Appends eight decimal digits read at `p` to `value`: shifts `value` left by
// eight decimal places, adds the parsed eight-digit number, then advances
// `p`, `counter` and `count` by 8 (all are updated through references).
template <typename TCH>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
void parse_eight_digits(TCH const *& p, limb& value, size_t& counter, size_t& count) noexcept {
value = value * 100000000 + parse_eight_digits_unrolled(p);
p += 8;
counter += 8;
count += 8;
}
template <typename TCH>
fastfloat_really_inline FASTFLOAT_CONSTEXPR14
void parse_one_digit(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
value = value * 10 + limb(*p - '0');
void parse_one_digit(TCH const *& p, limb& value, size_t& counter, size_t& count) noexcept {
value = value * 10 + limb(*p - TCH('0'));
p++;
counter++;
count++;
@ -230,8 +234,9 @@ void round_up_bigint(bigint& big, size_t& count) noexcept {
}
// parse the significant digits into a big integer
template <typename TCH>
inline FASTFLOAT_CONSTEXPR20
void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits, size_t& digits) noexcept {
void parse_mantissa(bigint& result, parsed_number_string_t<TCH>& num, size_t max_digits, size_t& digits) noexcept {
// try to minimize the number of big integer and scalar multiplication.
// therefore, try to parse 8 digits at a time, and multiply by the largest
// scalar value (9 or 19 digits) for each step.
@ -245,8 +250,8 @@ void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits
#endif
// process all integer digits.
const char* p = num.integer.ptr;
const char* pend = p + num.integer.len();
TCH const * p = num.integer.ptr;
TCH const * pend = p + num.integer.len();
skip_zeros(p, pend);
// process all digits, in increments of step per loop
while (p != pend) {
@ -395,9 +400,9 @@ adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int
// `b` as a big-integer type, scaled to the same binary exponent as
// the actual digits. we then compare the big integer representations
// of both, and use that to direct rounding.
template <typename T>
template <typename T, typename TCH>
inline FASTFLOAT_CONSTEXPR20
adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa am) noexcept {
adjusted_mantissa digit_comp(parsed_number_string_t<TCH>& num, adjusted_mantissa am) noexcept {
// remove the invalid exponent bias
am.power2 -= invalid_am_bias;

View File

@ -13,22 +13,25 @@ enum chars_format {
general = fixed | scientific
};
// Result of a from_chars call: a pointer into the parsed input plus an error
// code. Diff rendering: the char-only struct header and member appear next to
// their replacements, which are templated on the character type TCH.
struct from_chars_result {
const char *ptr;
template <typename TCH>
struct from_chars_result_t {
TCH const * ptr;
std::errc ec;
};
// Keeps the pre-existing name available for plain char input.
using from_chars_result = from_chars_result_t<char>;
// Options passed to from_chars_advanced: the accepted number formats and the
// decimal point character. Diff rendering: the char-only declarations appear
// next to their replacements, which are templated on the character type TCH.
struct parse_options {
constexpr explicit parse_options(chars_format fmt = chars_format::general,
char dot = '.')
template <typename TCH>
struct parse_options_t {
// Defaults: chars_format::general and '.' as the decimal point.
constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
TCH dot = TCH('.'))
: format(fmt), decimal_point(dot) {}
/** Which number formats are accepted */
chars_format format;
/** The character used as decimal point */
char decimal_point;
TCH decimal_point;
};
// Keeps the pre-existing name available for plain char input.
using parse_options = parse_options_t<char>;
/**
* This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting
@ -49,18 +52,18 @@ struct parse_options {
* to determine whether we allow the fixed point and scientific notation respectively.
* The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`.
*/
// Declaration of from_chars over the range [first, last); the parsed value is
// written through `value` and `fmt` defaults to chars_format::general.
// Diff rendering: the char-only declaration lines appear next to their
// replacements, which add a character type parameter TCH defaulting to char.
template<typename T>
template<typename T, typename TCH = char>
FASTFLOAT_CONSTEXPR20
from_chars_result from_chars(const char *first, const char *last,
from_chars_result_t<TCH> from_chars(TCH const * first, TCH const * last,
T &value, chars_format fmt = chars_format::general) noexcept;
/**
* Like from_chars, but accepts an `options` argument to govern number parsing.
* The options carry the accepted number formats and the decimal point
* character. In this diff rendering the char-only declaration lines appear
* next to their replacements, which add a character type parameter TCH
* defaulting to char.
*/
template<typename T>
template<typename T, typename TCH = char>
FASTFLOAT_CONSTEXPR20
from_chars_result from_chars_advanced(const char *first, const char *last,
T &value, parse_options options) noexcept;
from_chars_result_t<TCH> from_chars_advanced(TCH const * first, TCH const * last,
T &value, parse_options_t<TCH> options) noexcept;
} // namespace fast_float
#include "parse_number.h"

View File

@ -106,11 +106,12 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
}
// Compares two ASCII strings in a case insensitive manner.
// ORs together the XOR of each pair of characters over `length` positions and
// returns true when the accumulated difference is exactly 0 (all equal) or
// exactly 32 (0x20, the bit that separates ASCII upper and lower case letters).
// NOTE(review): in the templated version each element is converted to char
// before the XOR, so for character types wider than char only the part that
// survives the conversion is compared -- confirm that a wide character
// outside the ASCII range cannot be mistaken for a matching letter.
template <typename TCH>
inline FASTFLOAT_CONSTEXPR14 bool
fastfloat_strncasecmp(const char *input1, const char *input2, size_t length) {
fastfloat_strncasecmp(TCH const * input1, TCH const * input2, size_t length) {
char running_diff{0};
for (size_t i = 0; i < length; i++) {
running_diff |= (input1[i] ^ input2[i]);
for (size_t i = 0; i < length; ++i) {
running_diff |= (char(input1[i]) ^ char(input2[i]));
}
return (running_diff == 0) || (running_diff == 32);
}
@ -503,6 +504,73 @@ constexpr bool space_lut<T>::value[];
inline constexpr bool is_space(uint8_t c) { return space_lut<>::value[c]; }
#endif
// Returns a 64-bit word in which every TCH-sized lane holds the character
// '0'. It is compared against a uint64_t filled by memcpy from a character
// buffer to test several characters for '0' at once. All lanes hold the same
// value, so the word does not depend on the order of the lanes.
//
// Written as a single return expression (no switch) so the function stays a
// valid constexpr function under C++11 rules; unsupported character sizes are
// rejected at compile time instead of silently yielding 0.
template<typename TCH>
static constexpr uint64_t int_cmp_zeros()
{
  static_assert(sizeof(TCH) == 1 || sizeof(TCH) == 2 || sizeof(TCH) == 4,
                "int_cmp_zeros: unsupported character size");
  return (sizeof(TCH) == 1)
    ? uint64_t(0x3030303030303030)
    : (sizeof(TCH) == 2)
      ? (uint64_t(TCH('0')) << 48 | uint64_t(TCH('0')) << 32 |
         uint64_t(TCH('0')) << 16 | uint64_t(TCH('0')))
      : (uint64_t(TCH('0')) << 32 | uint64_t(TCH('0')));
}
// Number of TCH characters that fit into one uint64_t, i.e. how many
// characters a single 64-bit comparison covers (8 for 1-byte, 4 for 2-byte
// and 2 for 4-byte character types).
template<typename TCH>
static constexpr int int_cmp_len()
{
  return static_cast<int>(sizeof(uint64_t) / sizeof(TCH));
}
// Returns the literal "nan" encoded for the character type TCH.
// The primary template returns nullptr for character types without a
// specialization; char, wchar_t, char16_t and char32_t are specialized below.
// The explicit specializations deliberately carry no `static`: a storage
// class specifier is not permitted on an explicit specialization, and they
// take their linkage from the primary template.
template<typename TCH>
static constexpr TCH const * str_const_nan()
{
  return nullptr;
}
template<>
constexpr char const * str_const_nan<char>()
{
  return "nan";
}
template<>
constexpr wchar_t const * str_const_nan<wchar_t>()
{
  return L"nan";
}
template<>
constexpr char16_t const * str_const_nan<char16_t>()
{
  return u"nan";
}
template<>
constexpr char32_t const * str_const_nan<char32_t>()
{
  return U"nan";
}
// Returns the literal "infinity" encoded for the character type TCH; the
// first three characters double as the short form "inf".
// The primary template returns nullptr for character types without a
// specialization; char, wchar_t, char16_t and char32_t are specialized below.
// The explicit specializations deliberately carry no `static`: a storage
// class specifier is not permitted on an explicit specialization, and they
// take their linkage from the primary template.
template<typename TCH>
static constexpr TCH const * str_const_inf()
{
  return nullptr;
}
template<>
constexpr char const * str_const_inf<char>()
{
  return "infinity";
}
template<>
constexpr wchar_t const * str_const_inf<wchar_t>()
{
  return L"infinity";
}
template<>
constexpr char16_t const * str_const_inf<char16_t>()
{
  return u"infinity";
}
template<>
constexpr char32_t const * str_const_inf<char32_t>()
{
  return U"infinity";
}
} // namespace fast_float
#endif

View File

@ -19,41 +19,41 @@ namespace detail {
* The case comparisons could be made much faster given that we know that the
* strings a null-free and fixed.
**/
template <typename T>
from_chars_result FASTFLOAT_CONSTEXPR14
parse_infnan(const char *first, const char *last, T &value) noexcept {
from_chars_result answer{};
template <typename T, typename TCH>
from_chars_result_t<TCH> FASTFLOAT_CONSTEXPR14
parse_infnan(TCH const * first, TCH const * last, T &value) noexcept {
from_chars_result_t<TCH> answer{};
answer.ptr = first;
answer.ec = std::errc(); // be optimistic
bool minusSign = false;
if (*first == '-') { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here
if (*first == TCH('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here
minusSign = true;
++first;
}
#if FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
if (*first == '+') {
if (*first == TCH('+')) {
++first;
}
#endif
if (last - first >= 3) {
if (fastfloat_strncasecmp(first, "nan", 3)) {
if (fastfloat_strncasecmp(first, str_const_nan<TCH>(), 3)) {
answer.ptr = (first += 3);
value = minusSign ? -std::numeric_limits<T>::quiet_NaN() : std::numeric_limits<T>::quiet_NaN();
// Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
if(first != last && *first == '(') {
for(const char* ptr = first + 1; ptr != last; ++ptr) {
if (*ptr == ')') {
if(first != last && *first == TCH('(')) {
for(TCH const * ptr = first + 1; ptr != last; ++ptr) {
if (*ptr == TCH(')')) {
answer.ptr = ptr + 1; // valid nan(n-char-seq-opt)
break;
}
else if(!(('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z') || ('0' <= *ptr && *ptr <= '9') || *ptr == '_'))
else if(!((TCH('a') <= *ptr && *ptr <= TCH('z')) || (TCH('A') <= *ptr && *ptr <= TCH('Z')) || (TCH('0') <= *ptr && *ptr <= TCH('9')) || *ptr == TCH('_')))
break; // forbidden char, not nan(n-char-seq-opt)
}
}
return answer;
}
if (fastfloat_strncasecmp(first, "inf", 3)) {
if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, "inity", 5)) {
if (fastfloat_strncasecmp(first, str_const_inf<TCH>(), 3)) {
if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, str_const_inf<TCH>() + 3, 5)) {
answer.ptr = first + 8;
} else {
answer.ptr = first + 3;
@ -132,22 +132,25 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept {
} // namespace detail
// Definition of from_chars: wraps `fmt` in a parse options object and
// delegates to from_chars_advanced. The default for `fmt` is only repeated
// here as a comment.
// Diff rendering: the char-only lines appear next to their replacements,
// which are templated on the character type TCH.
template<typename T>
template<typename T, typename TCH>
FASTFLOAT_CONSTEXPR20
from_chars_result from_chars(const char *first, const char *last,
from_chars_result_t<TCH> from_chars(TCH const * first, TCH const * last,
T &value, chars_format fmt /*= chars_format::general*/) noexcept {
return from_chars_advanced(first, last, value, parse_options{fmt});
return from_chars_advanced(first, last, value, parse_options_t<TCH>{fmt});
}
template<typename T>
template<typename T, typename TCH>
FASTFLOAT_CONSTEXPR20
from_chars_result from_chars_advanced(const char *first, const char *last,
T &value, parse_options options) noexcept {
from_chars_result_t<TCH> from_chars_advanced(TCH const * first, TCH const * last,
T &value, parse_options_t<TCH> options) noexcept {
static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
static_assert (std::is_same<TCH, char>::value ||
std::is_same<TCH, wchar_t>::value ||
std::is_same<TCH, char16_t>::value ||
std::is_same<TCH, char32_t>::value , "only char, wchar_t, char16_t and char32_t are supported");
from_chars_result answer;
from_chars_result_t<TCH> answer;
#if FASTFLOAT_SKIP_WHITE_SPACE // disabled by default
while ((first != last) && fast_float::is_space(uint8_t(*first))) {
first++;
@ -158,7 +161,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
answer.ptr = first;
return answer;
}
parsed_number_string pns = parse_number_string(first, last, options);
parsed_number_string_t<TCH> pns = parse_number_string<TCH>(first, last, options);
if (!pns.valid) {
return detail::parse_infnan(first, last, value);
}