diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 8ca345c..cc8ff4f 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -14,6 +14,34 @@ namespace fast_float { // able to optimize it well. fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; } +fastfloat_really_inline uint64_t byteswap(uint64_t val) { + return (val & 0xFF00000000000000) >> 56 + | (val & 0x00FF000000000000) >> 40 + | (val & 0x0000FF0000000000) >> 24 + | (val & 0x000000FF00000000) >> 8 + | (val & 0x00000000FF000000) << 8 + | (val & 0x0000000000FF0000) << 24 + | (val & 0x000000000000FF00) << 40 + | (val & 0x00000000000000FF) << 56; +} + +fastfloat_really_inline uint64_t read_u64(const char *chars) { + uint64_t val; + ::memcpy(&val, chars, sizeof(uint64_t)); +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + // Need to read as-if the number was in little-endian order. + val = byteswap(val); +#endif + return val; +} + +fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) { +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + // Need to read as-if the number was in little-endian order. + val = byteswap(val); +#endif + ::memcpy(chars, &val, sizeof(uint64_t)); +} // credit @aqrit fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) { @@ -27,21 +55,17 @@ fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) { } fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { - uint64_t val; - ::memcpy(&val, chars, sizeof(uint64_t)); - return parse_eight_digits_unrolled(val); + return parse_eight_digits_unrolled(read_u64(chars)); } // credit @aqrit fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept { return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & - 0x8080808080808080)); + 0x8080808080808080)); } fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept { - uint64_t val; - ::memcpy(&val, chars, 8); - return is_made_of_eight_digits_fast(val); + return is_made_of_eight_digits_fast(read_u64(chars)); } struct parsed_number_string { @@ -87,17 +111,15 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ int64_t exponent = 0; if ((p != pend) && (*p == '.')) { ++p; -#if FASTFLOAT_IS_BIG_ENDIAN == 0 - // Fast approach only tested under little endian systems + // Fast approach only tested under little endian systems + if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok + p += 8; if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok p += 8; - if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok - p += 8; - } } -#endif + } while ((p != pend) && is_integer(*p)) { uint8_t digit = uint8_t(*p - '0'); ++p; @@ -225,20 +247,17 @@ fastfloat_really_inline decimal parse_decimal(const char *p, const char *pend) n ++p; } } -#if FASTFLOAT_IS_BIG_ENDIAN == 0 // We expect that this loop will often take the bulk of the running time // because when a value has lots of digits, these digits often while ((p + 8 <= pend) && (answer.num_digits + 8 < max_digits)) { - uint64_t val; - ::memcpy(&val, p, sizeof(uint64_t)); + uint64_t val = read_u64(p); if(! is_made_of_eight_digits_fast(val)) { break; } // We have eight digits, process them in one go! val -= 0x3030303030303030; - ::memcpy(answer.digits + answer.num_digits, &val, sizeof(uint64_t)); + write_u64(answer.digits + answer.num_digits, val); answer.num_digits += 8; p += 8; } -#endif while ((p != pend) && is_integer(*p)) { if (answer.num_digits < max_digits) { answer.digits[answer.num_digits] = uint8_t(*p - '0'); @@ -282,7 +301,7 @@ fastfloat_really_inline decimal parse_decimal(const char *p, const char *pend) n uint8_t digit = uint8_t(*p - '0'); if (exp_number < 0x10000) { exp_number = 10 * exp_number + digit; - } + } ++p; } answer.decimal_point += (neg_exp ? -exp_number : exp_number);