mirror of
https://github.com/fastfloat/fast_float.git
synced 2026-02-13 21:59:54 +08:00
Merge pull request #198 from mayawarrier/main
Add opt-in SIMD support for char16_t
This commit is contained in:
commit
8139e164b8
3
.gitignore
vendored
3
.gitignore
vendored
@ -3,10 +3,11 @@ Testing/*
|
|||||||
.cache/
|
.cache/
|
||||||
compile_commands.json
|
compile_commands.json
|
||||||
|
|
||||||
# Visual Studio
|
# Visual studio
|
||||||
.vs/
|
.vs/
|
||||||
Debug/
|
Debug/
|
||||||
Release/
|
Release/
|
||||||
|
/out/
|
||||||
*.sln
|
*.sln
|
||||||
*.vcxproj
|
*.vcxproj
|
||||||
*.vcxproj.filters
|
*.vcxproj.filters
|
||||||
|
|||||||
@ -5,4 +5,5 @@ Neal Richardson
|
|||||||
Tim Paine
|
Tim Paine
|
||||||
Fabio Pellacini
|
Fabio Pellacini
|
||||||
Lénárd Szolnoki
|
Lénárd Szolnoki
|
||||||
Jan Pharago
|
Jan Pharago
|
||||||
|
Maya Warrier
|
||||||
@ -5,11 +5,26 @@
|
|||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
#include "float_common.h"
|
#include "float_common.h"
|
||||||
|
|
||||||
|
#ifdef FASTFLOAT_SSE2
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
namespace fast_float {
|
namespace fast_float {
|
||||||
|
|
||||||
|
template <typename UC>
|
||||||
|
fastfloat_really_inline constexpr bool has_simd_opt() {
|
||||||
|
#ifdef FASTFLOAT_HAS_SIMD
|
||||||
|
return std::is_same<UC, char16_t>::value;
|
||||||
|
#else
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
// Next function can be micro-optimized, but compilers are entirely
|
// Next function can be micro-optimized, but compilers are entirely
|
||||||
// able to optimize it well.
|
// able to optimize it well.
|
||||||
template <typename UC>
|
template <typename UC>
|
||||||
@ -28,12 +43,14 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
|
|||||||
| (val & 0x00000000000000FF) << 56;
|
| (val & 0x00000000000000FF) << 56;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read 8 UC into a u64. Truncates UC if not char.
|
||||||
|
template <typename UC>
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
uint64_t read_u64(const char *chars) {
|
uint64_t read8_to_u64(const UC *chars) {
|
||||||
if (cpp20_and_in_constexpr()) {
|
if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
|
||||||
uint64_t val = 0;
|
uint64_t val = 0;
|
||||||
for(int i = 0; i < 8; ++i) {
|
for(int i = 0; i < 8; ++i) {
|
||||||
val |= uint64_t(*chars) << (i*8);
|
val |= uint64_t(uint8_t(*chars)) << (i*8);
|
||||||
++chars;
|
++chars;
|
||||||
}
|
}
|
||||||
return val;
|
return val;
|
||||||
@ -47,6 +64,39 @@ uint64_t read_u64(const char *chars) {
|
|||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef FASTFLOAT_SSE2
|
||||||
|
|
||||||
|
fastfloat_really_inline
|
||||||
|
uint64_t simd_read8_to_u64(const __m128i data) {
|
||||||
|
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||||
|
const __m128i packed = _mm_packus_epi16(data, data);
|
||||||
|
#ifdef FASTFLOAT_64BIT
|
||||||
|
return uint64_t(_mm_cvtsi128_si64(packed));
|
||||||
|
#else
|
||||||
|
uint64_t value;
|
||||||
|
// Visual Studio + older versions of GCC don't support _mm_storeu_si64
|
||||||
|
_mm_storel_epi64(reinterpret_cast<__m128i*>(&value), packed);
|
||||||
|
return value;
|
||||||
|
#endif
|
||||||
|
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||||
|
}
|
||||||
|
|
||||||
|
fastfloat_really_inline
|
||||||
|
uint64_t simd_read8_to_u64(const char16_t* chars) {
|
||||||
|
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||||
|
return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(chars)));
|
||||||
|
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Dummy overload so the call compiles for UC types without a SIMD path (call sites are guarded by has_simd_opt<UC>()).
|
||||||
|
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
|
||||||
|
uint64_t simd_read8_to_u64(UC const*) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
void write_u64(uint8_t *chars, uint64_t val) {
|
void write_u64(uint8_t *chars, uint64_t val) {
|
||||||
if (cpp20_and_in_constexpr()) {
|
if (cpp20_and_in_constexpr()) {
|
||||||
@ -76,40 +126,80 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
|
|||||||
return uint32_t(val);
|
return uint32_t(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
fastfloat_really_inline constexpr
|
|
||||||
uint32_t parse_eight_digits_unrolled(const char16_t *) noexcept {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
fastfloat_really_inline constexpr
|
|
||||||
uint32_t parse_eight_digits_unrolled(const char32_t *) noexcept {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Call this if chars are definitely 8 digits.
|
||||||
|
template <typename UC>
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
|
uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept {
|
||||||
return parse_eight_digits_unrolled(read_u64(chars));
|
if (cpp20_and_in_constexpr() || !has_simd_opt<UC>()) {
|
||||||
|
return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
|
||||||
|
}
|
||||||
|
return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// credit @aqrit
|
// credit @aqrit
|
||||||
fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
|
fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
|
||||||
return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
|
return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
|
||||||
0x8080808080808080));
|
0x8080808080808080));
|
||||||
}
|
}
|
||||||
|
|
||||||
fastfloat_really_inline constexpr
|
|
||||||
bool is_made_of_eight_digits_fast(const char16_t *) noexcept {
|
#ifdef FASTFLOAT_HAS_SIMD
|
||||||
return false;
|
|
||||||
|
// Call this if chars might not be 8 digits.
|
||||||
|
// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
|
||||||
|
// ensures we don't load SIMD registers twice.
|
||||||
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
|
bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
|
||||||
|
if (cpp20_and_in_constexpr()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#ifdef FASTFLOAT_SSE2
|
||||||
|
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||||
|
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
|
||||||
|
|
||||||
|
// (x - '0') <= 9
|
||||||
|
// http://0x80.pl/articles/simd-parsing-int-sequences.html
|
||||||
|
const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
|
||||||
|
const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));
|
||||||
|
|
||||||
|
if (_mm_movemask_epi8(t1) == 0) {
|
||||||
|
i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else return false;
|
||||||
|
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
fastfloat_really_inline constexpr
|
#endif
|
||||||
bool is_made_of_eight_digits_fast(const char32_t *) noexcept {
|
|
||||||
return false;
|
// Dummy overload so the call compiles for UC types without a SIMD path (call sites are guarded by has_simd_opt<UC>()).
|
||||||
|
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
|
||||||
|
uint64_t simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value)>
|
||||||
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
|
void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) {
|
||||||
|
if (!has_simd_opt<UC>()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok
|
||||||
|
p += 8;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
bool is_made_of_eight_digits_fast(const char *chars) noexcept {
|
void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) {
|
||||||
return is_made_of_eight_digits_fast(read_u64(chars));
|
// optimizes better than parse_if_eight_digits_unrolled() for UC = char.
|
||||||
|
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) {
|
||||||
|
i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok
|
||||||
|
p += 8;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename UC>
|
template <typename UC>
|
||||||
@ -124,8 +214,10 @@ struct parsed_number_string_t {
|
|||||||
span<const UC> integer{}; // non-nullable
|
span<const UC> integer{}; // non-nullable
|
||||||
span<const UC> fraction{}; // nullable
|
span<const UC> fraction{}; // nullable
|
||||||
};
|
};
|
||||||
using byte_span = span<char>;
|
|
||||||
|
using byte_span = span<const char>;
|
||||||
using parsed_number_string = parsed_number_string_t<char>;
|
using parsed_number_string = parsed_number_string_t<char>;
|
||||||
|
|
||||||
// Assuming that you use no more than 19 digits, this will
|
// Assuming that you use no more than 19 digits, this will
|
||||||
// parse an ASCII string.
|
// parse an ASCII string.
|
||||||
template <typename UC>
|
template <typename UC>
|
||||||
@ -171,12 +263,8 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
|
|||||||
UC const * before = p;
|
UC const * before = p;
|
||||||
// can occur at most twice without overflowing, but let it occur more, since
|
// can occur at most twice without overflowing, but let it occur more, since
|
||||||
// for integers with many digits, digit parsing is the primary bottleneck.
|
// for integers with many digits, digit parsing is the primary bottleneck.
|
||||||
if (std::is_same<UC,char>::value) {
|
loop_parse_if_eight_digits(p, pend, i);
|
||||||
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
|
|
||||||
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
|
|
||||||
p += 8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while ((p != pend) && is_integer(*p)) {
|
while ((p != pend) && is_integer(*p)) {
|
||||||
uint8_t digit = uint8_t(*p - UC('0'));
|
uint8_t digit = uint8_t(*p - UC('0'));
|
||||||
++p;
|
++p;
|
||||||
@ -241,6 +329,7 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
|
|||||||
if(*start == UC('0')) { digit_count --; }
|
if(*start == UC('0')) { digit_count --; }
|
||||||
start++;
|
start++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (digit_count > 19) {
|
if (digit_count > 19) {
|
||||||
answer.too_many_digits = true;
|
answer.too_many_digits = true;
|
||||||
// Let us start again, this time, avoiding overflows.
|
// Let us start again, this time, avoiding overflows.
|
||||||
@ -248,22 +337,23 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
|
|||||||
// pre-tokenized spans from above.
|
// pre-tokenized spans from above.
|
||||||
i = 0;
|
i = 0;
|
||||||
p = answer.integer.ptr;
|
p = answer.integer.ptr;
|
||||||
UC const * int_end = p + answer.integer.len();
|
UC const* int_end = p + answer.integer.len();
|
||||||
const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
|
const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 };
|
||||||
while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
|
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
|
||||||
i = i * 10 + uint64_t(*p - UC('0'));
|
i = i * 10 + uint64_t(*p - UC('0'));
|
||||||
++p;
|
++p;
|
||||||
}
|
}
|
||||||
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
|
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
|
||||||
exponent = end_of_integer_part - p + exp_number;
|
exponent = end_of_integer_part - p + exp_number;
|
||||||
} else { // We have a value with a fractional component.
|
}
|
||||||
p = answer.fraction.ptr;
|
else { // We have a value with a fractional component.
|
||||||
UC const * frac_end = p + answer.fraction.len();
|
p = answer.fraction.ptr;
|
||||||
while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
|
UC const* frac_end = p + answer.fraction.len();
|
||||||
i = i * 10 + uint64_t(*p - UC('0'));
|
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
|
||||||
++p;
|
i = i * 10 + uint64_t(*p - UC('0'));
|
||||||
}
|
++p;
|
||||||
exponent = answer.fraction.ptr - p + exp_number;
|
}
|
||||||
|
exponent = answer.fraction.ptr - p + exp_number;
|
||||||
}
|
}
|
||||||
// We have now corrected both exponent and i, to a truncated value
|
// We have now corrected both exponent and i, to a truncated value
|
||||||
}
|
}
|
||||||
|
|||||||
@ -201,18 +201,10 @@ bool is_truncated(span<const UC> s) noexcept {
|
|||||||
return is_truncated(s.ptr, s.ptr + s.len());
|
return is_truncated(s.ptr, s.ptr + s.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
|
||||||
void parse_eight_digits(const char16_t*& , limb& , size_t& , size_t& ) noexcept {
|
|
||||||
// currently unused
|
|
||||||
}
|
|
||||||
|
|
||||||
|
template <typename UC>
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
||||||
void parse_eight_digits(const char32_t*& , limb& , size_t& , size_t& ) noexcept {
|
void parse_eight_digits(const UC*& p, limb& value, size_t& counter, size_t& count) noexcept {
|
||||||
// currently unused
|
|
||||||
}
|
|
||||||
|
|
||||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
|
|
||||||
void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
|
|
||||||
value = value * 100000000 + parse_eight_digits_unrolled(p);
|
value = value * 100000000 + parse_eight_digits_unrolled(p);
|
||||||
p += 8;
|
p += 8;
|
||||||
counter += 8;
|
counter += 8;
|
||||||
@ -264,10 +256,8 @@ void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_
|
|||||||
skip_zeros(p, pend);
|
skip_zeros(p, pend);
|
||||||
// process all digits, in increments of step per loop
|
// process all digits, in increments of step per loop
|
||||||
while (p != pend) {
|
while (p != pend) {
|
||||||
if (std::is_same<UC,char>::value) {
|
while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
|
||||||
while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
|
parse_eight_digits(p, value, counter, digits);
|
||||||
parse_eight_digits(p, value, counter, digits);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
while (counter < step && p != pend && digits < max_digits) {
|
while (counter < step && p != pend && digits < max_digits) {
|
||||||
parse_one_digit(p, value, counter, digits);
|
parse_one_digit(p, value, counter, digits);
|
||||||
@ -299,10 +289,8 @@ void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_
|
|||||||
}
|
}
|
||||||
// process all digits, in increments of step per loop
|
// process all digits, in increments of step per loop
|
||||||
while (p != pend) {
|
while (p != pend) {
|
||||||
if (std::is_same<UC,char>::value) {
|
while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
|
||||||
while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
|
parse_eight_digits(p, value, counter, digits);
|
||||||
parse_eight_digits(p, value, counter, digits);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
while (counter < step && p != pend && digits < max_digits) {
|
while (counter < step && p != pend && digits < max_digits) {
|
||||||
parse_one_digit(p, value, counter, digits);
|
parse_one_digit(p, value, counter, digits);
|
||||||
|
|||||||
@ -115,6 +115,34 @@ using parse_options = parse_options_t<char>;
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE2__) || \
|
||||||
|
(defined(FASTFLOAT_VISUAL_STUDIO) && \
|
||||||
|
(defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)))
|
||||||
|
#define FASTFLOAT_SSE2 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef FASTFLOAT_SSE2
|
||||||
|
#define FASTFLOAT_HAS_SIMD 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
// disable -Wcast-align=strict (GCC only)
|
||||||
|
#define FASTFLOAT_SIMD_DISABLE_WARNINGS \
|
||||||
|
_Pragma("GCC diagnostic push") \
|
||||||
|
_Pragma("GCC diagnostic ignored \"-Wcast-align\"")
|
||||||
|
#else
|
||||||
|
#define FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
#define FASTFLOAT_SIMD_RESTORE_WARNINGS \
|
||||||
|
_Pragma("GCC diagnostic pop")
|
||||||
|
#else
|
||||||
|
#define FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef FASTFLOAT_VISUAL_STUDIO
|
#ifdef FASTFLOAT_VISUAL_STUDIO
|
||||||
#define fastfloat_really_inline __forceinline
|
#define fastfloat_really_inline __forceinline
|
||||||
#else
|
#else
|
||||||
@ -132,6 +160,9 @@ using parse_options = parse_options_t<char>;
|
|||||||
// rust style `try!()` macro, or `?` operator
|
// rust style `try!()` macro, or `?` operator
|
||||||
#define FASTFLOAT_TRY(x) { if (!(x)) return false; }
|
#define FASTFLOAT_TRY(x) { if (!(x)) return false; }
|
||||||
|
|
||||||
|
#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0
|
||||||
|
|
||||||
|
|
||||||
namespace fast_float {
|
namespace fast_float {
|
||||||
|
|
||||||
fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
|
fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
|
||||||
|
|||||||
@ -166,6 +166,7 @@ from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last,
|
|||||||
if (!pns.valid) {
|
if (!pns.valid) {
|
||||||
return detail::parse_infnan(first, last, value);
|
return detail::parse_infnan(first, last, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
answer.ec = std::errc(); // be optimistic
|
answer.ec = std::errc(); // be optimistic
|
||||||
answer.ptr = pns.lastmatch;
|
answer.ptr = pns.lastmatch;
|
||||||
// The implementation of the Clinger's fast path is convoluted because
|
// The implementation of the Clinger's fast path is convoluted because
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user