Final review for a simpler merge.

The FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED feature is removed because hex and binary parsing do not work properly with it.
Use FASTFLOAT_SIMD_DISABLE_WARNINGS and FASTFLOAT_SIMD_RESTORE_WARNINGS properly: only around instructions that allow unaligned loads.
This commit is contained in:
IRainman 2025-12-26 00:13:41 +03:00
parent cdabfe49af
commit d07362c459
8 changed files with 67 additions and 57 deletions

View File

@ -84,10 +84,10 @@ fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const &data) {
}
fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast<__m128i const *>(
chars))); // TODO: V1032 https://pvs-studio.com/en/docs/warnings/v1032/
// The pointer 'chars' is cast to a more strictly aligned
// pointer type.
FASTFLOAT_SIMD_DISABLE_WARNINGS
return simd_read8_to_u64(
_mm_loadu_si128(reinterpret_cast<__m128i const *>(chars)));
FASTFLOAT_SIMD_RESTORE_WARNINGS
}
#elif defined(FASTFLOAT_NEON)
@ -98,8 +98,10 @@ fastfloat_really_inline uint64_t simd_read8_to_u64(uint16x8_t const &data) {
}
fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
FASTFLOAT_SIMD_DISABLE_WARNINGS
return simd_read8_to_u64(
vld1q_u16(reinterpret_cast<uint16_t const *>(chars)));
FASTFLOAT_SIMD_RESTORE_WARNINGS
}
#endif
@ -118,9 +120,9 @@ uint64_t simd_read8_to_u64(UC const *) {
// credit @aqrit
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
parse_eight_digits_unrolled(uint64_t val) noexcept {
constexpr uint64_t mask = 0x000000FF000000FF;
constexpr uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
constexpr uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
uint64_t const mask = 0x000000FF000000FF;
uint64_t const mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
uint64_t const mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
val -= 0x3030303030303030;
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
@ -156,11 +158,11 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
return false;
}
#ifdef FASTFLOAT_SSE2
FASTFLOAT_SIMD_DISABLE_WARNINGS
// Load 8 UTF-16 characters (16 bytes)
__m128i const data = _mm_loadu_si128(reinterpret_cast<__m128i const *>(
chars)); // TODO: V1032 https://pvs-studio.com/en/docs/warnings/v1032/ The
// pointer 'chars' is cast to a more strictly aligned pointer
// type.
__m128i const data =
_mm_loadu_si128(reinterpret_cast<__m128i const *>(chars));
FASTFLOAT_SIMD_RESTORE_WARNINGS
// Branchless "are all digits?" trick from Lemire:
// (x - '0') <= 9 <=> (x + 32720) <= 32729
@ -175,7 +177,9 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
return true;
}
#elif defined(FASTFLOAT_NEON)
FASTFLOAT_SIMD_DISABLE_WARNINGS
uint16x8_t const data = vld1q_u16(reinterpret_cast<uint16_t const *>(chars));
FASTFLOAT_SIMD_RESTORE_WARNINGS
// (x - '0') <= 9
// http://0x80.pl/articles/simd-parsing-int-sequences.html
@ -286,7 +290,7 @@ report_parse_error(UC const *p, parse_error error) noexcept {
template <bool basic_json_fmt, typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
parse_number_string(UC const *p, UC const *pend,
parse_options_t<UC> const &options) noexcept {
parse_options_t<UC> const options) noexcept {
// Cyclomatic complexity https://en.wikipedia.org/wiki/Cyclomatic_complexity
// Consider refactoring the 'parse_number_string' function.
// FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN fix this.
@ -295,8 +299,8 @@ parse_number_string(UC const *p, UC const *pend,
FASTFLOAT_ASSUME(p < pend);
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
answer.negative = (*p == UC('-'));
// C++17 20.19.3.(7.1) explicitly forbids '+' sign here
if (answer.negative ||
// C++17 20.19.3.(7.1) explicitly forbids '+' sign here
((chars_format_t(options.format & chars_format::allow_leading_plus)) &&
(!basic_json_fmt && *p == UC('+')))) {
++p;
@ -400,8 +404,11 @@ parse_number_string(UC const *p, UC const *pend,
#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
++p;
#else
if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) ||
(UC('D') == *p)) {
if ((UC('e') == *p) || (UC('E') == *p)
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|| (UC('d') == *p) || (UC('D') == *p)
#endif
) {
++p;
}
#endif
@ -483,8 +490,8 @@ parse_number_string(UC const *p, UC const *pend,
p = answer.integer.ptr;
UC const *int_end = p + answer.integer.len();
constexpr am_mant_t minimal_nineteen_digit_integer{1000000000000000000};
while ((answer.mantissa < minimal_nineteen_digit_integer) &&
(p != int_end)) {
while ((p != int_end) &&
(answer.mantissa < minimal_nineteen_digit_integer)) {
answer.mantissa = static_cast<am_mant_t>(
answer.mantissa * 10 + static_cast<am_mant_t>(*p - UC('0')));
++p;
@ -496,8 +503,8 @@ parse_number_string(UC const *p, UC const *pend,
// We have a value with a significant fractional component.
p = answer.fraction.ptr;
UC const *const frac_end = p + answer.fraction.len();
while ((answer.mantissa < minimal_nineteen_digit_integer) &&
(p != frac_end)) {
while ((p != frac_end) &&
(answer.mantissa < minimal_nineteen_digit_integer)) {
answer.mantissa = static_cast<am_mant_t>(
answer.mantissa * 10 + static_cast<am_mant_t>(*p - UC('0')));
++p;
@ -514,7 +521,7 @@ parse_number_string(UC const *p, UC const *pend,
template <typename T, typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
parse_int_string(UC const *p, UC const *pend, T &value,
parse_options_t<UC> const &options) noexcept {
parse_options_t<UC> const options) noexcept {
from_chars_result_t<UC> answer;
@ -645,14 +652,7 @@ parse_int_string(UC const *p, UC const *pend, T &value,
loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
}
while (p != pend) {
#ifdef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
const auto digit = *p;
if (!is_integer(digit)) {
break;
}
#else
auto const digit = ch_to_digit(*p);
#endif
if (digit >= options.base) {
break;
}

View File

@ -281,7 +281,7 @@ template <limb_t size>
inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,
limb y) noexcept {
limb carry = 0;
for (limb_t index = 0; index++ != vec.len();) {
for (limb_t index = 0; index != vec.len(); ++index) {
vec[index] = scalar_mul(vec[index], y, carry);
}
if (carry != 0) {
@ -302,7 +302,7 @@ FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
}
bool carry = false;
for (limb_t index = 0; index++ != y.len();) {
for (limb_t index = 0; index != y.len(); ++index) {
limb xi = x[index + start];
limb yi = y[index];
bool c1 = false;
@ -487,7 +487,7 @@ struct bigint : pow5_tables<> {
} else if (vec.len() < other.vec.len()) {
return -1;
} else {
for (limb_t index = vec.len(); index-- != 0;) {
for (limb_t index = vec.len(); index != 0; --index) {
limb xi = vec[index - 1];
limb yi = other.vec[index - 1];
if (xi > yi) {
@ -514,7 +514,7 @@ struct bigint : pow5_tables<> {
bigint_bits_t const shl = n;
bigint_bits_t const shr = limb_bits - shl;
limb prev = 0;
for (limb_t index = 0; index++ != vec.len();) {
for (limb_t index = 0; index != vec.len(); ++index) {
limb xi = vec[index];
vec[index] = (xi << shl) | (prev >> shr);
prev = xi;

View File

@ -139,8 +139,8 @@ compute_float(int64_t q, uint64_t w) noexcept {
// branchless approach: value128 product = compute_product(q, w); but in
// practice, we can win big with the compute_product_approximation if its
// additional branch is easily predicted. Which is best is data specific.
limb_t const upperbit = limb_t(product.high >> 63);
limb_t const shift =
auto const upperbit = limb_t(product.high >> 63);
auto const shift =
limb_t(upperbit + 64 - binary::mantissa_explicit_bits() - 3);
answer.mantissa = product.high >> shift;

View File

@ -58,18 +58,18 @@ scientific_exponent(am_mant_t mantissa, am_pow_t exponent) noexcept {
// this converts a native floating-point number to an extended-precision float.
template <typename T>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
to_extended(T const &value) noexcept {
to_extended(T const value) noexcept {
using equiv_uint = equiv_uint_t<T>;
constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
adjusted_mantissa am;
constexpr am_pow_t bias = binary_format<T>::mantissa_explicit_bits() -
binary_format<T>::minimum_exponent();
equiv_uint const bits = bit_cast<equiv_uint, T>(value);
adjusted_mantissa am;
if ((bits & exponent_mask) == 0) {
// denormal
am.power2 = 1 - bias;
@ -90,7 +90,7 @@ to_extended(T const &value) noexcept {
// halfway between b and b+u.
template <typename T>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
to_extended_halfway(T const &value) noexcept {
to_extended_halfway(T const value) noexcept {
adjusted_mantissa am = to_extended(value);
am.mantissa <<= 1;
am.mantissa += 1;
@ -253,7 +253,7 @@ round_up_bigint(bigint &big, am_digits &count) noexcept {
// parse the significant digits into a big integer
template <typename T, typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 am_digits
inline FASTFLOAT_CONSTEXPR20 am_digits
parse_mantissa(bigint &result, const parsed_number_string_t<UC> &num) noexcept {
// try to minimize the number of big integer and scalar multiplication.
// therefore, try to parse 8 digits at a time, and multiply by the largest
@ -369,8 +369,7 @@ template <typename T>
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
negative_digit_comp(bigint &real_digits, adjusted_mantissa am,
am_pow_t const real_exp) noexcept {
// get the value of `b`, rounded down, and get a bigint representation of
// b+h
// get the value of `b`, rounded down, and get a bigint representation of b+h
adjusted_mantissa am_b = am;
// gcc7 bug: use a lambda to remove the noexcept qualifier bug with
// -Wnoexcept-type.

View File

@ -43,7 +43,7 @@ from_chars(UC const *first, UC const *last, T &value,
template <typename T, typename UC = char>
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
from_chars_advanced(UC const *first, UC const *last, T &value,
parse_options_t<UC> const &options) noexcept;
parse_options_t<UC> const options) noexcept;
/**
* This function multiplies an integer number by a power of 10 and returns

View File

@ -1,8 +1,6 @@
#ifndef FASTFLOAT_FAST_TABLE_H
#define FASTFLOAT_FAST_TABLE_H
#include <cstdint>
namespace fast_float {
/**

View File

@ -205,6 +205,21 @@ FASTFLOAT_CONSTEXPR20 To bit_cast(const From &from) {
#define FASTFLOAT_HAS_SIMD 1
#endif
#if defined(__GNUC__)
// disable -Wcast-align=strict (GCC only)
#define FASTFLOAT_SIMD_DISABLE_WARNINGS \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wcast-align\"")
#else
#define FASTFLOAT_SIMD_DISABLE_WARNINGS
#endif
#if defined(__GNUC__)
#define FASTFLOAT_SIMD_RESTORE_WARNINGS _Pragma("GCC diagnostic pop")
#else
#define FASTFLOAT_SIMD_RESTORE_WARNINGS
#endif
#ifdef FASTFLOAT_VISUAL_STUDIO
#define fastfloat_really_inline __forceinline
#else
@ -288,7 +303,7 @@ template <typename UC>
inline FASTFLOAT_CONSTEXPR14 bool
fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase,
uint_fast8_t const length) noexcept {
for (uint_fast8_t i = 0; i++ != length;) {
for (uint_fast8_t i = 0; i != length; ++i) {
UC const actual = actual_mixedcase[i];
if ((actual < 256 ? actual | 32 : actual) != expected_lowercase[i]) {
return false;
@ -450,8 +465,10 @@ umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) noexcept {
// slow emulation routine for 32-bit
#if !defined(__MINGW64__)
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t
_umul128(uint64_t ab, uint64_t cd, uint64_t *hi) noexcept {
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t_umul128(uint64_t ab,
uint64_t cd,
uint64_t *hi)
noexcept {
return umul128_generic(ab, cd, hi);
}
#endif // !__MINGW64__
@ -1186,7 +1203,6 @@ template <> constexpr char8_t const *str_const_inf<char8_t>() {
#endif
template <typename = void> struct int_luts {
#ifndef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
static constexpr uint8_t chdigit[] = {
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
@ -1206,7 +1222,6 @@ template <typename = void> struct int_luts {
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255};
#endif
static constexpr uint_fast8_t maxdigits_u64[] = {
64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16,
@ -1237,7 +1252,6 @@ template <typename T> constexpr uint64_t int_luts<T>::min_safe_u64[];
#endif
#ifndef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
template <typename UC>
fastfloat_really_inline constexpr uint_fast8_t ch_to_digit(UC c) noexcept {
// wchar_t and char can be signed, so we need to be careful.
@ -1247,7 +1261,6 @@ fastfloat_really_inline constexpr uint_fast8_t ch_to_digit(UC c) noexcept {
static_cast<UnsignedUC>(
-((static_cast<UnsignedUC>(c) & ~0xFFull) == 0)))];
}
#endif
fastfloat_really_inline constexpr uint_fast8_t
max_digits_u64(uint_fast8_t base) noexcept {

View File

@ -148,7 +148,7 @@ template <typename T> struct from_chars_caller {
template <typename UC>
FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
call(UC const *first, UC const *last, T &value,
parse_options_t<UC> const &options) noexcept {
parse_options_t<UC> const options) noexcept {
return from_chars_advanced(first, last, value, options);
}
};
@ -322,7 +322,7 @@ from_chars_advanced(parsed_number_string_t<UC> const &pns, T &value) noexcept {
template <typename T, typename UC>
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
from_chars_float_advanced(UC const *first, UC const *last, T &value,
parse_options_t<UC> const &options) noexcept {
parse_options_t<UC> const options) noexcept {
static_assert(is_supported_float_type<T>::value,
"only some floating-point types are supported");
@ -400,7 +400,7 @@ FASTFLOAT_CONSTEXPR20
return value;
adjusted_mantissa am =
compute_float<binary_format<double>>(decimal_exponent, mantissa);
compute_float<binary_format<T>>(decimal_exponent, mantissa);
to_float(
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
false,
@ -415,7 +415,7 @@ FASTFLOAT_CONSTEXPR20
integer_times_pow10(int64_t const mantissa,
int_fast16_t const decimal_exponent) noexcept {
#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
FASTFLOAT_ASSUME(mantissa > 0);
FASTFLOAT_ASSUME(mantissa >= 0);
const am_mant_t m = static_cast<am_mant_t>(mantissa);
#else
const bool is_negative = mantissa < 0;
@ -493,7 +493,7 @@ integer_times_pow10(Int mantissa, int_fast16_t decimal_exponent) noexcept {
template <typename T, typename UC>
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
from_chars_int_advanced(UC const *first, UC const *last, T &value,
parse_options_t<UC> const &options) noexcept {
parse_options_t<UC> const options) noexcept {
static_assert(is_supported_integer_type<T>::value,
"only integer types are supported");
@ -533,7 +533,7 @@ template <> struct from_chars_advanced_caller<1> {
template <typename T, typename UC>
FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
call(UC const *first, UC const *last, T &value,
parse_options_t<UC> const &options) noexcept {
parse_options_t<UC> const options) noexcept {
return from_chars_float_advanced(first, last, value, options);
}
};
@ -542,7 +542,7 @@ template <> struct from_chars_advanced_caller<2> {
template <typename T, typename UC>
FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
call(UC const *first, UC const *last, T &value,
parse_options_t<UC> const &options) noexcept {
parse_options_t<UC> const options) noexcept {
return from_chars_int_advanced(first, last, value, options);
}
};
@ -550,7 +550,7 @@ template <> struct from_chars_advanced_caller<2> {
template <typename T, typename UC>
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
from_chars_advanced(UC const *first, UC const *last, T &value,
parse_options_t<UC> const &options) noexcept {
parse_options_t<UC> const options) noexcept {
return from_chars_advanced_caller<
size_t(is_supported_float_type<T>::value) +
2 * size_t(is_supported_integer_type<T>::value)>::call(first, last, value,