From 6ceb29a7e48db8ac5f441ce0a23e3e4f205d07a8 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 16:21:34 +0000 Subject: [PATCH 01/15] We might reenable clinger. --- include/fast_float/float_common.h | 25 ++++++++++++++++++++++- include/fast_float/parse_number.h | 34 +++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index f99371c..a19a817 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -272,10 +272,12 @@ template struct binary_format { static inline constexpr int minimum_exponent(); static inline constexpr int infinite_power(); static inline constexpr int sign_index(); + static inline constexpr int min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST static inline constexpr int max_exponent_fast_path(); static inline constexpr int max_exponent_round_to_even(); static inline constexpr int min_exponent_round_to_even(); static inline constexpr uint64_t max_mantissa_fast_path(int64_t power); + static inline constexpr uint64_t max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST static inline constexpr int largest_power_of_ten(); static inline constexpr int smallest_power_of_ten(); static inline constexpr T exact_power_of_ten(int64_t power); @@ -285,6 +287,22 @@ template struct binary_format { static inline constexpr equiv_uint hidden_bit_mask(); }; +template <> inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -22; +#endif +} + +template <> inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -10; +#endif +} + template <> inline constexpr int binary_format::mantissa_explicit_bits() { return 52; } @@ -331,13 +349,18 @@ template <> inline constexpr int 
binary_format::max_exponent_fast_path() template <> inline constexpr int binary_format::max_exponent_fast_path() { return 10; } - +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path(int64_t power) { // caller is responsible to ensure that // power >= 0 && power <= 22 // return max_mantissa_double[power]; } +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path(int64_t power) { // caller is responsible to ensure that // power >= 0 && power <= 10 diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 8789475..89e1b5d 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -60,6 +60,15 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n return answer; } +fastfloat_really_inline bool rounds_nearest() { + // This function is meant to be equivalent to : + // prior: #include + // return fegetround() == FE_TONEAREST; + // volatile prevents the compiler from computing the function at compile-time + static volatile float fmin = std::numeric_limits::min(); + return (fmin + 1.0f == 1.0f - fmin); +} + } // namespace detail template @@ -87,12 +96,25 @@ from_chars_result from_chars_advanced(const char *first, const char *last, } answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; - // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal - if (pns.exponent >= 0 && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent) && !pns.too_many_digits) { - value = T(pns.mantissa); - value = value * binary_format::exact_power_of_ten(pns.exponent); - if (pns.negative) { value = 
-value; } - return answer; + if(detail::rounds_nearest()) { + // We have that fegetround() == FE_TONEAREST. + // Next is Clinger's fast path. + if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path() && !pns.too_many_digits) { + value = T(pns.mantissa); + if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } + else { value = value * binary_format::exact_power_of_ten(pns.exponent); } + if (pns.negative) { value = -value; } + return answer; + } + } else { + // We do not have that fegetround() == FE_TONEAREST. + // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal + if (pns.exponent >= 0 && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent) && !pns.too_many_digits) { + value = T(pns.mantissa); + value = value * binary_format::exact_power_of_ten(pns.exponent); + if (pns.negative) { value = -value; } + return answer; + } } adjusted_mantissa am = compute_float>(pns.exponent, pns.mantissa); if(pns.too_many_digits && am.power2 >= 0) { From 2c8e73895062198bedca9597fd44f8e8d32c7212 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 12:06:33 -0500 Subject: [PATCH 02/15] Cleaning. 
--- include/fast_float/parse_number.h | 29 +++++- tests/basictest.cpp | 149 ++++++++++++++++++++---------- 2 files changed, 129 insertions(+), 49 deletions(-) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 89e1b5d..1340a0d 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -60,12 +60,35 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n return answer; } -fastfloat_really_inline bool rounds_nearest() { +fastfloat_really_inline bool rounds_to_nearest() noexcept { // This function is meant to be equivalent to : // prior: #include // return fegetround() == FE_TONEAREST; + // However, it is expected to be much faster than the fegetround() + // function call. + // // volatile prevents the compiler from computing the function at compile-time static volatile float fmin = std::numeric_limits::min(); + // + // Explanation: + // Only when fegetround() == FE_TONEAREST do we have that + // fmin + 1.0f == 1.0f - fmin. + // + // FE_UPWARD: + // fmin + 1.0f = 0x1.00001 (1.00001) + // 1.0f - fmin = 0x1 (1) + // + // FE_DOWNWARD or FE_TOWARDZERO: + // fmin + 1.0f = 0x1 (1) + // 1.0f - fmin = 0x0.999999 (0.999999) + // + // fmin + 1.0f = 0x1 (1) + // 1.0f - fmin = 0x0.999999 (0.999999) + // + // FE_TONEAREST: + // fmin + 1.0f = 0x1 (1) + // 1.0f - fmin = 0x1 (1) + // return (fmin + 1.0f == 1.0f - fmin); } @@ -96,7 +119,9 @@ from_chars_result from_chars_advanced(const char *first, const char *last, } answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; - if(detail::rounds_nearest()) { + // Unfortunately, the conventional Clinger's fast path is only possible + // when the system rounds to the nearest float. + if(detail::rounds_to_nearest()) { // We have that fegetround() == FE_TONEAREST. // Next is Clinger's fast path. 
if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path() && !pns.too_many_digits) { diff --git a/tests/basictest.cpp b/tests/basictest.cpp index 273c2c6..251e609 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #ifndef SUPPLEMENTAL_TEST_DATA_DIR #define SUPPLEMENTAL_TEST_DATA_DIR "data/" @@ -42,6 +43,11 @@ #define FASTFLOAT_ODDPLATFORM 1 #endif + +#define iHexAndDec(v) std::hex << "0x" << (v) << " (" << std::dec << (v) << ")" +#define fHexAndDec(v) std::hexfloat << (v) << " (" << std::defaultfloat << (v) << ")" + + // C++ 17 because it is otherwise annoying to browse all files in a directory. // We also only run these tests on little endian systems. #if (FASTFLOAT_CPLUSPLUS >= 201703L) && (FASTFLOAT_IS_BIG_ENDIAN == 0) && !defined(FASTFLOAT_ODDPLATFORM) @@ -50,59 +56,111 @@ #include #include +TEST_CASE("rounds_to_nearest") { + // + // If this function fails, we may be left in a non-standard rounding state. 
+ // + static volatile float fmin = std::numeric_limits::min(); + fesetround(FE_UPWARD); + std::cout << "FE_UPWARD: fmin + 1.0f = " << iHexAndDec(fmin + 1.0f) << " 1.0f - fmin = " << iHexAndDec(1.0f - fmin) << std::endl; + CHECK(fegetround() == FE_UPWARD); + CHECK(fast_float::detail::rounds_to_nearest() == false); + + fesetround(FE_DOWNWARD); + std::cout << "FE_DOWNWARD: fmin + 1.0f = " << iHexAndDec(fmin + 1.0f) << " 1.0f - fmin = " << iHexAndDec(1.0f - fmin) << std::endl; + CHECK(fegetround() == FE_DOWNWARD); + CHECK(fast_float::detail::rounds_to_nearest() == false); + + fesetround(FE_TOWARDZERO); + std::cout << "FE_TOWARDZERO: fmin + 1.0f = " << iHexAndDec(fmin + 1.0f) << " 1.0f - fmin = " << iHexAndDec(1.0f - fmin) << std::endl; + CHECK(fegetround() == FE_TOWARDZERO); + CHECK(fast_float::detail::rounds_to_nearest() == false); + + fesetround(FE_TONEAREST); + std::cout << "FE_TONEAREST: fmin + 1.0f = " << iHexAndDec(fmin + 1.0f) << " 1.0f - fmin = " << iHexAndDec(1.0f - fmin) << std::endl; + CHECK(fegetround() == FE_TONEAREST); + CHECK(fast_float::detail::rounds_to_nearest() == true); +} + +const char * round_name(int d) { + switch(d) { + case FE_UPWARD: + return "FE_UPWARD"; + case FE_DOWNWARD: + return "FE_DOWNWARD"; + case FE_TOWARDZERO: + return "FE_TOWARDZERO"; + case FE_TONEAREST: + return "FE_TONEAREST"; + default: + return "UNKNOWN"; + } + +} + // return true on success bool check_file(std::string file_name) { std::cout << "Checking " << file_name << std::endl; - size_t number{0}; - std::fstream newfile(file_name, std::ios::in); - if (newfile.is_open()) { - std::string str; - while (std::getline(newfile, str)) { - if (str.size() > 0) { - // Read 32-bit hex - uint32_t float32; - auto r32 = std::from_chars(str.data() + 5, str.data() + str.size(), + // We check all rounding directions, for each file. 
+ std::vector directions = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, FE_TONEAREST}; + for (int d : directions) { + std::cout << "fesetround to " << round_name(d) << std::endl; + fesetround(d); + size_t number{0}; + std::fstream newfile(file_name, std::ios::in); + if (newfile.is_open()) { + std::string str; + while (std::getline(newfile, str)) { + if (str.size() > 0) { + // Read 32-bit hex + uint32_t float32; + auto r32 = std::from_chars(str.data() + 5, str.data() + str.size(), float32, 16); - if(r32.ec != std::errc()) { std::cerr << "32-bit parsing failure\n"; return false; } - // Read 64-bit hex - uint64_t float64; - auto r64 = std::from_chars(str.data() + 14, str.data() + str.size(), + if(r32.ec != std::errc()) { std::cerr << "32-bit parsing failure\n"; return false; } + // Read 64-bit hex + uint64_t float64; + auto r64 = std::from_chars(str.data() + 14, str.data() + str.size(), float64, 16); - if(r64.ec != std::errc()) { std::cerr << "64-bit parsing failure\n"; return false; } - // The string to parse: - const char *number_string = str.data() + 31; - const char *end_of_string = str.data() + str.size(); - // Parse as 32-bit float - float parsed_32; - auto fast_float_r32 = fast_float::from_chars(number_string, end_of_string, parsed_32); - if(fast_float_r32.ec != std::errc()) { std::cerr << "parsing failure\n"; return false; } - // Parse as 64-bit float - double parsed_64; - auto fast_float_r64 = fast_float::from_chars(number_string, end_of_string, parsed_64); - if(fast_float_r64.ec != std::errc()) { std::cerr << "parsing failure\n"; return false; } - // Convert the floats to unsigned ints. 
- uint32_t float32_parsed; - uint64_t float64_parsed; - ::memcpy(&float32_parsed, &parsed_32, sizeof(parsed_32)); - ::memcpy(&float64_parsed, &parsed_64, sizeof(parsed_64)); - // Compare with expected results - if (float32_parsed != float32) { - std::cout << "bad 32 " << str << std::endl; - return false; + if(r64.ec != std::errc()) { std::cerr << "64-bit parsing failure\n"; return false; } + // The string to parse: + const char *number_string = str.data() + 31; + const char *end_of_string = str.data() + str.size(); + // Parse as 32-bit float + float parsed_32; + auto fast_float_r32 = fast_float::from_chars(number_string, end_of_string, parsed_32); + if(fast_float_r32.ec != std::errc()) { std::cerr << "parsing failure\n"; return false; } + // Parse as 64-bit float + double parsed_64; + auto fast_float_r64 = fast_float::from_chars(number_string, end_of_string, parsed_64); + if(fast_float_r64.ec != std::errc()) { std::cerr << "parsing failure\n"; return false; } + // Convert the floats to unsigned ints. 
+ uint32_t float32_parsed; + uint64_t float64_parsed; + ::memcpy(&float32_parsed, &parsed_32, sizeof(parsed_32)); + ::memcpy(&float64_parsed, &parsed_64, sizeof(parsed_64)); + // Compare with expected results + if (float32_parsed != float32) { + std::cout << "bad 32 " << str << std::endl; + fesetround(FE_TONEAREST); + return false; + } + if (float64_parsed != float64) { + std::cout << "bad 64 " << str << std::endl; + fesetround(FE_TONEAREST); + return false; + } + number++; } - if (float64_parsed != float64) { - std::cout << "bad 64 " << str << std::endl; - return false; - } - number++; } + std::cout << "checked " << std::defaultfloat << number << " values" << std::endl; + newfile.close(); // close the file object + } else { + std::cout << "Could not read " << file_name << std::endl; + fesetround(FE_TONEAREST); + return false; } - std::cout << "checked " << std::defaultfloat << number << " values" << std::endl; - newfile.close(); // close the file object - } else { - std::cout << "Could not read " << file_name << std::endl; - return false; } + fesetround(FE_TONEAREST); return true; } @@ -125,9 +183,6 @@ TEST_CASE("leading_zeroes") { CHECK(fast_float::leading_zeroes(bit << 63) == 0); } -#define iHexAndDec(v) std::hex << "0x" << (v) << " (" << std::dec << (v) << ")" -#define fHexAndDec(v) std::hexfloat << (v) << " (" << std::defaultfloat << (v) << ")" - void test_full_multiplication(uint64_t lhs, uint64_t rhs, uint64_t expected_lo, uint64_t expected_hi) { fast_float::value128 v; v = fast_float::full_multiplication(lhs, rhs); From 95321767eebd207c23cb67a21e48db6affb0ffbd Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 12:29:46 -0500 Subject: [PATCH 03/15] More verbose error report. 
--- tests/basictest.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/basictest.cpp b/tests/basictest.cpp index 251e609..d79fb82 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -141,11 +141,15 @@ bool check_file(std::string file_name) { // Compare with expected results if (float32_parsed != float32) { std::cout << "bad 32 " << str << std::endl; + std::cout << "parsed = " << iHexAndDec(float32_parsed) << ", expectd = " << iHexAndDec(float32) << std::endl; + std::cout << "fesetround: " << round_name(d) << std::endl; fesetround(FE_TONEAREST); return false; } if (float64_parsed != float64) { std::cout << "bad 64 " << str << std::endl; + std::cout << "parsed = " << iHexAndDec(float64_parsed) << ", expectd = " << iHexAndDec(float64) << std::endl; + std::cout << "fesetround: " << round_name(d) << std::endl; fesetround(FE_TONEAREST); return false; } From d2250598737dbb87289ee3fa137c49ad3b79528f Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 14:35:31 -0500 Subject: [PATCH 04/15] Fix for Win32+ClangCL --- include/fast_float/parse_number.h | 5 +++++ tests/basictest.cpp | 30 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 1340a0d..b5f92ce 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -135,7 +135,12 @@ from_chars_result from_chars_advanced(const char *first, const char *last, // We do not have that fegetround() == FE_TONEAREST. // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal if (pns.exponent >= 0 && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent) && !pns.too_many_digits) { +#if (defined(_WIN32) && defined(__clang__)) + // 32-bit ClangCL maps 0 to -0.0 when fegetround() == FE_DOWNWARD + value = pns.mantissa ? 
T(pns.mantissa) : 0.0; +#else value = T(pns.mantissa); +#endif value = value * binary_format::exact_power_of_ten(pns.exponent); if (pns.negative) { value = -value; } return answer; diff --git a/tests/basictest.cpp b/tests/basictest.cpp index d79fb82..980ead1 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -98,6 +98,36 @@ const char * round_name(int d) { } + +TEST_CASE("parse_zero") { + // + // If this function fails, we may be left in a non-standard rounding state. + // + const char * zero = "0"; + double f = 0; + fesetround(FE_UPWARD); + auto r1 = fast_float::from_chars(zero, zero + 1, f); + CHECK(r1.ec == std::errc()); + std::cout << "FE_UPWARD parsed zero as " << iHexAndDec(f) << std::endl; + CHECK(f == 0); + fesetround(FE_TOWARDZERO); + auto r2 = fast_float::from_chars(zero, zero + 1, f); + CHECK(r2.ec == std::errc()); + std::cout << "FE_TOWARDZERO parsed zero as " << iHexAndDec(f) << std::endl; + CHECK(f == 0); + fesetround(FE_DOWNWARD); + auto r3 = fast_float::from_chars(zero, zero + 1, f); + CHECK(r3.ec == std::errc()); + std::cout << "FE_DOWNWARD parsed zero as " << iHexAndDec(f) << std::endl; + CHECK(f == 0); + fesetround(FE_TONEAREST); + auto r4 = fast_float::from_chars(zero, zero + 1, f); + CHECK(r4.ec == std::errc()); + std::cout << "FE_TONEAREST parsed zero as " << iHexAndDec(f) << std::endl; + CHECK(f == 0); +} + + // return true on success bool check_file(std::string file_name) { std::cout << "Checking " << file_name << std::endl; From 559b89d34d48a8559437d48fceb939eb0fc0b8af Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 14:59:03 -0500 Subject: [PATCH 05/15] 32-bit clangcl appears to be ridiculous. 
--- tests/basictest.cpp | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tests/basictest.cpp b/tests/basictest.cpp index 980ead1..1c07256 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -104,27 +104,46 @@ TEST_CASE("parse_zero") { // If this function fails, we may be left in a non-standard rounding state. // const char * zero = "0"; + uint64_t float64_parsed; double f = 0; + ::memcpy(&float64_parsed, &f, sizeof(f)); + CHECK(float64_parsed == 0); + fesetround(FE_UPWARD); auto r1 = fast_float::from_chars(zero, zero + 1, f); CHECK(r1.ec == std::errc()); std::cout << "FE_UPWARD parsed zero as " << iHexAndDec(f) << std::endl; CHECK(f == 0); + ::memcpy(&float64_parsed, &f, sizeof(f)); + std::cout << "double as uint64_t is " << float64_parsed << std::endl; + CHECK(float64_parsed == 0); + fesetround(FE_TOWARDZERO); auto r2 = fast_float::from_chars(zero, zero + 1, f); CHECK(r2.ec == std::errc()); std::cout << "FE_TOWARDZERO parsed zero as " << iHexAndDec(f) << std::endl; CHECK(f == 0); + ::memcpy(&float64_parsed, &f, sizeof(f)); + std::cout << "double as uint64_t is " << float64_parsed << std::endl; + CHECK(float64_parsed == 0); + fesetround(FE_DOWNWARD); auto r3 = fast_float::from_chars(zero, zero + 1, f); CHECK(r3.ec == std::errc()); std::cout << "FE_DOWNWARD parsed zero as " << iHexAndDec(f) << std::endl; CHECK(f == 0); + ::memcpy(&float64_parsed, &f, sizeof(f)); + std::cout << "double as uint64_t is " << float64_parsed << std::endl; + CHECK(float64_parsed == 0); + fesetround(FE_TONEAREST); auto r4 = fast_float::from_chars(zero, zero + 1, f); CHECK(r4.ec == std::errc()); std::cout << "FE_TONEAREST parsed zero as " << iHexAndDec(f) << std::endl; CHECK(f == 0); + ::memcpy(&float64_parsed, &f, sizeof(f)); + std::cout << "double as uint64_t is " << float64_parsed << std::endl; + CHECK(float64_parsed == 0); } @@ -171,14 +190,16 @@ bool check_file(std::string file_name) { // Compare with expected results if 
(float32_parsed != float32) { std::cout << "bad 32 " << str << std::endl; - std::cout << "parsed = " << iHexAndDec(float32_parsed) << ", expectd = " << iHexAndDec(float32) << std::endl; + std::cout << "parsed as " << iHexAndDec(parsed_32) << std::endl; + std::cout << "as raw uint32_t, parsed = " << float32_parsed << ", expected = " << float32 << std::endl; std::cout << "fesetround: " << round_name(d) << std::endl; fesetround(FE_TONEAREST); return false; } if (float64_parsed != float64) { std::cout << "bad 64 " << str << std::endl; - std::cout << "parsed = " << iHexAndDec(float64_parsed) << ", expectd = " << iHexAndDec(float64) << std::endl; + std::cout << "parsed as " << iHexAndDec(parsed_64) << std::endl; + std::cout << "as raw uint64_t, parsed = " << float64_parsed << ", expected = " << float64 << std::endl; std::cout << "fesetround: " << round_name(d) << std::endl; fesetround(FE_TONEAREST); return false; From fd9d9effda91bdba9b9135ef8b66f14bb576dc25 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 15:25:03 -0500 Subject: [PATCH 06/15] More tweaking around clangcl --- include/fast_float/parse_number.h | 13 +++-- tests/basictest.cpp | 91 +++++++++++++++++++++++-------- 2 files changed, 75 insertions(+), 29 deletions(-) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index b5f92ce..2b3b935 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -135,13 +135,14 @@ from_chars_result from_chars_advanced(const char *first, const char *last, // We do not have that fegetround() == FE_TONEAREST. // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal if (pns.exponent >= 0 && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent) && !pns.too_many_digits) { -#if (defined(_WIN32) && defined(__clang__)) - // 32-bit ClangCL maps 0 to -0.0 when fegetround() == FE_DOWNWARD - value = pns.mantissa ? 
T(pns.mantissa) : 0.0; -#else - value = T(pns.mantissa); +#if (defined(_MSC_VER) && defined(__clang__)) + // ClangCL may map 0 to -0.0 when fegetround() == FE_DOWNWARD + if(pns.mantissa == 0) { + value = 0; + return answer; + } #endif - value = value * binary_format::exact_power_of_ten(pns.exponent); + value = T(pns.mantissa) * binary_format::exact_power_of_ten(pns.exponent); if (pns.negative) { value = -value; } return answer; } diff --git a/tests/basictest.cpp b/tests/basictest.cpp index 1c07256..fec51de 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -48,13 +48,66 @@ #define fHexAndDec(v) std::hexfloat << (v) << " (" << std::defaultfloat << (v) << ")" -// C++ 17 because it is otherwise annoying to browse all files in a directory. -// We also only run these tests on little endian systems. -#if (FASTFLOAT_CPLUSPLUS >= 201703L) && (FASTFLOAT_IS_BIG_ENDIAN == 0) && !defined(FASTFLOAT_ODDPLATFORM) +const char * round_name(int d) { + switch(d) { + case FE_UPWARD: + return "FE_UPWARD"; + case FE_DOWNWARD: + return "FE_DOWNWARD"; + case FE_TOWARDZERO: + return "FE_TOWARDZERO"; + case FE_TONEAREST: + return "FE_TONEAREST"; + default: + return "UNKNOWN"; + } +} + + +#define FASTFLOAT_STR(x) #x +#define SHOW_DEFINE(x) printf("%s='%s'\n", #x, FASTFLOAT_STR(x)) + +TEST_CASE("system_info") { + std::cout << "system info:" << std::endl; +#ifdef _MSC_VER + SHOW_DEFINE(_MSC_VER); +#endif +#ifdef FASTFLOAT_64BIT_LIMB + SHOW_DEFINE(FASTFLOAT_64BIT_LIMB); +#endif +#ifdef __clang__ + SHOW_DEFINE(__clang__); +#endif +#ifdef FASTFLOAT_VISUAL_STUDIO + SHOW_DEFINE(FASTFLOAT_VISUAL_STUDIO); +#endif +#ifdef FASTFLOAT_IS_BIG_ENDIAN + #if FASTFLOAT_IS_BIG_ENDIAN + printf("big endian\n"); + #else + printf("little endian\n"); + #endif +#endif +#ifdef FASTFLOAT_32BIT + SHOW_DEFINE(FASTFLOAT_32BIT); +#endif +#ifdef FASTFLOAT_64BIT + SHOW_DEFINE(FASTFLOAT_64BIT); +#endif +#ifdef FLT_EVAL_METHOD + SHOW_DEFINE(FLT_EVAL_METHOD); +#endif +#ifdef _WIN32 + SHOW_DEFINE(_WIN32); +#endif 
+#ifdef _WIN64 + SHOW_DEFINE(_WIN64); +#endif + std::cout << "fegetround() = " << round_name(fegetround()) << std::endl; + std::cout << std::endl; + +} -#include -#include -#include TEST_CASE("rounds_to_nearest") { // @@ -82,23 +135,6 @@ TEST_CASE("rounds_to_nearest") { CHECK(fast_float::detail::rounds_to_nearest() == true); } -const char * round_name(int d) { - switch(d) { - case FE_UPWARD: - return "FE_UPWARD"; - case FE_DOWNWARD: - return "FE_DOWNWARD"; - case FE_TOWARDZERO: - return "FE_TOWARDZERO"; - case FE_TONEAREST: - return "FE_TONEAREST"; - default: - return "UNKNOWN"; - } - -} - - TEST_CASE("parse_zero") { // // If this function fails, we may be left in a non-standard rounding state. @@ -146,6 +182,15 @@ TEST_CASE("parse_zero") { CHECK(float64_parsed == 0); } +// C++ 17 because it is otherwise annoying to browse all files in a directory. +// We also only run these tests on little endian systems. +#if (FASTFLOAT_CPLUSPLUS >= 201703L) && (FASTFLOAT_IS_BIG_ENDIAN == 0) && !defined(FASTFLOAT_ODDPLATFORM) + +#include +#include +#include + + // return true on success bool check_file(std::string file_name) { From 8f27b7e9210b31f5966f5a3ef9517d478ad28b2e Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 15:42:56 -0500 Subject: [PATCH 07/15] More tuning. --- include/fast_float/bigint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/fast_float/bigint.h b/include/fast_float/bigint.h index b56cb9b..220e057 100644 --- a/include/fast_float/bigint.h +++ b/include/fast_float/bigint.h @@ -17,7 +17,7 @@ namespace fast_float { // we might have platforms where `CHAR_BIT` is not 8, so let's avoid // doing `8 * sizeof(limb)`. 
#if defined(FASTFLOAT_64BIT) && !defined(__sparc) -#define FASTFLOAT_64BIT_LIMB +#define FASTFLOAT_64BIT_LIMB 1 typedef uint64_t limb; constexpr size_t limb_bits = 64; #else From 29b1a03d5be3e250e70c8892b360b01da0cad9c9 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 15:49:09 -0500 Subject: [PATCH 08/15] Make sure that macros have actual values when defined (makes debugging easier) --- include/fast_float/float_common.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index a19a817..c2084e0 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -12,11 +12,11 @@ || defined(__MINGW64__) \ || defined(__s390x__) \ || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) ) -#define FASTFLOAT_64BIT +#define FASTFLOAT_64BIT 1 #elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) \ || defined(__arm__) || defined(_M_ARM) \ || defined(__MINGW32__) || defined(__EMSCRIPTEN__)) -#define FASTFLOAT_32BIT +#define FASTFLOAT_32BIT 1 #else // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow. // We can never tell the register width, but the SIZE_MAX is a good approximation. @@ -24,9 +24,9 @@ #if SIZE_MAX == 0xffff #error Unknown platform (16-bit, unsupported) #elif SIZE_MAX == 0xffffffff - #define FASTFLOAT_32BIT + #define FASTFLOAT_32BIT 1 #elif SIZE_MAX == 0xffffffffffffffff - #define FASTFLOAT_64BIT + #define FASTFLOAT_64BIT 1 #else #error Unknown platform (not 32-bit, not 64-bit?) #endif From bfc0478feb213241c22d45340975c5690327e84c Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 16:45:01 -0500 Subject: [PATCH 09/15] More tweaks. 
--- include/fast_float/parse_number.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 2b3b935..d1f3bce 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -60,7 +60,16 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n return answer; } +/** + * Returns true if the floating-point rounding mode is to 'nearest'. + * It is the default on most systems. This function is meant to be inexpensive. + * Credit : @mwalcott3 + */ fastfloat_really_inline bool rounds_to_nearest() noexcept { + // See + // A fast function to check your floating-point rounding mode + // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/ + // // This function is meant to be equivalent to : // prior: #include // return fegetround() == FE_TONEAREST; @@ -68,6 +77,8 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { // function call. // // volatile prevents the compiler from computing the function at compile-time + // It does not need to be std::numeric_limits::min(), any small + // value so that 1 + x should round to 1 would do. static volatile float fmin = std::numeric_limits::min(); // // Explanation: @@ -135,7 +146,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last, // We do not have that fegetround() == FE_TONEAREST.
// Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal if (pns.exponent >= 0 && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent) && !pns.too_many_digits) { -#if (defined(_MSC_VER) && defined(__clang__)) +#if (defined(_WIN32) && defined(__clang__)) // ClangCL may map 0 to -0.0 when fegetround() == FE_DOWNWARD if(pns.mantissa == 0) { value = 0; From 39ea41b84a4e7e9a8239ac07c9006af5eb59b15e Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 18 Nov 2022 11:28:34 -0500 Subject: [PATCH 10/15] Adopting proposal. --- include/fast_float/parse_number.h | 66 +++++++++++++++++++------------ 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index d1f3bce..a1f4c5d 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -76,8 +76,10 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { // However, it is expected to be much faster than the fegetround() // function call. // - // volatile prevents the compiler from computing the function at compile-time - // It does not need to be std::numeric_limits::min(), any small + // The volatile keyword prevents the compiler from computing the function + // at compile-time. + // There might be other ways to prevent compile-time optimizations (e.g., asm). + // The value does not need to be std::numeric_limits::min(), any small // value so that 1 + x should round to 1 would do. static volatile float fmin = std::numeric_limits::min(); // @@ -100,6 +102,8 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { // fmin + 1.0f = 0x1 (1) // 1.0f - fmin = 0x1 (1) // + // Note: This may fail to be accurate if fast-math has been + // enabled, as rounding conventions may not apply.
return (fmin + 1.0f == 1.0f - fmin); } @@ -130,32 +134,44 @@ from_chars_result from_chars_advanced(const char *first, const char *last, } answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; - // Unfortunately, the conventional Clinger's fast path is only possible - // when the system rounds to the nearest float. - if(detail::rounds_to_nearest()) { - // We have that fegetround() == FE_TONEAREST. - // Next is Clinger's fast path. - if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path() && !pns.too_many_digits) { - value = T(pns.mantissa); - if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } - else { value = value * binary_format::exact_power_of_ten(pns.exponent); } - if (pns.negative) { value = -value; } - return answer; - } - } else { - // We do not have that fegetround() == FE_TONEAREST. - // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal - if (pns.exponent >= 0 && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent) && !pns.too_many_digits) { -#if (defined(_WIN32) && defined(__clang__)) - // ClangCL may map 0 to -0.0 when fegetround() == FE_DOWNWARD - if(pns.mantissa == 0) { - value = 0; + // The implementation of the Clinger's fast path is convoluted because + // we want round-to-nearest in all cases, irrespective of the rounding mode + // selected on the thread. + // We proceed optimistically, assuming that detail::rounds_to_nearest() returns + // true. + if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && !pns.too_many_digits) { + // Unfortunately, the conventional Clinger's fast path is only possible + // when the system rounds to the nearest float. 
+ // + // We expect the next branch to almost always be selected. + // We could check it first (before the previous branch), but + // there might be performance advantages at having the check + // be last. + if(detail::rounds_to_nearest()) { + // We have that fegetround() == FE_TONEAREST. + // Next is Clinger's fast path. + if (pns.mantissa <=binary_format::max_mantissa_fast_path()) { + value = T(pns.mantissa); + if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } + else { value = value * binary_format::exact_power_of_ten(pns.exponent); } + if (pns.negative) { value = -value; } return answer; } + } else { + // We do not have that fegetround() == FE_TONEAREST. + // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal + if (pns.exponent >= 0 && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent)) { +#if (defined(_WIN32) && defined(__clang__)) + // ClangCL may map 0 to -0.0 when fegetround() == FE_DOWNWARD + if(pns.mantissa == 0) { + value = 0; + return answer; + } #endif - value = T(pns.mantissa) * binary_format::exact_power_of_ten(pns.exponent); - if (pns.negative) { value = -value; } - return answer; + value = T(pns.mantissa) * binary_format::exact_power_of_ten(pns.exponent); + if (pns.negative) { value = -value; } + return answer; + } } } adjusted_mantissa am = compute_float>(pns.exponent, pns.mantissa); From 3d0e448940ba40760b1abae03f82100a6dccad51 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 18 Nov 2022 12:27:38 -0500 Subject: [PATCH 11/15] Added a remark. --- include/fast_float/parse_number.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index a1f4c5d..0a8092f 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -80,7 +80,8 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { // at compile-time. 
// There might be other ways to prevent compile-time optimizations (e.g., asm). // The value does not need to be std::numeric_limits<float>::min(), any small - // value so that 1 + x should round to 1 would do. + // value so that 1 + x should round to 1 would do (after accounting for excess + // precision, as in 387). static volatile float fmin = std::numeric_limits<float>::min(); // // Explanation: From 8b7a55a03cb38fa4ab4e9309d38f0fd24a51814a Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 18 Nov 2022 15:33:44 -0500 Subject: [PATCH 12/15] Minor optimization. --- include/fast_float/parse_number.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 0a8092f..0e22afc 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -83,6 +83,7 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { // value so that 1 + x should round to 1 would do (after accounting for excess // precision, as in 387). static volatile float fmin = std::numeric_limits<float>::min(); + float fmini = fmin; // we copy it so that it gets loaded at most once. // // Explanation: // Only when fegetround() == FE_TONEAREST do we have that @@ -105,7 +106,7 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { // // Note: This may fail to be accurate if fast-math has been // enabled, as rounding conventions may not apply. - return (fmin + 1.0f == 1.0f - fmin); + return (fmini + 1.0f == 1.0f - fmini); } } // namespace detail From 003a98318876f5376fe18cc745440ea6ef2f387a Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 18 Nov 2022 15:38:21 -0500 Subject: [PATCH 13/15] Simplifying the justification.
--- include/fast_float/parse_number.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 0e22afc..bd0ab43 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -81,7 +81,7 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { // There might be other ways to prevent compile-time optimizations (e.g., asm). // The value does not need to be std::numeric_limits<float>::min(), any small // value so that 1 + x should round to 1 would do (after accounting for excess - // precision, as in 387). + // precision, as in 387 instructions). static volatile float fmin = std::numeric_limits<float>::min(); float fmini = fmin; // we copy it so that it gets loaded at most once. // @@ -90,19 +90,12 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept { // fmin + 1.0f == 1.0f - fmin. // // FE_UPWARD: - // fmin + 1.0f = 0x1.00001 (1.00001) - // 1.0f - fmin = 0x1 (1) + // fmin + 1.0f > 1 + // 1.0f - fmin == 1 // // FE_DOWNWARD or FE_TOWARDZERO: - // fmin + 1.0f = 0x1 (1) - // 1.0f - fmin = 0x0.999999 (0.999999) - // - // fmin + 1.0f = 0x1 (1) - // 1.0f - fmin = 0x0.999999 (0.999999) - // - // FE_TONEAREST: - // fmin + 1.0f = 0x1 (1) - // 1.0f - fmin = 0x1 (1) + // fmin + 1.0f == 1 + // 1.0f - fmin < 1 // // Note: This may fail to be accurate if fast-math has been // enabled, as rounding conventions may not apply. From eec504ae0360d39af3350f449d33159cbb7279a9 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 21 Nov 2022 09:53:49 -0500 Subject: [PATCH 14/15] Adding a fast-math test.
--- .github/workflows/ubuntu20-fastmath.yml | 18 ++++++++++++++++++ tests/CMakeLists.txt | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/ubuntu20-fastmath.yml diff --git a/.github/workflows/ubuntu20-fastmath.yml b/.github/workflows/ubuntu20-fastmath.yml new file mode 100644 index 0000000..389f4ed --- /dev/null +++ b/.github/workflows/ubuntu20-fastmath.yml @@ -0,0 +1,18 @@ +name: Ubuntu 20.04 CI (GCC 9) + +on: [push, pull_request] + +jobs: + ubuntu-build: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v3 + - name: Use cmake + run: | + mkdir build && + cd build && + cmake -DCMAKE_CXX_FLAGS="-ffast-math" -DFASTFLOAT_TEST=ON .. && + cmake --build . && + ctest --output-on-failure diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dfeec75..6534217 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,7 +9,7 @@ option(SYSTEM_DOCTEST "Use system copy of doctest" OFF) if (NOT SYSTEM_DOCTEST) FetchContent_Declare(doctest GIT_REPOSITORY https://github.com/onqtam/doctest.git - GIT_TAG 2.4.6) + GIT_TAG v2.4.9) endif() FetchContent_Declare(supplemental_test_files GIT_REPOSITORY https://github.com/fastfloat/supplemental_test_files.git From 968bd9d86eccb7c16ffadeb1c8d936309849bbfe Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 21 Nov 2022 09:56:21 -0500 Subject: [PATCH 15/15] Renaming the test. --- .github/workflows/ubuntu20-fastmath.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ubuntu20-fastmath.yml b/.github/workflows/ubuntu20-fastmath.yml index 389f4ed..eda488a 100644 --- a/.github/workflows/ubuntu20-fastmath.yml +++ b/.github/workflows/ubuntu20-fastmath.yml @@ -1,4 +1,4 @@ -name: Ubuntu 20.04 CI (GCC 9) +name: Ubuntu 20.04 CI (GCC 9, fast-math) on: [push, pull_request]