diff --git a/.github/workflows/ubuntu20-fastmath.yml b/.github/workflows/ubuntu20-fastmath.yml new file mode 100644 index 0000000..eda488a --- /dev/null +++ b/.github/workflows/ubuntu20-fastmath.yml @@ -0,0 +1,18 @@ +name: Ubuntu 20.04 CI (GCC 9, fast-math) + +on: [push, pull_request] + +jobs: + ubuntu-build: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v3 + - name: Use cmake + run: | + mkdir build && + cd build && + cmake -DCMAKE_CXX_FLAGS="-ffast-math" -DFASTFLOAT_TEST=ON .. && + cmake --build . && + ctest --output-on-failure diff --git a/include/fast_float/bigint.h b/include/fast_float/bigint.h index b56cb9b..220e057 100644 --- a/include/fast_float/bigint.h +++ b/include/fast_float/bigint.h @@ -17,7 +17,7 @@ namespace fast_float { // we might have platforms where `CHAR_BIT` is not 8, so let's avoid // doing `8 * sizeof(limb)`. #if defined(FASTFLOAT_64BIT) && !defined(__sparc) -#define FASTFLOAT_64BIT_LIMB +#define FASTFLOAT_64BIT_LIMB 1 typedef uint64_t limb; constexpr size_t limb_bits = 64; #else diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index f99371c..c2084e0 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -12,11 +12,11 @@ || defined(__MINGW64__) \ || defined(__s390x__) \ || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) ) -#define FASTFLOAT_64BIT +#define FASTFLOAT_64BIT 1 #elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) \ || defined(__arm__) || defined(_M_ARM) \ || defined(__MINGW32__) || defined(__EMSCRIPTEN__)) -#define FASTFLOAT_32BIT +#define FASTFLOAT_32BIT 1 #else // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow. // We can never tell the register width, but the SIZE_MAX is a good approximation. 
@@ -24,9 +24,9 @@ #if SIZE_MAX == 0xffff #error Unknown platform (16-bit, unsupported) #elif SIZE_MAX == 0xffffffff - #define FASTFLOAT_32BIT + #define FASTFLOAT_32BIT 1 #elif SIZE_MAX == 0xffffffffffffffff - #define FASTFLOAT_64BIT + #define FASTFLOAT_64BIT 1 #else #error Unknown platform (not 32-bit, not 64-bit?) #endif @@ -272,10 +272,12 @@ template struct binary_format { static inline constexpr int minimum_exponent(); static inline constexpr int infinite_power(); static inline constexpr int sign_index(); + static inline constexpr int min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST static inline constexpr int max_exponent_fast_path(); static inline constexpr int max_exponent_round_to_even(); static inline constexpr int min_exponent_round_to_even(); static inline constexpr uint64_t max_mantissa_fast_path(int64_t power); + static inline constexpr uint64_t max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST static inline constexpr int largest_power_of_ten(); static inline constexpr int smallest_power_of_ten(); static inline constexpr T exact_power_of_ten(int64_t power); @@ -285,6 +287,22 @@ template struct binary_format { static inline constexpr equiv_uint hidden_bit_mask(); }; +template <> inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -22; +#endif +} + +template <> inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -10; +#endif +} + template <> inline constexpr int binary_format::mantissa_explicit_bits() { return 52; } @@ -331,13 +349,18 @@ template <> inline constexpr int binary_format::max_exponent_fast_path() template <> inline constexpr int binary_format::max_exponent_fast_path() { return 10; } - +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} 
template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path(int64_t power) { // caller is responsible to ensure that // power >= 0 && power <= 22 // return max_mantissa_double[power]; } +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path(int64_t power) { // caller is responsible to ensure that // power >= 0 && power <= 10 diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 8789475..bd0ab43 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -60,6 +60,48 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n return answer; } +/** + * Returns true if the floating-point rounding mode is to 'nearest'. + * It is the default on most systems. This function is meant to be inexpensive. + * Credit : @mwalcott3 + */ +fastfloat_really_inline bool rounds_to_nearest() noexcept { + // See + // A fast function to check your floating-point rounding mode + // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/ + // + // This function is meant to be equivalent to : + // prior: #include + // return fegetround() == FE_TONEAREST; + // However, it is expected to be much faster than the fegetround() + // function call. + // + // The volatile keyword prevents the compiler from computing the function + // at compile-time. + // There might be other ways to prevent compile-time optimizations (e.g., asm). + // The value does not need to be std::numeric_limits::min(), any small + // value so that 1 + x should round to 1 would do (after accounting for excess + // precision, as in 387 instructions). + static volatile float fmin = std::numeric_limits::min(); + float fmini = fmin; // we copy it so that it gets loaded at most once. 
+ // + // Explanation: + // Only when fegetround() == FE_TONEAREST do we have that + // fmin + 1.0f == 1.0f - fmin. + // + // FE_UPWARD: + // fmin + 1.0f > 1 + // 1.0f - fmin == 1 + // + // FE_DOWNWARD or FE_TOWARDZERO: + // fmin + 1.0f == 1 + // 1.0f - fmin < 1 + // + // Note: This may fail to be accurate if fast-math has been + // enabled, as rounding conventions may not apply. + return (fmini + 1.0f == 1.0f - fmini); +} + } // namespace detail template @@ -87,12 +129,45 @@ from_chars_result from_chars_advanced(const char *first, const char *last, } answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; - // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal - if (pns.exponent >= 0 && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent) && !pns.too_many_digits) { - value = T(pns.mantissa); - value = value * binary_format::exact_power_of_ten(pns.exponent); - if (pns.negative) { value = -value; } - return answer; + // The implementation of the Clinger's fast path is convoluted because + // we want round-to-nearest in all cases, irrespective of the rounding mode + // selected on the thread. + // We proceed optimistically, assuming that detail::rounds_to_nearest() returns + // true. + if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && !pns.too_many_digits) { + // Unfortunately, the conventional Clinger's fast path is only possible + // when the system rounds to the nearest float. + // + // We expect the next branch to almost always be selected. + // We could check it first (before the previous branch), but + // there might be performance advantages at having the check + // be last. + if(detail::rounds_to_nearest()) { + // We have that fegetround() == FE_TONEAREST. + // Next is Clinger's fast path. 
+ if (pns.mantissa <=binary_format::max_mantissa_fast_path()) { + value = T(pns.mantissa); + if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } + else { value = value * binary_format::exact_power_of_ten(pns.exponent); } + if (pns.negative) { value = -value; } + return answer; + } + } else { + // We do not have that fegetround() == FE_TONEAREST. + // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal + if (pns.exponent >= 0 && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent)) { +#if (defined(_WIN32) && defined(__clang__)) + // ClangCL may map 0 to -0.0 when fegetround() == FE_DOWNWARD + if(pns.mantissa == 0) { + value = 0; + return answer; + } +#endif + value = T(pns.mantissa) * binary_format::exact_power_of_ten(pns.exponent); + if (pns.negative) { value = -value; } + return answer; + } + } } adjusted_mantissa am = compute_float>(pns.exponent, pns.mantissa); if(pns.too_many_digits && am.power2 >= 0) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dfeec75..6534217 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,7 +9,7 @@ option(SYSTEM_DOCTEST "Use system copy of doctest" OFF) if (NOT SYSTEM_DOCTEST) FetchContent_Declare(doctest GIT_REPOSITORY https://github.com/onqtam/doctest.git - GIT_TAG 2.4.6) + GIT_TAG v2.4.9) endif() FetchContent_Declare(supplemental_test_files GIT_REPOSITORY https://github.com/fastfloat/supplemental_test_files.git diff --git a/tests/basictest.cpp b/tests/basictest.cpp index 273c2c6..fec51de 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #ifndef SUPPLEMENTAL_TEST_DATA_DIR #define SUPPLEMENTAL_TEST_DATA_DIR "data/" @@ -42,6 +43,145 @@ #define FASTFLOAT_ODDPLATFORM 1 #endif + +#define iHexAndDec(v) std::hex << "0x" << (v) << " (" << std::dec << (v) << ")" +#define fHexAndDec(v) std::hexfloat << (v) << " (" << std::defaultfloat << (v) << ")" + + +const char * 
round_name(int d) { + switch(d) { + case FE_UPWARD: + return "FE_UPWARD"; + case FE_DOWNWARD: + return "FE_DOWNWARD"; + case FE_TOWARDZERO: + return "FE_TOWARDZERO"; + case FE_TONEAREST: + return "FE_TONEAREST"; + default: + return "UNKNOWN"; + } +} + + +#define FASTFLOAT_STR(x) #x +#define SHOW_DEFINE(x) printf("%s='%s'\n", #x, FASTFLOAT_STR(x)) + +TEST_CASE("system_info") { + std::cout << "system info:" << std::endl; +#ifdef _MSC_VER + SHOW_DEFINE(_MSC_VER); +#endif +#ifdef FASTFLOAT_64BIT_LIMB + SHOW_DEFINE(FASTFLOAT_64BIT_LIMB); +#endif +#ifdef __clang__ + SHOW_DEFINE(__clang__); +#endif +#ifdef FASTFLOAT_VISUAL_STUDIO + SHOW_DEFINE(FASTFLOAT_VISUAL_STUDIO); +#endif +#ifdef FASTFLOAT_IS_BIG_ENDIAN + #if FASTFLOAT_IS_BIG_ENDIAN + printf("big endian\n"); + #else + printf("little endian\n"); + #endif +#endif +#ifdef FASTFLOAT_32BIT + SHOW_DEFINE(FASTFLOAT_32BIT); +#endif +#ifdef FASTFLOAT_64BIT + SHOW_DEFINE(FASTFLOAT_64BIT); +#endif +#ifdef FLT_EVAL_METHOD + SHOW_DEFINE(FLT_EVAL_METHOD); +#endif +#ifdef _WIN32 + SHOW_DEFINE(_WIN32); +#endif +#ifdef _WIN64 + SHOW_DEFINE(_WIN64); +#endif + std::cout << "fegetround() = " << round_name(fegetround()) << std::endl; + std::cout << std::endl; + +} + + +TEST_CASE("rounds_to_nearest") { + // + // If this function fails, we may be left in a non-standard rounding state. 
+ // + static volatile float fmin = std::numeric_limits::min(); + fesetround(FE_UPWARD); + std::cout << "FE_UPWARD: fmin + 1.0f = " << iHexAndDec(fmin + 1.0f) << " 1.0f - fmin = " << iHexAndDec(1.0f - fmin) << std::endl; + CHECK(fegetround() == FE_UPWARD); + CHECK(fast_float::detail::rounds_to_nearest() == false); + + fesetround(FE_DOWNWARD); + std::cout << "FE_DOWNWARD: fmin + 1.0f = " << iHexAndDec(fmin + 1.0f) << " 1.0f - fmin = " << iHexAndDec(1.0f - fmin) << std::endl; + CHECK(fegetround() == FE_DOWNWARD); + CHECK(fast_float::detail::rounds_to_nearest() == false); + + fesetround(FE_TOWARDZERO); + std::cout << "FE_TOWARDZERO: fmin + 1.0f = " << iHexAndDec(fmin + 1.0f) << " 1.0f - fmin = " << iHexAndDec(1.0f - fmin) << std::endl; + CHECK(fegetround() == FE_TOWARDZERO); + CHECK(fast_float::detail::rounds_to_nearest() == false); + + fesetround(FE_TONEAREST); + std::cout << "FE_TONEAREST: fmin + 1.0f = " << iHexAndDec(fmin + 1.0f) << " 1.0f - fmin = " << iHexAndDec(1.0f - fmin) << std::endl; + CHECK(fegetround() == FE_TONEAREST); + CHECK(fast_float::detail::rounds_to_nearest() == true); +} + +TEST_CASE("parse_zero") { + // + // If this function fails, we may be left in a non-standard rounding state. 
+ // + const char * zero = "0"; + uint64_t float64_parsed; + double f = 0; + ::memcpy(&float64_parsed, &f, sizeof(f)); + CHECK(float64_parsed == 0); + + fesetround(FE_UPWARD); + auto r1 = fast_float::from_chars(zero, zero + 1, f); + CHECK(r1.ec == std::errc()); + std::cout << "FE_UPWARD parsed zero as " << iHexAndDec(f) << std::endl; + CHECK(f == 0); + ::memcpy(&float64_parsed, &f, sizeof(f)); + std::cout << "double as uint64_t is " << float64_parsed << std::endl; + CHECK(float64_parsed == 0); + + fesetround(FE_TOWARDZERO); + auto r2 = fast_float::from_chars(zero, zero + 1, f); + CHECK(r2.ec == std::errc()); + std::cout << "FE_TOWARDZERO parsed zero as " << iHexAndDec(f) << std::endl; + CHECK(f == 0); + ::memcpy(&float64_parsed, &f, sizeof(f)); + std::cout << "double as uint64_t is " << float64_parsed << std::endl; + CHECK(float64_parsed == 0); + + fesetround(FE_DOWNWARD); + auto r3 = fast_float::from_chars(zero, zero + 1, f); + CHECK(r3.ec == std::errc()); + std::cout << "FE_DOWNWARD parsed zero as " << iHexAndDec(f) << std::endl; + CHECK(f == 0); + ::memcpy(&float64_parsed, &f, sizeof(f)); + std::cout << "double as uint64_t is " << float64_parsed << std::endl; + CHECK(float64_parsed == 0); + + fesetround(FE_TONEAREST); + auto r4 = fast_float::from_chars(zero, zero + 1, f); + CHECK(r4.ec == std::errc()); + std::cout << "FE_TONEAREST parsed zero as " << iHexAndDec(f) << std::endl; + CHECK(f == 0); + ::memcpy(&float64_parsed, &f, sizeof(f)); + std::cout << "double as uint64_t is " << float64_parsed << std::endl; + CHECK(float64_parsed == 0); +} + // C++ 17 because it is otherwise annoying to browse all files in a directory. // We also only run these tests on little endian systems. 
#if (FASTFLOAT_CPLUSPLUS >= 201703L) && (FASTFLOAT_IS_BIG_ENDIAN == 0) && !defined(FASTFLOAT_ODDPLATFORM) @@ -50,59 +190,77 @@ #include #include + + // return true on success bool check_file(std::string file_name) { std::cout << "Checking " << file_name << std::endl; - size_t number{0}; - std::fstream newfile(file_name, std::ios::in); - if (newfile.is_open()) { - std::string str; - while (std::getline(newfile, str)) { - if (str.size() > 0) { - // Read 32-bit hex - uint32_t float32; - auto r32 = std::from_chars(str.data() + 5, str.data() + str.size(), + // We check all rounding directions, for each file. + std::vector directions = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, FE_TONEAREST}; + for (int d : directions) { + std::cout << "fesetround to " << round_name(d) << std::endl; + fesetround(d); + size_t number{0}; + std::fstream newfile(file_name, std::ios::in); + if (newfile.is_open()) { + std::string str; + while (std::getline(newfile, str)) { + if (str.size() > 0) { + // Read 32-bit hex + uint32_t float32; + auto r32 = std::from_chars(str.data() + 5, str.data() + str.size(), float32, 16); - if(r32.ec != std::errc()) { std::cerr << "32-bit parsing failure\n"; return false; } - // Read 64-bit hex - uint64_t float64; - auto r64 = std::from_chars(str.data() + 14, str.data() + str.size(), + if(r32.ec != std::errc()) { std::cerr << "32-bit parsing failure\n"; return false; } + // Read 64-bit hex + uint64_t float64; + auto r64 = std::from_chars(str.data() + 14, str.data() + str.size(), float64, 16); - if(r64.ec != std::errc()) { std::cerr << "64-bit parsing failure\n"; return false; } - // The string to parse: - const char *number_string = str.data() + 31; - const char *end_of_string = str.data() + str.size(); - // Parse as 32-bit float - float parsed_32; - auto fast_float_r32 = fast_float::from_chars(number_string, end_of_string, parsed_32); - if(fast_float_r32.ec != std::errc()) { std::cerr << "parsing failure\n"; return false; } - // Parse as 64-bit float - double 
parsed_64; - auto fast_float_r64 = fast_float::from_chars(number_string, end_of_string, parsed_64); - if(fast_float_r64.ec != std::errc()) { std::cerr << "parsing failure\n"; return false; } - // Convert the floats to unsigned ints. - uint32_t float32_parsed; - uint64_t float64_parsed; - ::memcpy(&float32_parsed, &parsed_32, sizeof(parsed_32)); - ::memcpy(&float64_parsed, &parsed_64, sizeof(parsed_64)); - // Compare with expected results - if (float32_parsed != float32) { - std::cout << "bad 32 " << str << std::endl; - return false; + if(r64.ec != std::errc()) { std::cerr << "64-bit parsing failure\n"; return false; } + // The string to parse: + const char *number_string = str.data() + 31; + const char *end_of_string = str.data() + str.size(); + // Parse as 32-bit float + float parsed_32; + auto fast_float_r32 = fast_float::from_chars(number_string, end_of_string, parsed_32); + if(fast_float_r32.ec != std::errc()) { std::cerr << "parsing failure\n"; return false; } + // Parse as 64-bit float + double parsed_64; + auto fast_float_r64 = fast_float::from_chars(number_string, end_of_string, parsed_64); + if(fast_float_r64.ec != std::errc()) { std::cerr << "parsing failure\n"; return false; } + // Convert the floats to unsigned ints. 
+ uint32_t float32_parsed; + uint64_t float64_parsed; + ::memcpy(&float32_parsed, &parsed_32, sizeof(parsed_32)); + ::memcpy(&float64_parsed, &parsed_64, sizeof(parsed_64)); + // Compare with expected results + if (float32_parsed != float32) { + std::cout << "bad 32 " << str << std::endl; + std::cout << "parsed as " << iHexAndDec(parsed_32) << std::endl; + std::cout << "as raw uint32_t, parsed = " << float32_parsed << ", expected = " << float32 << std::endl; + std::cout << "fesetround: " << round_name(d) << std::endl; + fesetround(FE_TONEAREST); + return false; + } + if (float64_parsed != float64) { + std::cout << "bad 64 " << str << std::endl; + std::cout << "parsed as " << iHexAndDec(parsed_64) << std::endl; + std::cout << "as raw uint64_t, parsed = " << float64_parsed << ", expected = " << float64 << std::endl; + std::cout << "fesetround: " << round_name(d) << std::endl; + fesetround(FE_TONEAREST); + return false; + } + number++; } - if (float64_parsed != float64) { - std::cout << "bad 64 " << str << std::endl; - return false; - } - number++; } + std::cout << "checked " << std::defaultfloat << number << " values" << std::endl; + newfile.close(); // close the file object + } else { + std::cout << "Could not read " << file_name << std::endl; + fesetround(FE_TONEAREST); + return false; } - std::cout << "checked " << std::defaultfloat << number << " values" << std::endl; - newfile.close(); // close the file object - } else { - std::cout << "Could not read " << file_name << std::endl; - return false; } + fesetround(FE_TONEAREST); return true; } @@ -125,9 +283,6 @@ TEST_CASE("leading_zeroes") { CHECK(fast_float::leading_zeroes(bit << 63) == 0); } -#define iHexAndDec(v) std::hex << "0x" << (v) << " (" << std::dec << (v) << ")" -#define fHexAndDec(v) std::hexfloat << (v) << " (" << std::defaultfloat << (v) << ")" - void test_full_multiplication(uint64_t lhs, uint64_t rhs, uint64_t expected_lo, uint64_t expected_hi) { fast_float::value128 v; v = 
fast_float::full_multiplication(lhs, rhs);