From ca43e6722e2d38396506aec85f05adc785a83c77 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sun, 2 Apr 2023 17:43:17 -0400 Subject: [PATCH] We need to update some of our exhaustive tests to the new API --- .github/workflows/ubuntu22-clang.yml | 2 +- .github/workflows/ubuntu22-gcc12.yml | 2 +- .github/workflows/ubuntu22.yml | 2 +- README.md | 10 +++++----- include/fast_float/decimal_to_binary.h | 4 ++-- script/amalgamate.py | 6 +++--- script/mushtak_lemire.py | 8 ++++---- tests/example_comma_test.cpp | 2 +- tests/exhaustive32.cpp | 7 +++++-- tests/exhaustive32_64.cpp | 6 +++--- tests/exhaustive32_midpoint.cpp | 13 ++++++++----- tests/installation_tests/find/CMakeLists.txt | 2 +- tests/long_exhaustive32.cpp | 7 +++++-- tests/long_exhaustive32_64.cpp | 7 +++++-- tests/long_random64.cpp | 11 +++++++---- tests/long_test.cpp | 4 ++-- tests/powersoffive_hardround.cpp | 2 +- tests/random64.cpp | 11 +++++++---- tests/random_string.cpp | 11 ++++++++--- tests/short_random_string.cpp | 11 ++++++++--- tests/string_test.cpp | 6 +++--- 21 files changed, 81 insertions(+), 53 deletions(-) diff --git a/.github/workflows/ubuntu22-clang.yml b/.github/workflows/ubuntu22-clang.yml index d7fd9ee..14cf763 100644 --- a/.github/workflows/ubuntu22-clang.yml +++ b/.github/workflows/ubuntu22-clang.yml @@ -22,4 +22,4 @@ jobs: cd build20 && CXX=clang++-14 cmake -DFASTFLOAT_CONSTEXPR_TESTS=ON -DCMAKE_CXX_STANDARD=20 -DFASTFLOAT_TEST=ON .. && cmake --build . && - ctest --output-on-failure \ No newline at end of file + ctest --output-on-failure \ No newline at end of file diff --git a/.github/workflows/ubuntu22-gcc12.yml b/.github/workflows/ubuntu22-gcc12.yml index a4b6f21..666888a 100644 --- a/.github/workflows/ubuntu22-gcc12.yml +++ b/.github/workflows/ubuntu22-gcc12.yml @@ -20,4 +20,4 @@ jobs: cd build20 && CXX=g++-12 CXXFLAGS=-Werror cmake -DFASTFLOAT_CONSTEXPR_TESTS=ON -DCMAKE_CXX_STANDARD=20 -DFASTFLOAT_TEST=ON .. && cmake --build . && - ctest --output-on-failure \ No newline at end of file + ctest --output-on-failure \ No newline at end of file diff --git a/.github/workflows/ubuntu22.yml b/.github/workflows/ubuntu22.yml index 367cb0a..2ba4769 100644 --- a/.github/workflows/ubuntu22.yml +++ b/.github/workflows/ubuntu22.yml @@ -13,4 +13,4 @@ jobs: cd build && cmake -DFASTFLOAT_TEST=ON .. && cmake --build . && - ctest --output-on-failure \ No newline at end of file + ctest --output-on-failure \ No newline at end of file diff --git a/README.md b/README.md index 10cd01b..0b57a5b 100644 --- a/README.md +++ b/README.md @@ -189,11 +189,11 @@ It can parse random floating-point numbers at a speed of 1 GB/s on some systems. $ ./build/benchmarks/benchmark # parsing random integers in the range [0,1) volume = 2.09808 MB -netlib : 271.18 MB/s (+/- 1.2 %) 12.93 Mfloat/s -doubleconversion : 225.35 MB/s (+/- 1.2 %) 10.74 Mfloat/s -strtod : 190.94 MB/s (+/- 1.6 %) 9.10 Mfloat/s -abseil : 430.45 MB/s (+/- 2.2 %) 20.52 Mfloat/s -fastfloat : 1042.38 MB/s (+/- 9.9 %) 49.68 Mfloat/s +netlib : 271.18 MB/s (+/- 1.2 %) 12.93 Mfloat/s +doubleconversion : 225.35 MB/s (+/- 1.2 %) 10.74 Mfloat/s +strtod : 190.94 MB/s (+/- 1.6 %) 9.10 Mfloat/s +abseil : 430.45 MB/s (+/- 2.2 %) 20.52 Mfloat/s +fastfloat : 1042.38 MB/s (+/- 9.9 %) 49.68 Mfloat/s ``` See https://github.com/lemire/simple_fastfloat_benchmark for our benchmarking code. diff --git a/include/fast_float/decimal_to_binary.h b/include/fast_float/decimal_to_binary.h index 6d0f730..fec916f 100644 --- a/include/fast_float/decimal_to_binary.h +++ b/include/fast_float/decimal_to_binary.h @@ -48,9 +48,9 @@ namespace detail { * where * p = log(5**q)/log(2) = q * log(5)/log(2) * - * For negative values of q in (-400,0), we have that + * For negative values of q in (-400,0), we have that * f = (((152170 + 65536) * q ) >> 16); - * is equal to + * is equal to * -ceil(p) + q * where * p = log(5**-q)/log(2) = -q * log(5)/log(2) diff --git a/script/amalgamate.py b/script/amalgamate.py index 7ef44d4..c0f078d 100644 --- a/script/amalgamate.py +++ b/script/amalgamate.py @@ -31,7 +31,7 @@ for filename in ['LICENSE-MIT', 'LICENSE-APACHE']: processed_files[filename] = text # code -for filename in [ 'constexpr_feature_detect.h', 'fast_float.h', 'float_common.h', 'ascii_number.h', +for filename in [ 'constexpr_feature_detect.h', 'fast_float.h', 'float_common.h', 'ascii_number.h', 'fast_table.h', 'decimal_to_binary.h', 'bigint.h', 'ascii_number.h', 'digit_comparison.h', 'parse_number.h']: with open('include/fast_float/' + filename, encoding='utf8') as f: @@ -73,10 +73,10 @@ def license_content(license_arg): return result text = ''.join([ - processed_files['AUTHORS'], processed_files['CONTRIBUTORS'], + processed_files['AUTHORS'], processed_files['CONTRIBUTORS'], *license_content(args.license), processed_files['constexpr_feature_detect.h'], - processed_files['fast_float.h'], processed_files['float_common.h'], + processed_files['fast_float.h'], processed_files['float_common.h'], processed_files['ascii_number.h'], processed_files['fast_table.h'], processed_files['decimal_to_binary.h'], processed_files['bigint.h'], processed_files['ascii_number.h'], processed_files['digit_comparison.h'], diff --git a/script/mushtak_lemire.py b/script/mushtak_lemire.py index 14bf11f..5b98fda 100644 --- a/script/mushtak_lemire.py +++ b/script/mushtak_lemire.py @@ -1,12 +1,12 @@ # -# Reference : +# Reference : # Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to appear) # all_tqs = [] # Generates all possible values of T[q] -# Appendix B of Number parsing at a gigabyte per second. +# Appendix B of Number parsing at a gigabyte per second. # Software: Practice and Experience 2021;51(8):1700–1727. for q in range(-342, -27): power5 = 5**-q @@ -44,9 +44,9 @@ def continued_fraction(numer, denom): numer, denom = denom, rem return cf -# Given a continued fraction [a0; a1, a2, ..., an], returns +# Given a continued fraction [a0; a1, a2, ..., an], returns # all the convergents of that continued fraction -# as pairs of the form (numer, denom), where numer/denom is +# as pairs of the form (numer, denom), where numer/denom is # a convergent of the continued fraction in simple form. def convergents(cf): p_n_minus_2 = 0 diff --git a/tests/example_comma_test.cpp b/tests/example_comma_test.cpp index 12f488d..aa19735 100644 --- a/tests/example_comma_test.cpp +++ b/tests/example_comma_test.cpp @@ -3,7 +3,7 @@ #include #include #include - + int main() { const std::string input = "3,1416 xyz "; double result; diff --git a/tests/exhaustive32.cpp b/tests/exhaustive32.cpp index 9e1e42f..bddde23 100644 --- a/tests/exhaustive32.cpp +++ b/tests/exhaustive32.cpp @@ -30,7 +30,10 @@ void allvalues() { const char *string_end = to_string(v, buffer); float result_value; auto result = fast_float::from_chars(buffer, string_end, result_value); - if (result.ec != std::errc()) { + // Starting with version 4.0 for fast_float, we return result_out_of_range if the + // value is either too small (too close to zero) or too large (effectively infinity). + // So std::errc::result_out_of_range is normal for well-formed input strings. + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { std::cerr << "parsing error ? " << buffer << std::endl; abort(); } @@ -46,7 +49,7 @@ void allvalues() { } else if (result_value != v) { std::cerr << "no match ? " << buffer << std::endl; std::cout << "started with " << std::hexfloat << v << std::endl; - std::cout << "got back " << std::hexfloat << result_value << std::endl; + std::cout << "got back " << std::hexfloat << result_value << std::endl; std::cout << std::dec; abort(); } diff --git a/tests/exhaustive32_64.cpp b/tests/exhaustive32_64.cpp index e02757b..0c6285c 100644 --- a/tests/exhaustive32_64.cpp +++ b/tests/exhaustive32_64.cpp @@ -21,7 +21,7 @@ bool basic_test_64bit(std::string vals, double val) { double result_value; auto result = fast_float::from_chars(vals.data(), vals.data() + vals.size(), result_value); - if (result.ec != std::errc()) { + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { std::cerr << " I could not parse " << vals << std::endl; return false; } @@ -30,11 +30,11 @@ bool basic_test_64bit(std::string vals, double val) { std::cerr << vals << std::endl; std::cerr << "not nan" << result_value << std::endl; return false; - } + } } else if(copysign(1,result_value) != copysign(1,val)) { std::cerr << "I got " << std::hexfloat << result_value << " but I was expecting " << val << std::endl; - return false; + return false; } else if (result_value != val) { std::cerr << vals << std::endl; std::cerr << "I got " << std::hexfloat << result_value << " but I was expecting " << val diff --git a/tests/exhaustive32_midpoint.cpp b/tests/exhaustive32_midpoint.cpp index fdfd25d..8b6f0a6 100644 --- a/tests/exhaustive32_midpoint.cpp +++ b/tests/exhaustive32_midpoint.cpp @@ -8,7 +8,7 @@ #include #include -#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) +#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) // Anything at all that is related to cygwin, msys and so forth will // always use this fallback because we cannot rely on it behaving as normal // gcc. @@ -73,7 +73,7 @@ bool allvalues() { } uint32_t word = uint32_t(w); memcpy(&v, &word, sizeof(v)); - if(std::isfinite(v)) { + if(std::isfinite(v)) { float nextf = std::nextafterf(v, INFINITY); if(copysign(1,v) != copysign(1,nextf)) { continue; } if(!std::isfinite(nextf)) { continue; } @@ -90,7 +90,10 @@ bool allvalues() { float result_value; auto result = fast_float::from_chars(buffer, string_end, result_value); - if (result.ec != std::errc()) { + // Starting with version 4.0 for fast_float, we return result_out_of_range if the + // value is either too small (too close to zero) or too large (effectively infinity). + // So std::errc::result_out_of_range is normal for well-formed input strings. + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { std::cerr << "parsing error ? " << buffer << std::endl; return false; } @@ -120,7 +123,7 @@ bool allvalues() { std::cerr << "expected_midv " << std::hexfloat << expected_midv << std::endl; std::cout << "started with " << std::hexfloat << midv << std::endl; std::cout << "round down to " << std::hexfloat << str_answer << std::endl; - std::cout << "got back " << std::hexfloat << result_value << std::endl; + std::cout << "got back " << std::hexfloat << result_value << std::endl; std::cout << std::dec; return false; } @@ -133,7 +136,7 @@ bool allvalues() { inline void Assert(bool Assertion) { #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(sun) || defined(__sun) if (!Assertion) { std::cerr << "Omitting hard failure on msys/cygwin/sun systems."; } -#else +#else if (!Assertion) { throw std::runtime_error("bug"); } #endif } diff --git a/tests/installation_tests/find/CMakeLists.txt b/tests/installation_tests/find/CMakeLists.txt index b2ccd40..9a61879 100644 --- a/tests/installation_tests/find/CMakeLists.txt +++ b/tests/installation_tests/find/CMakeLists.txt @@ -14,7 +14,7 @@ find_package(FastFloat REQUIRED) file(WRITE main.cpp " #include \"fast_float/fast_float.h\" #include - + int main() { const std::string input = \"3.1416 xyz \"; double result; diff --git a/tests/long_exhaustive32.cpp b/tests/long_exhaustive32.cpp index 0a6b53d..9c297bd 100644 --- a/tests/long_exhaustive32.cpp +++ b/tests/long_exhaustive32.cpp @@ -29,7 +29,10 @@ void allvalues() { const char *string_end = to_string(v, buffer); float result_value; auto result = fast_float::from_chars(buffer, string_end, result_value); - if (result.ec != std::errc()) { + // Starting with version 4.0 for fast_float, we return result_out_of_range if the + // value is either too small (too close to zero) or too large (effectively infinity). + // So std::errc::result_out_of_range is normal for well-formed input strings. + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { std::cerr << "parsing error ? " << buffer << std::endl; abort(); } @@ -46,7 +49,7 @@ void allvalues() { } else if (result_value != v) { std::cerr << "no match ? " << buffer << " got " << result_value << " expected " << v << std::endl; std::cout << "started with " << std::hexfloat << v << std::endl; - std::cout << "got back " << std::hexfloat << result_value << std::endl; + std::cout << "got back " << std::hexfloat << result_value << std::endl; std::cout << std::dec; abort(); } diff --git a/tests/long_exhaustive32_64.cpp b/tests/long_exhaustive32_64.cpp index cea8497..b6d9d50 100644 --- a/tests/long_exhaustive32_64.cpp +++ b/tests/long_exhaustive32_64.cpp @@ -28,7 +28,10 @@ void all_32bit_values() { const char *string_end = to_string(v, buffer); double result_value; auto result = fast_float::from_chars(buffer, string_end, result_value); - if (result.ec != std::errc()) { + // Starting with version 4.0 for fast_float, we return result_out_of_range if the + // value is either too small (too close to zero) or too large (effectively infinity). + // So std::errc::result_out_of_range is normal for well-formed input strings. + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { std::cerr << "parsing error ? " << buffer << std::endl; abort(); } @@ -49,7 +52,7 @@ void all_32bit_values() { } else if (result_value != v) { std::cerr << "no match ? " << buffer << std::endl; std::cout << "started with " << std::hexfloat << v << std::endl; - std::cout << "got back " << std::hexfloat << result_value << std::endl; + std::cout << "got back " << std::hexfloat << result_value << std::endl; std::cout << std::dec; abort(); } diff --git a/tests/long_random64.cpp b/tests/long_random64.cpp index 713c125..419414e 100644 --- a/tests/long_random64.cpp +++ b/tests/long_random64.cpp @@ -27,9 +27,9 @@ static fast_float::value128 g_lehmer64_state; * Society 68.225 (1999): 249-260. */ -static inline void lehmer64_seed(uint64_t seed) { +static inline void lehmer64_seed(uint64_t seed) { g_lehmer64_state.high = 0; - g_lehmer64_state.low = seed; + g_lehmer64_state.low = seed; } static inline uint64_t lehmer64() { @@ -56,7 +56,10 @@ void random_values(size_t N) { const char *string_end = to_string(v, buffer); double result_value; auto result = fast_float::from_chars(buffer, string_end, result_value); - if (result.ec != std::errc()) { + // Starting with version 4.0 for fast_float, we return result_out_of_range if the + // value is either too small (too close to zero) or too large (effectively infinity). + // So std::errc::result_out_of_range is normal for well-formed input strings. + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { std::cerr << "parsing error ? " << buffer << std::endl; errors++; if (errors > 10) { @@ -80,7 +83,7 @@ void random_values(size_t N) { } else if (result_value != v) { std::cerr << "no match ? '" << buffer << "'" << std::endl; std::cout << "started with " << std::hexfloat << v << std::endl; - std::cout << "got back " << std::hexfloat << result_value << std::endl; + std::cout << "got back " << std::hexfloat << result_value << std::endl; std::cout << std::dec; errors++; if (errors > 10) { diff --git a/tests/long_test.cpp b/tests/long_test.cpp index 36b9210..85ac0be 100644 --- a/tests/long_test.cpp +++ b/tests/long_test.cpp @@ -22,7 +22,7 @@ bool test() { while((begin < end) && (std::isspace(*begin))) { begin++; } auto result = fast_float::from_chars(begin, end, result_value); - if (result.ec != std::errc()) { + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { printf("parsing %.*s\n", int(end - begin), begin); std::cerr << " I could not parse " << std::endl; return false; @@ -40,7 +40,7 @@ bool test() { } if(begin != end) { std::cerr << " bad ending " << std::endl; - return false; + return false; } return true; } diff --git a/tests/powersoffive_hardround.cpp b/tests/powersoffive_hardround.cpp index 09b95bd..7f0903f 100644 --- a/tests/powersoffive_hardround.cpp +++ b/tests/powersoffive_hardround.cpp @@ -105,7 +105,7 @@ bool tester() { double result_value; auto result = fast_float::from_chars(to_be_parsed.data(), to_be_parsed.data() + to_be_parsed.size(), result_value); - if (result.ec != std::errc()) { + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { std::cout << to_be_parsed << std::endl; std::cerr << " I could not parse " << std::endl; return false; diff --git a/tests/random64.cpp b/tests/random64.cpp index 6b3ef50..4836947 100644 --- a/tests/random64.cpp +++ b/tests/random64.cpp @@ -29,9 +29,9 @@ static fast_float::value128 g_lehmer64_state; * Society 68.225 (1999): 249-260. */ -static inline void lehmer64_seed(uint64_t seed) { +static inline void lehmer64_seed(uint64_t seed) { g_lehmer64_state.high = 0; - g_lehmer64_state.low = seed; + g_lehmer64_state.low = seed; } static inline uint64_t lehmer64() { @@ -59,7 +59,10 @@ void random_values(size_t N) { const char *string_end = to_string(v, buffer); double result_value; auto result = fast_float::from_chars(buffer, string_end, result_value); - if (result.ec != std::errc()) { + // Starting with version 4.0 for fast_float, we return result_out_of_range if the + // value is either too small (too close to zero) or too large (effectively infinity). + // So std::errc::result_out_of_range is normal for well-formed input strings. + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { std::cerr << "parsing error ? " << buffer << std::endl; errors++; if (errors > 10) { @@ -83,7 +86,7 @@ void random_values(size_t N) { } else if (result_value != v) { std::cerr << "no match ? " << buffer << std::endl; std::cout << "started with " << std::hexfloat << v << std::endl; - std::cout << "got back " << std::hexfloat << result_value << std::endl; + std::cout << "got back " << std::hexfloat << result_value << std::endl; std::cout << std::dec; errors++; if (errors > 10) { diff --git a/tests/random_string.cpp b/tests/random_string.cpp index 8cabf5f..7ecfc6b 100644 --- a/tests/random_string.cpp +++ b/tests/random_string.cpp @@ -101,7 +101,12 @@ size_t build_random_string(RandomEngine &rand, char *buffer) { if (i == size_t(location_of_decimal_separator)) { buffer[pos++] = '.'; } - buffer[pos++] = char(rand.next_digit() + '0'); + buffer[pos] = char(rand.next_digit() + '0'); + // We can have a leading zero only if location_of_decimal_separator = 1. + while(i == 0 && 1 != size_t(location_of_decimal_separator) && buffer[pos] == '0') { + buffer[pos] = char(rand.next_digit() + '0'); + } + pos++; } if (rand.next_bool()) { if (rand.next_bool()) { @@ -178,7 +183,7 @@ bool tester(uint64_t seed, size_t volume) { double result_value; auto result = fast_float::from_chars(buffer, buffer + length, result_value); - if (result.ec != std::errc()) { + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { printf("parsing %.*s\n", int(length), buffer); std::cerr << " I could not parse " << std::endl; return false; @@ -201,7 +206,7 @@ bool tester(uint64_t seed, size_t volume) { float result_value; auto result = fast_float::from_chars(buffer, buffer + length, result_value); - if (result.ec != std::errc()) { + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { printf("parsing %.*s\n", int(length), buffer); std::cerr << " I could not parse " << std::endl; return false; diff --git a/tests/short_random_string.cpp b/tests/short_random_string.cpp index 3051b74..fd894ab 100644 --- a/tests/short_random_string.cpp +++ b/tests/short_random_string.cpp @@ -97,7 +97,12 @@ size_t build_random_string(RandomEngine &rand, char *buffer) { if (i == size_t(location_of_decimal_separator)) { buffer[pos++] = '.'; } - buffer[pos++] = char(rand.next_digit() + '0'); + buffer[pos] = char(rand.next_digit() + '0'); + // We can have a leading zero only if location_of_decimal_separator = 1. + while(i == 0 && 1 != size_t(location_of_decimal_separator) && buffer[pos] == '0') { + buffer[pos] = char(rand.next_digit() + '0'); + } + pos++; } if (rand.next_bool()) { if (rand.next_bool()) { @@ -174,7 +179,7 @@ bool tester(uint64_t seed, size_t volume) { double result_value; auto result = fast_float::from_chars(buffer, buffer + length, result_value); - if (result.ec != std::errc()) { + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { printf("parsing %.*s\n", int(length), buffer); std::cerr << " I could not parse " << std::endl; return false; @@ -197,7 +202,7 @@ bool tester(uint64_t seed, size_t volume) { float result_value; auto result = fast_float::from_chars(buffer, buffer + length, result_value); - if (result.ec != std::errc()) { + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { printf("parsing %.*s\n", int(length), buffer); std::cerr << " I could not parse " << std::endl; return false; diff --git a/tests/string_test.cpp b/tests/string_test.cpp index 4329dae..05f840c 100644 --- a/tests/string_test.cpp +++ b/tests/string_test.cpp @@ -85,7 +85,7 @@ bool test() { } if(begin != end) { std::cerr << " bad ending " << std::endl; - return false; + return false; } return true; } @@ -239,7 +239,7 @@ bool partow_test() { T result_value; auto result = fast_float::from_chars(st.data(), st.data() + st.size(), result_value); - if (result.ec != std::errc()) { + if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) { printf("parsing %.*s\n", int(st.size()), st.data()); std::cerr << " I could not parse " << std::endl; return false; @@ -270,7 +270,7 @@ int main() { std::cout << "32 bits checks" << std::endl; Assert(partow_test()); Assert(test()); - + std::cout << "64 bits checks" << std::endl; Assert(partow_test()); Assert(test());