Merge pull request #383 from redis-performance/pr/parallel-exhaustive

Parallelize the exhaustive float32 sweeps across hardware threads (~75-88x)
This commit is contained in:
Daniel Lemire 2026-06-01 18:07:01 -04:00 committed by GitHub
commit 06f3e27411
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 188 additions and 129 deletions

View File

@ -4,6 +4,10 @@ cmake_minimum_required(VERSION 3.11 FATAL_ERROR)
include(FetchContent) include(FetchContent)
# Some tests (the exhaustive sweeps) parallelize across std::thread.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
option(SYSTEM_DOCTEST "Use system copy of doctest" OFF) option(SYSTEM_DOCTEST "Use system copy of doctest" OFF)
option(FASTFLOAT_SUPPLEMENTAL_TESTS "Run supplemental tests" ON) option(FASTFLOAT_SUPPLEMENTAL_TESTS "Run supplemental tests" ON)
@ -49,6 +53,7 @@ function(fast_float_add_cpp_test TEST_NAME)
target_compile_options(${TEST_NAME} PUBLIC -Wsign-compare -Wshadow -Wwrite-strings -Wpointer-arith -Winit-self -Wconversion -Wsign-conversion) target_compile_options(${TEST_NAME} PUBLIC -Wsign-compare -Wshadow -Wwrite-strings -Wpointer-arith -Winit-self -Wconversion -Wsign-conversion)
endif() endif()
target_link_libraries(${TEST_NAME} PUBLIC fast_float supplemental-data) target_link_libraries(${TEST_NAME} PUBLIC fast_float supplemental-data)
target_link_libraries(${TEST_NAME} PUBLIC Threads::Threads)
if (NOT SYSTEM_DOCTEST) if (NOT SYSTEM_DOCTEST)
target_link_libraries(${TEST_NAME} PUBLIC doctest) target_link_libraries(${TEST_NAME} PUBLIC doctest)
else () else ()

View File

@ -8,6 +8,8 @@
#include <iostream> #include <iostream>
#include <limits> #include <limits>
#include <system_error> #include <system_error>
#include <thread>
#include <vector>
template <typename T> char *to_string(T d, char *buffer) { template <typename T> char *to_string(T d, char *buffer) {
auto written = std::snprintf(buffer, 64, "%.*e", auto written = std::snprintf(buffer, 64, "%.*e",
@ -15,47 +17,59 @@ template <typename T> char *to_string(T d, char *buffer) {
return buffer + written; return buffer + written;
} }
void allvalues() { // Checks a single 32-bit word (interpreted as a float); aborts on a mismatch.
void check_word(uint32_t word) {
char buffer[64]; char buffer[64];
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) { float v;
float v; memcpy(&v, &word, sizeof(v));
if ((w % 1048576) == 0) {
std::cout << ".";
std::cout.flush();
}
uint32_t word = uint32_t(w);
memcpy(&v, &word, sizeof(v));
{ char const *string_end = to_string(v, buffer);
char const *string_end = to_string(v, buffer); float result_value;
float result_value; auto result = fast_float::from_chars(buffer, string_end, result_value);
auto result = fast_float::from_chars(buffer, string_end, result_value); // Starting with version 4.0 for fast_float, we return result_out_of_range
// Starting with version 4.0 for fast_float, we return result_out_of_range // if the value is either too small (too close to zero) or too large
// if the value is either too small (too close to zero) or too large // (effectively infinity). So std::errc::result_out_of_range is normal for
// (effectively infinity). So std::errc::result_out_of_range is normal for // well-formed input strings.
// well-formed input strings. if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) {
if (result.ec != std::errc() && std::cerr << "parsing error ? " << buffer << std::endl;
result.ec != std::errc::result_out_of_range) { abort();
std::cerr << "parsing error ? " << buffer << std::endl; }
abort(); if (std::isnan(v)) {
} if (!std::isnan(result_value)) {
if (std::isnan(v)) { std::cerr << "not nan" << buffer << std::endl;
if (!std::isnan(result_value)) { abort();
std::cerr << "not nan" << buffer << std::endl;
abort();
}
} else if (copysign(1, result_value) != copysign(1, v)) {
std::cerr << "I got " << std::hexfloat << result_value
<< " but I was expecting " << v << std::endl;
abort();
} else if (result_value != v) {
std::cerr << "no match ? " << buffer << std::endl;
std::cout << "started with " << std::hexfloat << v << std::endl;
std::cout << "got back " << std::hexfloat << result_value << std::endl;
std::cout << std::dec;
abort();
}
} }
} else if (copysign(1, result_value) != copysign(1, v)) {
std::cerr << "I got " << std::hexfloat << result_value
<< " but I was expecting " << v << std::endl;
abort();
} else if (result_value != v) {
std::cerr << "no match ? " << buffer << std::endl;
std::cout << "started with " << std::hexfloat << v << std::endl;
std::cout << "got back " << std::hexfloat << result_value << std::endl;
std::cout << std::dec;
abort();
}
}
// Sweeps the whole 2^32 float space, split across hardware threads (the values
// are independent); check_word() aborts on the first mismatch.
void allvalues() {
unsigned int nthreads = std::thread::hardware_concurrency();
if (nthreads == 0) {
nthreads = 1;
}
std::vector<std::thread> workers;
workers.reserve(nthreads);
for (unsigned int t = 0; t < nthreads; t++) {
workers.emplace_back([t, nthreads]() {
for (uint64_t w = t; w <= 0xFFFFFFFF; w += nthreads) {
check_word(uint32_t(w));
}
});
}
for (std::thread &worker : workers) {
worker.join();
} }
std::cout << std::endl; std::cout << std::endl;
} }

View File

@ -1,6 +1,7 @@
#include "fast_float/fast_float.h" #include "fast_float/fast_float.h"
#include <atomic>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <cstdio> #include <cstdio>
@ -9,6 +10,8 @@
#include <limits> #include <limits>
#include <string> #include <string>
#include <system_error> #include <system_error>
#include <thread>
#include <vector>
template <typename T> char *to_string(T d, char *buffer) { template <typename T> char *to_string(T d, char *buffer) {
auto written = std::snprintf(buffer, 64, "%.*e", auto written = std::snprintf(buffer, 64, "%.*e",
@ -45,25 +48,38 @@ bool basic_test_64bit(std::string vals, double val) {
return true; return true;
} }
// Sweeps the whole 2^32 float space (widened to double), split across hardware
// threads (the values are independent); stops at the first mismatch.
void all_32bit_values() { void all_32bit_values() {
char buffer[64]; unsigned int nthreads = std::thread::hardware_concurrency();
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) { if (nthreads == 0) {
float v32; nthreads = 1;
if ((w % 1048576) == 0) { }
std::cout << "."; std::atomic<bool> ok{true};
std::cout.flush(); std::vector<std::thread> workers;
} workers.reserve(nthreads);
uint32_t word = uint32_t(w); for (unsigned int t = 0; t < nthreads; t++) {
memcpy(&v32, &word, sizeof(v32)); workers.emplace_back([t, nthreads, &ok]() {
double v = v32; char buffer[64];
for (uint64_t w = t;
w <= 0xFFFFFFFF && ok.load(std::memory_order_relaxed);
w += nthreads) {
float v32;
uint32_t word = uint32_t(w);
memcpy(&v32, &word, sizeof(v32));
double v = v32;
{ char const *string_end = to_string(v, buffer);
char const *string_end = to_string(v, buffer); std::string s(buffer, size_t(string_end - buffer));
std::string s(buffer, size_t(string_end - buffer)); if (!basic_test_64bit(s, v)) {
if (!basic_test_64bit(s, v)) { ok.store(false, std::memory_order_relaxed);
return; return;
}
} }
} });
}
for (std::thread &worker : workers) {
worker.join();
} }
std::cout << std::endl; std::cout << std::endl;
} }

View File

@ -1,5 +1,6 @@
#include "fast_float/fast_float.h" #include "fast_float/fast_float.h"
#include <atomic>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <cstdio> #include <cstdio>
@ -7,6 +8,8 @@
#include <iostream> #include <iostream>
#include <limits> #include <limits>
#include <stdexcept> #include <stdexcept>
#include <thread>
#include <vector>
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__)
// Anything at all that is related to cygwin, msys and so forth will // Anything at all that is related to cygwin, msys and so forth will
@ -74,86 +77,107 @@ void strtof_from_string(char const *st, float &d) {
} }
} }
bool allvalues() { // Checks a single 32-bit word (interpreted as a float). Returns true if the
// parser agrees with the reference, false (after logging) on a mismatch.
bool check_word(uint32_t word) {
char buffer[64]; char buffer[64];
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) { float v;
float v; memcpy(&v, &word, sizeof(v));
if ((w % 1048576) == 0) { if (!std::isfinite(v)) {
std::cout << "."; return true;
std::cout.flush(); }
} float nextf = std::nextafterf(v, INFINITY);
uint32_t word = uint32_t(w); if (copysign(1, v) != copysign(1, nextf)) {
memcpy(&v, &word, sizeof(v)); return true;
if (std::isfinite(v)) { }
float nextf = std::nextafterf(v, INFINITY); if (!std::isfinite(nextf)) {
if (copysign(1, v) != copysign(1, nextf)) { return true;
continue; }
} double v1{v};
if (!std::isfinite(nextf)) { assert(float(v1) == v);
continue; double v2{nextf};
} assert(float(v2) == nextf);
double v1{v}; double midv{v1 + (v2 - v1) / 2};
assert(float(v1) == v); float expected_midv = float(midv);
double v2{nextf};
assert(float(v2) == nextf); char const *string_end = to_string(midv, buffer);
double midv{v1 + (v2 - v1) / 2}; float str_answer;
float expected_midv = float(midv); strtof_from_string(buffer, str_answer);
char const *string_end = to_string(midv, buffer); float result_value;
float str_answer; auto result = fast_float::from_chars(buffer, string_end, result_value);
strtof_from_string(buffer, str_answer); // Starting with version 4.0 for fast_float, we return result_out_of_range
// if the value is either too small (too close to zero) or too large
float result_value; // (effectively infinity). So std::errc::result_out_of_range is normal for
auto result = fast_float::from_chars(buffer, string_end, result_value); // well-formed input strings.
// Starting with version 4.0 for fast_float, we return result_out_of_range if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) {
// if the value is either too small (too close to zero) or too large std::cerr << "parsing error ? " << buffer << std::endl;
// (effectively infinity). So std::errc::result_out_of_range is normal for return false;
// well-formed input strings. }
if (result.ec != std::errc() && if (std::isnan(v)) {
result.ec != std::errc::result_out_of_range) { if (!std::isnan(result_value)) {
std::cerr << "parsing error ? " << buffer << std::endl; std::cerr << "not nan" << buffer << std::endl;
return false; std::cerr << "v " << std::hexfloat << v << std::endl;
} std::cerr << "v2 " << std::hexfloat << v2 << std::endl;
if (std::isnan(v)) { std::cerr << "midv " << std::hexfloat << midv << std::endl;
if (!std::isnan(result_value)) { std::cerr << "expected_midv " << std::hexfloat << expected_midv
std::cerr << "not nan" << buffer << std::endl; << std::endl;
std::cerr << "v " << std::hexfloat << v << std::endl; return false;
std::cerr << "v2 " << std::hexfloat << v2 << std::endl; }
std::cerr << "midv " << std::hexfloat << midv << std::endl; } else if (copysign(1, result_value) != copysign(1, v)) {
std::cerr << "expected_midv " << std::hexfloat << expected_midv std::cerr << buffer << std::endl;
<< std::endl; std::cerr << "v " << std::hexfloat << v << std::endl;
return false; std::cerr << "v2 " << std::hexfloat << v2 << std::endl;
} std::cerr << "midv " << std::hexfloat << midv << std::endl;
} else if (copysign(1, result_value) != copysign(1, v)) { std::cerr << "expected_midv " << std::hexfloat << expected_midv
std::cerr << buffer << std::endl; << std::endl;
std::cerr << "v " << std::hexfloat << v << std::endl; std::cerr << "I got " << std::hexfloat << result_value
std::cerr << "v2 " << std::hexfloat << v2 << std::endl; << " but I was expecting " << v << std::endl;
std::cerr << "midv " << std::hexfloat << midv << std::endl; return false;
std::cerr << "expected_midv " << std::hexfloat << expected_midv } else if (result_value != str_answer) {
<< std::endl; std::cerr << "no match ? " << buffer << std::endl;
std::cerr << "I got " << std::hexfloat << result_value std::cerr << "v " << std::hexfloat << v << std::endl;
<< " but I was expecting " << v << std::endl; std::cerr << "v2 " << std::hexfloat << v2 << std::endl;
return false; std::cerr << "midv " << std::hexfloat << midv << std::endl;
} else if (result_value != str_answer) { std::cerr << "expected_midv " << std::hexfloat << expected_midv
std::cerr << "no match ? " << buffer << std::endl; << std::endl;
std::cerr << "v " << std::hexfloat << v << std::endl; std::cout << "started with " << std::hexfloat << midv << std::endl;
std::cerr << "v2 " << std::hexfloat << v2 << std::endl; std::cout << "round down to " << std::hexfloat << str_answer << std::endl;
std::cerr << "midv " << std::hexfloat << midv << std::endl; std::cout << "got back " << std::hexfloat << result_value << std::endl;
std::cerr << "expected_midv " << std::hexfloat << expected_midv std::cout << std::dec;
<< std::endl; return false;
std::cout << "started with " << std::hexfloat << midv << std::endl;
std::cout << "round down to " << std::hexfloat << str_answer
<< std::endl;
std::cout << "got back " << std::hexfloat << result_value << std::endl;
std::cout << std::dec;
return false;
}
}
} }
std::cout << std::endl;
return true; return true;
} }
// Exhaustively checks every 32-bit word with check_word(), sharing the work
// among one worker thread per reported hardware thread. Worker i visits the
// words i, i + N, i + 2N, ... (N = number of workers), so no word is checked
// twice. The first worker whose check_word() call fails clears a shared flag;
// the remaining workers observe the flag before their next word and stop.
// The result is produced only after every worker has been joined: true if no
// mismatch was recorded, false otherwise.
bool allvalues() {
  unsigned int const reported = std::thread::hardware_concurrency();
  // A reported count of 0 is replaced by a single worker.
  unsigned int const worker_count = (reported != 0) ? reported : 1;

  std::atomic<bool> all_passed{true};

  // Body shared by all workers; `first` is the worker's starting word.
  auto const sweep = [&all_passed, worker_count](unsigned int first) {
    uint64_t word = first;
    while (word <= 0xFFFFFFFF) {
      // Another worker already found a mismatch: give up early.
      if (!all_passed.load(std::memory_order_relaxed)) {
        return;
      }
      if (!check_word(uint32_t(word))) {
        all_passed.store(false, std::memory_order_relaxed);
        return;
      }
      word += worker_count;
    }
  };

  std::vector<std::thread> pool;
  pool.reserve(worker_count);
  for (unsigned int first = 0; first != worker_count; ++first) {
    pool.emplace_back(sweep, first);
  }
  for (auto &running : pool) {
    running.join();
  }
  return all_passed.load();
}
inline void Assert(bool Assertion) { inline void Assert(bool Assertion) {
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) || \ #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) || \
defined(sun) || defined(__sun) defined(sun) || defined(__sun)