Merge pull request #383 from redis-performance/pr/parallel-exhaustive

Parallelize the exhaustive float32 sweeps across hardware threads (~75-88x)
This commit is contained in:
Daniel Lemire 2026-06-01 18:07:01 -04:00 committed by GitHub
commit 06f3e27411
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 188 additions and 129 deletions

View File

@ -4,6 +4,10 @@ cmake_minimum_required(VERSION 3.11 FATAL_ERROR)
include(FetchContent) include(FetchContent)
# Some tests (the exhaustive sweeps) split their work across std::thread workers.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
option(SYSTEM_DOCTEST "Use system copy of doctest" OFF) option(SYSTEM_DOCTEST "Use system copy of doctest" OFF)
option(FASTFLOAT_SUPPLEMENTAL_TESTS "Run supplemental tests" ON) option(FASTFLOAT_SUPPLEMENTAL_TESTS "Run supplemental tests" ON)
@ -49,6 +53,7 @@ function(fast_float_add_cpp_test TEST_NAME)
target_compile_options(${TEST_NAME} PUBLIC -Wsign-compare -Wshadow -Wwrite-strings -Wpointer-arith -Winit-self -Wconversion -Wsign-conversion) target_compile_options(${TEST_NAME} PUBLIC -Wsign-compare -Wshadow -Wwrite-strings -Wpointer-arith -Winit-self -Wconversion -Wsign-conversion)
endif() endif()
target_link_libraries(${TEST_NAME} PUBLIC fast_float supplemental-data) target_link_libraries(${TEST_NAME} PUBLIC fast_float supplemental-data)
target_link_libraries(${TEST_NAME} PUBLIC Threads::Threads)
if (NOT SYSTEM_DOCTEST) if (NOT SYSTEM_DOCTEST)
target_link_libraries(${TEST_NAME} PUBLIC doctest) target_link_libraries(${TEST_NAME} PUBLIC doctest)
else () else ()

View File

@ -8,6 +8,8 @@
#include <iostream> #include <iostream>
#include <limits> #include <limits>
#include <system_error> #include <system_error>
#include <thread>
#include <vector>
template <typename T> char *to_string(T d, char *buffer) { template <typename T> char *to_string(T d, char *buffer) {
auto written = std::snprintf(buffer, 64, "%.*e", auto written = std::snprintf(buffer, 64, "%.*e",
@ -15,18 +17,12 @@ template <typename T> char *to_string(T d, char *buffer) {
return buffer + written; return buffer + written;
} }
void allvalues() { // Checks a single 32-bit word (interpreted as a float); aborts on a mismatch.
void check_word(uint32_t word) {
char buffer[64]; char buffer[64];
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) {
float v; float v;
if ((w % 1048576) == 0) {
std::cout << ".";
std::cout.flush();
}
uint32_t word = uint32_t(w);
memcpy(&v, &word, sizeof(v)); memcpy(&v, &word, sizeof(v));
{
char const *string_end = to_string(v, buffer); char const *string_end = to_string(v, buffer);
float result_value; float result_value;
auto result = fast_float::from_chars(buffer, string_end, result_value); auto result = fast_float::from_chars(buffer, string_end, result_value);
@ -34,8 +30,7 @@ void allvalues() {
// if the value is either too small (too close to zero) or too large // if the value is either too small (too close to zero) or too large
// (effectively infinity). So std::errc::result_out_of_range is normal for // (effectively infinity). So std::errc::result_out_of_range is normal for
// well-formed input strings. // well-formed input strings.
if (result.ec != std::errc() && if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) {
result.ec != std::errc::result_out_of_range) {
std::cerr << "parsing error ? " << buffer << std::endl; std::cerr << "parsing error ? " << buffer << std::endl;
abort(); abort();
} }
@ -56,6 +51,25 @@ void allvalues() {
abort(); abort();
} }
} }
// Sweeps the whole 2^32 float space, split across hardware threads (the values
// are independent); check_word() aborts on the first mismatch.
void allvalues() {
unsigned int nthreads = std::thread::hardware_concurrency();
if (nthreads == 0) {
nthreads = 1;
}
std::vector<std::thread> workers;
workers.reserve(nthreads);
for (unsigned int t = 0; t < nthreads; t++) {
workers.emplace_back([t, nthreads]() {
for (uint64_t w = t; w <= 0xFFFFFFFF; w += nthreads) {
check_word(uint32_t(w));
}
});
}
for (std::thread &worker : workers) {
worker.join();
} }
std::cout << std::endl; std::cout << std::endl;
} }

View File

@ -1,6 +1,7 @@
#include "fast_float/fast_float.h" #include "fast_float/fast_float.h"
#include <atomic>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <cstdio> #include <cstdio>
@ -9,6 +10,8 @@
#include <limits> #include <limits>
#include <string> #include <string>
#include <system_error> #include <system_error>
#include <thread>
#include <vector>
template <typename T> char *to_string(T d, char *buffer) { template <typename T> char *to_string(T d, char *buffer) {
auto written = std::snprintf(buffer, 64, "%.*e", auto written = std::snprintf(buffer, 64, "%.*e",
@ -45,25 +48,38 @@ bool basic_test_64bit(std::string vals, double val) {
return true; return true;
} }
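// Sweeps the whole 2^32 float space (each float widened to double), split across
// hardware threads (the values are independent). When basic_test_64bit() rejects
// a value, that worker sets a shared flag and the other workers stop at their
// next loop check.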
void all_32bit_values() { void all_32bit_values() {
char buffer[64]; unsigned int nthreads = std::thread::hardware_concurrency();
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) { if (nthreads == 0) {
float v32; nthreads = 1;
if ((w % 1048576) == 0) {
std::cout << ".";
std::cout.flush();
} }
std::atomic<bool> ok{true};
std::vector<std::thread> workers;
workers.reserve(nthreads);
for (unsigned int t = 0; t < nthreads; t++) {
workers.emplace_back([t, nthreads, &ok]() {
char buffer[64];
for (uint64_t w = t;
w <= 0xFFFFFFFF && ok.load(std::memory_order_relaxed);
w += nthreads) {
float v32;
uint32_t word = uint32_t(w); uint32_t word = uint32_t(w);
memcpy(&v32, &word, sizeof(v32)); memcpy(&v32, &word, sizeof(v32));
double v = v32; double v = v32;
{
char const *string_end = to_string(v, buffer); char const *string_end = to_string(v, buffer);
std::string s(buffer, size_t(string_end - buffer)); std::string s(buffer, size_t(string_end - buffer));
if (!basic_test_64bit(s, v)) { if (!basic_test_64bit(s, v)) {
ok.store(false, std::memory_order_relaxed);
return; return;
} }
} }
});
}
for (std::thread &worker : workers) {
worker.join();
} }
std::cout << std::endl; std::cout << std::endl;
} }

View File

@ -1,5 +1,6 @@
#include "fast_float/fast_float.h" #include "fast_float/fast_float.h"
#include <atomic>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <cstdio> #include <cstdio>
@ -7,6 +8,8 @@
#include <iostream> #include <iostream>
#include <limits> #include <limits>
#include <stdexcept> #include <stdexcept>
#include <thread>
#include <vector>
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__)
// Anything at all that is related to cygwin, msys and so forth will // Anything at all that is related to cygwin, msys and so forth will
@ -74,23 +77,21 @@ void strtof_from_string(char const *st, float &d) {
} }
} }
bool allvalues() { // Checks a single 32-bit word (interpreted as a float). Returns true if the
// parser agrees with the reference, false (after logging) on a mismatch.
bool check_word(uint32_t word) {
char buffer[64]; char buffer[64];
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) {
float v; float v;
if ((w % 1048576) == 0) {
std::cout << ".";
std::cout.flush();
}
uint32_t word = uint32_t(w);
memcpy(&v, &word, sizeof(v)); memcpy(&v, &word, sizeof(v));
if (std::isfinite(v)) { if (!std::isfinite(v)) {
return true;
}
float nextf = std::nextafterf(v, INFINITY); float nextf = std::nextafterf(v, INFINITY);
if (copysign(1, v) != copysign(1, nextf)) { if (copysign(1, v) != copysign(1, nextf)) {
continue; return true;
} }
if (!std::isfinite(nextf)) { if (!std::isfinite(nextf)) {
continue; return true;
} }
double v1{v}; double v1{v};
assert(float(v1) == v); assert(float(v1) == v);
@ -109,8 +110,7 @@ bool allvalues() {
// if the value is either too small (too close to zero) or too large // if the value is either too small (too close to zero) or too large
// (effectively infinity). So std::errc::result_out_of_range is normal for // (effectively infinity). So std::errc::result_out_of_range is normal for
// well-formed input strings. // well-formed input strings.
if (result.ec != std::errc() && if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) {
result.ec != std::errc::result_out_of_range) {
std::cerr << "parsing error ? " << buffer << std::endl; std::cerr << "parsing error ? " << buffer << std::endl;
return false; return false;
} }
@ -142,18 +142,42 @@ bool allvalues() {
std::cerr << "expected_midv " << std::hexfloat << expected_midv std::cerr << "expected_midv " << std::hexfloat << expected_midv
<< std::endl; << std::endl;
std::cout << "started with " << std::hexfloat << midv << std::endl; std::cout << "started with " << std::hexfloat << midv << std::endl;
std::cout << "round down to " << std::hexfloat << str_answer std::cout << "round down to " << std::hexfloat << str_answer << std::endl;
<< std::endl;
std::cout << "got back " << std::hexfloat << result_value << std::endl; std::cout << "got back " << std::hexfloat << result_value << std::endl;
std::cout << std::dec; std::cout << std::dec;
return false; return false;
} }
}
}
std::cout << std::endl;
return true; return true;
} }
// Runs check_word() over every 32-bit pattern. Worker i handles the words
// i, i + n, i + 2n, ... (n = number of workers), so no word is visited twice.
// A shared flag records a failure: the worker that sees check_word() return
// false sets it and exits, and every other worker exits at its next loop
// check. Returns true only if no worker recorded a failure; the result is
// read after all workers have been joined.
bool allvalues() {
  unsigned int const detected = std::thread::hardware_concurrency();
  // hardware_concurrency() may report 0; fall back to a single worker.
  unsigned int const worker_count = (detected != 0) ? detected : 1;

  std::atomic<bool> all_good{true};
  auto sweep = [&all_good, worker_count](unsigned int first) {
    uint64_t candidate = first;
    while (candidate <= 0xFFFFFFFF) {
      // Another worker already failed: no point in continuing.
      if (!all_good.load(std::memory_order_relaxed)) {
        return;
      }
      if (!check_word(uint32_t(candidate))) {
        all_good.store(false, std::memory_order_relaxed);
        return;
      }
      candidate += worker_count;
    }
  };

  std::vector<std::thread> pool;
  pool.reserve(worker_count);
  for (unsigned int index = 0; index != worker_count; ++index) {
    pool.emplace_back(sweep, index);
  }
  for (auto &running : pool) {
    running.join();
  }
  return all_good.load();
}
inline void Assert(bool Assertion) { inline void Assert(bool Assertion) {
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) || \ #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) || \
defined(sun) || defined(__sun) defined(sun) || defined(__sun)