Merge pull request #383 from redis-performance/pr/parallel-exhaustive

Parallelize the exhaustive float32 sweeps across hardware threads (~75-88x)
This commit is contained in:
Daniel Lemire 2026-06-01 18:07:01 -04:00 committed by GitHub
commit 06f3e27411
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 188 additions and 129 deletions

View File

@ -4,6 +4,10 @@ cmake_minimum_required(VERSION 3.11 FATAL_ERROR)
include(FetchContent)
# Some tests (the exhaustive sweeps) split their work across std::thread workers, so the platform thread library must be linked.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
option(SYSTEM_DOCTEST "Use system copy of doctest" OFF)
option(FASTFLOAT_SUPPLEMENTAL_TESTS "Run supplemental tests" ON)
@ -49,6 +53,7 @@ function(fast_float_add_cpp_test TEST_NAME)
target_compile_options(${TEST_NAME} PUBLIC -Wsign-compare -Wshadow -Wwrite-strings -Wpointer-arith -Winit-self -Wconversion -Wsign-conversion)
endif()
target_link_libraries(${TEST_NAME} PUBLIC fast_float supplemental-data)
target_link_libraries(${TEST_NAME} PUBLIC Threads::Threads)
if (NOT SYSTEM_DOCTEST)
target_link_libraries(${TEST_NAME} PUBLIC doctest)
else ()

View File

@ -8,6 +8,8 @@
#include <iostream>
#include <limits>
#include <system_error>
#include <thread>
#include <vector>
template <typename T> char *to_string(T d, char *buffer) {
auto written = std::snprintf(buffer, 64, "%.*e",
@ -15,47 +17,59 @@ template <typename T> char *to_string(T d, char *buffer) {
return buffer + written;
}
void allvalues() {
// Checks a single 32-bit word (interpreted as a float); aborts on a mismatch.
void check_word(uint32_t word) {
char buffer[64];
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) {
float v;
if ((w % 1048576) == 0) {
std::cout << ".";
std::cout.flush();
}
uint32_t word = uint32_t(w);
memcpy(&v, &word, sizeof(v));
float v;
memcpy(&v, &word, sizeof(v));
{
char const *string_end = to_string(v, buffer);
float result_value;
auto result = fast_float::from_chars(buffer, string_end, result_value);
// Starting with version 4.0 for fast_float, we return result_out_of_range
// if the value is either too small (too close to zero) or too large
// (effectively infinity). So std::errc::result_out_of_range is normal for
// well-formed input strings.
if (result.ec != std::errc() &&
result.ec != std::errc::result_out_of_range) {
std::cerr << "parsing error ? " << buffer << std::endl;
abort();
}
if (std::isnan(v)) {
if (!std::isnan(result_value)) {
std::cerr << "not nan" << buffer << std::endl;
abort();
}
} else if (copysign(1, result_value) != copysign(1, v)) {
std::cerr << "I got " << std::hexfloat << result_value
<< " but I was expecting " << v << std::endl;
abort();
} else if (result_value != v) {
std::cerr << "no match ? " << buffer << std::endl;
std::cout << "started with " << std::hexfloat << v << std::endl;
std::cout << "got back " << std::hexfloat << result_value << std::endl;
std::cout << std::dec;
abort();
}
char const *string_end = to_string(v, buffer);
float result_value;
auto result = fast_float::from_chars(buffer, string_end, result_value);
// Starting with version 4.0 for fast_float, we return result_out_of_range
// if the value is either too small (too close to zero) or too large
// (effectively infinity). So std::errc::result_out_of_range is normal for
// well-formed input strings.
if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) {
std::cerr << "parsing error ? " << buffer << std::endl;
abort();
}
if (std::isnan(v)) {
if (!std::isnan(result_value)) {
std::cerr << "not nan" << buffer << std::endl;
abort();
}
} else if (copysign(1, result_value) != copysign(1, v)) {
std::cerr << "I got " << std::hexfloat << result_value
<< " but I was expecting " << v << std::endl;
abort();
} else if (result_value != v) {
std::cerr << "no match ? " << buffer << std::endl;
std::cout << "started with " << std::hexfloat << v << std::endl;
std::cout << "got back " << std::hexfloat << result_value << std::endl;
std::cout << std::dec;
abort();
}
}
// Exhaustively verifies every one of the 2^32 float bit patterns. The words are
// handed out to the hardware threads in a strided fashion (worker i handles
// i, i + n, i + 2n, ...). check_word() aborts the process on the first
// mismatch, so returning from here means every word passed.
void allvalues() {
  // hardware_concurrency() may report 0; fall back to a single worker then.
  unsigned int const reported = std::thread::hardware_concurrency();
  unsigned int const worker_count = (reported == 0) ? 1 : reported;
  std::vector<std::thread> pool;
  pool.reserve(worker_count);
  for (unsigned int index = 0; index != worker_count; ++index) {
    pool.emplace_back([index, worker_count]() {
      // 64-bit counter so that stepping past 0xFFFFFFFF cannot wrap around.
      uint64_t current = index;
      while (current <= 0xFFFFFFFF) {
        check_word(uint32_t(current));
        current += worker_count;
      }
    });
  }
  // Wait for every worker before reporting completion.
  for (auto &running : pool) {
    running.join();
  }
  std::cout << std::endl;
}

View File

@ -1,6 +1,7 @@
#include "fast_float/fast_float.h"
#include <atomic>
#include <cassert>
#include <cmath>
#include <cstdio>
@ -9,6 +10,8 @@
#include <limits>
#include <string>
#include <system_error>
#include <thread>
#include <vector>
template <typename T> char *to_string(T d, char *buffer) {
auto written = std::snprintf(buffer, 64, "%.*e",
@ -45,25 +48,38 @@ bool basic_test_64bit(std::string vals, double val) {
return true;
}
// Sweeps the whole 2^32 float space (widened to double), split across hardware
// threads (the values are independent); stops at the first mismatch.
void all_32bit_values() {
char buffer[64];
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) {
float v32;
if ((w % 1048576) == 0) {
std::cout << ".";
std::cout.flush();
}
uint32_t word = uint32_t(w);
memcpy(&v32, &word, sizeof(v32));
double v = v32;
unsigned int nthreads = std::thread::hardware_concurrency();
if (nthreads == 0) {
nthreads = 1;
}
std::atomic<bool> ok{true};
std::vector<std::thread> workers;
workers.reserve(nthreads);
for (unsigned int t = 0; t < nthreads; t++) {
workers.emplace_back([t, nthreads, &ok]() {
char buffer[64];
for (uint64_t w = t;
w <= 0xFFFFFFFF && ok.load(std::memory_order_relaxed);
w += nthreads) {
float v32;
uint32_t word = uint32_t(w);
memcpy(&v32, &word, sizeof(v32));
double v = v32;
{
char const *string_end = to_string(v, buffer);
std::string s(buffer, size_t(string_end - buffer));
if (!basic_test_64bit(s, v)) {
return;
char const *string_end = to_string(v, buffer);
std::string s(buffer, size_t(string_end - buffer));
if (!basic_test_64bit(s, v)) {
ok.store(false, std::memory_order_relaxed);
return;
}
}
}
});
}
for (std::thread &worker : workers) {
worker.join();
}
std::cout << std::endl;
}

View File

@ -1,5 +1,6 @@
#include "fast_float/fast_float.h"
#include <atomic>
#include <cassert>
#include <cmath>
#include <cstdio>
@ -7,6 +8,8 @@
#include <iostream>
#include <limits>
#include <stdexcept>
#include <thread>
#include <vector>
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__)
// Anything at all that is related to cygwin, msys and so forth will
@ -74,86 +77,107 @@ void strtof_from_string(char const *st, float &d) {
}
}
bool allvalues() {
// Checks a single 32-bit word (interpreted as a float). Returns true if the
// parser agrees with the reference, false (after logging) on a mismatch.
bool check_word(uint32_t word) {
char buffer[64];
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) {
float v;
if ((w % 1048576) == 0) {
std::cout << ".";
std::cout.flush();
}
uint32_t word = uint32_t(w);
memcpy(&v, &word, sizeof(v));
if (std::isfinite(v)) {
float nextf = std::nextafterf(v, INFINITY);
if (copysign(1, v) != copysign(1, nextf)) {
continue;
}
if (!std::isfinite(nextf)) {
continue;
}
double v1{v};
assert(float(v1) == v);
double v2{nextf};
assert(float(v2) == nextf);
double midv{v1 + (v2 - v1) / 2};
float expected_midv = float(midv);
char const *string_end = to_string(midv, buffer);
float str_answer;
strtof_from_string(buffer, str_answer);
float result_value;
auto result = fast_float::from_chars(buffer, string_end, result_value);
// Starting with version 4.0 for fast_float, we return result_out_of_range
// if the value is either too small (too close to zero) or too large
// (effectively infinity). So std::errc::result_out_of_range is normal for
// well-formed input strings.
if (result.ec != std::errc() &&
result.ec != std::errc::result_out_of_range) {
std::cerr << "parsing error ? " << buffer << std::endl;
return false;
}
if (std::isnan(v)) {
if (!std::isnan(result_value)) {
std::cerr << "not nan" << buffer << std::endl;
std::cerr << "v " << std::hexfloat << v << std::endl;
std::cerr << "v2 " << std::hexfloat << v2 << std::endl;
std::cerr << "midv " << std::hexfloat << midv << std::endl;
std::cerr << "expected_midv " << std::hexfloat << expected_midv
<< std::endl;
return false;
}
} else if (copysign(1, result_value) != copysign(1, v)) {
std::cerr << buffer << std::endl;
std::cerr << "v " << std::hexfloat << v << std::endl;
std::cerr << "v2 " << std::hexfloat << v2 << std::endl;
std::cerr << "midv " << std::hexfloat << midv << std::endl;
std::cerr << "expected_midv " << std::hexfloat << expected_midv
<< std::endl;
std::cerr << "I got " << std::hexfloat << result_value
<< " but I was expecting " << v << std::endl;
return false;
} else if (result_value != str_answer) {
std::cerr << "no match ? " << buffer << std::endl;
std::cerr << "v " << std::hexfloat << v << std::endl;
std::cerr << "v2 " << std::hexfloat << v2 << std::endl;
std::cerr << "midv " << std::hexfloat << midv << std::endl;
std::cerr << "expected_midv " << std::hexfloat << expected_midv
<< std::endl;
std::cout << "started with " << std::hexfloat << midv << std::endl;
std::cout << "round down to " << std::hexfloat << str_answer
<< std::endl;
std::cout << "got back " << std::hexfloat << result_value << std::endl;
std::cout << std::dec;
return false;
}
}
float v;
memcpy(&v, &word, sizeof(v));
if (!std::isfinite(v)) {
return true;
}
float nextf = std::nextafterf(v, INFINITY);
if (copysign(1, v) != copysign(1, nextf)) {
return true;
}
if (!std::isfinite(nextf)) {
return true;
}
double v1{v};
assert(float(v1) == v);
double v2{nextf};
assert(float(v2) == nextf);
double midv{v1 + (v2 - v1) / 2};
float expected_midv = float(midv);
char const *string_end = to_string(midv, buffer);
float str_answer;
strtof_from_string(buffer, str_answer);
float result_value;
auto result = fast_float::from_chars(buffer, string_end, result_value);
// Starting with version 4.0 for fast_float, we return result_out_of_range
// if the value is either too small (too close to zero) or too large
// (effectively infinity). So std::errc::result_out_of_range is normal for
// well-formed input strings.
if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) {
std::cerr << "parsing error ? " << buffer << std::endl;
return false;
}
if (std::isnan(v)) {
if (!std::isnan(result_value)) {
std::cerr << "not nan" << buffer << std::endl;
std::cerr << "v " << std::hexfloat << v << std::endl;
std::cerr << "v2 " << std::hexfloat << v2 << std::endl;
std::cerr << "midv " << std::hexfloat << midv << std::endl;
std::cerr << "expected_midv " << std::hexfloat << expected_midv
<< std::endl;
return false;
}
} else if (copysign(1, result_value) != copysign(1, v)) {
std::cerr << buffer << std::endl;
std::cerr << "v " << std::hexfloat << v << std::endl;
std::cerr << "v2 " << std::hexfloat << v2 << std::endl;
std::cerr << "midv " << std::hexfloat << midv << std::endl;
std::cerr << "expected_midv " << std::hexfloat << expected_midv
<< std::endl;
std::cerr << "I got " << std::hexfloat << result_value
<< " but I was expecting " << v << std::endl;
return false;
} else if (result_value != str_answer) {
std::cerr << "no match ? " << buffer << std::endl;
std::cerr << "v " << std::hexfloat << v << std::endl;
std::cerr << "v2 " << std::hexfloat << v2 << std::endl;
std::cerr << "midv " << std::hexfloat << midv << std::endl;
std::cerr << "expected_midv " << std::hexfloat << expected_midv
<< std::endl;
std::cout << "started with " << std::hexfloat << midv << std::endl;
std::cout << "round down to " << std::hexfloat << str_answer << std::endl;
std::cout << "got back " << std::hexfloat << result_value << std::endl;
std::cout << std::dec;
return false;
}
std::cout << std::endl;
return true;
}
// Runs check_word() over all 2^32 float bit patterns, striding the words
// across the hardware threads (worker i handles i, i + n, i + 2n, ...). A
// shared flag lets every worker stop early once any of them sees a failure.
// Returns true only if no word mismatched.
bool allvalues() {
  // hardware_concurrency() may report 0; fall back to a single worker then.
  unsigned int const reported = std::thread::hardware_concurrency();
  unsigned int const worker_count = (reported == 0) ? 1 : reported;
  std::atomic<bool> all_good{true};
  std::vector<std::thread> pool;
  pool.reserve(worker_count);
  for (unsigned int index = 0; index != worker_count; ++index) {
    pool.emplace_back([index, worker_count, &all_good]() {
      // 64-bit counter so that stepping past 0xFFFFFFFF cannot wrap around.
      uint64_t current = index;
      while (current <= 0xFFFFFFFF &&
             all_good.load(std::memory_order_relaxed)) {
        if (!check_word(uint32_t(current))) {
          // Tell the other workers to stop, then leave this one.
          all_good.store(false, std::memory_order_relaxed);
          return;
        }
        current += worker_count;
      }
    });
  }
  // Wait for every worker; the flag is read only after all have finished.
  for (auto &running : pool) {
    running.join();
  }
  return all_good.load();
}
inline void Assert(bool Assertion) {
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) || \
defined(sun) || defined(__sun)