mirror of
https://github.com/fastfloat/fast_float.git
synced 2026-06-15 08:26:08 +08:00
Merge pull request #383 from redis-performance/pr/parallel-exhaustive
Parallelize the exhaustive float32 sweeps across hardware threads (~75-88x)
This commit is contained in:
commit
06f3e27411
@ -4,6 +4,10 @@ cmake_minimum_required(VERSION 3.11 FATAL_ERROR)
|
||||
|
||||
include(FetchContent)
|
||||
|
||||
# Some tests (the exhaustive sweeps) parallelize across std::thread.
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
option(SYSTEM_DOCTEST "Use system copy of doctest" OFF)
|
||||
option(FASTFLOAT_SUPPLEMENTAL_TESTS "Run supplemental tests" ON)
|
||||
|
||||
@ -49,6 +53,7 @@ function(fast_float_add_cpp_test TEST_NAME)
|
||||
target_compile_options(${TEST_NAME} PUBLIC -Wsign-compare -Wshadow -Wwrite-strings -Wpointer-arith -Winit-self -Wconversion -Wsign-conversion)
|
||||
endif()
|
||||
target_link_libraries(${TEST_NAME} PUBLIC fast_float supplemental-data)
|
||||
target_link_libraries(${TEST_NAME} PUBLIC Threads::Threads)
|
||||
if (NOT SYSTEM_DOCTEST)
|
||||
target_link_libraries(${TEST_NAME} PUBLIC doctest)
|
||||
else ()
|
||||
|
||||
@ -8,6 +8,8 @@
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <system_error>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
template <typename T> char *to_string(T d, char *buffer) {
|
||||
auto written = std::snprintf(buffer, 64, "%.*e",
|
||||
@ -15,18 +17,12 @@ template <typename T> char *to_string(T d, char *buffer) {
|
||||
return buffer + written;
|
||||
}
|
||||
|
||||
void allvalues() {
|
||||
// Checks a single 32-bit word (interpreted as a float); aborts on a mismatch.
|
||||
void check_word(uint32_t word) {
|
||||
char buffer[64];
|
||||
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) {
|
||||
float v;
|
||||
if ((w % 1048576) == 0) {
|
||||
std::cout << ".";
|
||||
std::cout.flush();
|
||||
}
|
||||
uint32_t word = uint32_t(w);
|
||||
memcpy(&v, &word, sizeof(v));
|
||||
|
||||
{
|
||||
char const *string_end = to_string(v, buffer);
|
||||
float result_value;
|
||||
auto result = fast_float::from_chars(buffer, string_end, result_value);
|
||||
@ -34,8 +30,7 @@ void allvalues() {
|
||||
// if the value is either too small (too close to zero) or too large
|
||||
// (effectively infinity). So std::errc::result_out_of_range is normal for
|
||||
// well-formed input strings.
|
||||
if (result.ec != std::errc() &&
|
||||
result.ec != std::errc::result_out_of_range) {
|
||||
if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) {
|
||||
std::cerr << "parsing error ? " << buffer << std::endl;
|
||||
abort();
|
||||
}
|
||||
@ -56,6 +51,25 @@ void allvalues() {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Sweeps the whole 2^32 float space, split across hardware threads (the values
|
||||
// are independent); check_word() aborts on the first mismatch.
|
||||
void allvalues() {
|
||||
unsigned int nthreads = std::thread::hardware_concurrency();
|
||||
if (nthreads == 0) {
|
||||
nthreads = 1;
|
||||
}
|
||||
std::vector<std::thread> workers;
|
||||
workers.reserve(nthreads);
|
||||
for (unsigned int t = 0; t < nthreads; t++) {
|
||||
workers.emplace_back([t, nthreads]() {
|
||||
for (uint64_t w = t; w <= 0xFFFFFFFF; w += nthreads) {
|
||||
check_word(uint32_t(w));
|
||||
}
|
||||
});
|
||||
}
|
||||
for (std::thread &worker : workers) {
|
||||
worker.join();
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
|
||||
#include "fast_float/fast_float.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
@ -9,6 +10,8 @@
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <system_error>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
template <typename T> char *to_string(T d, char *buffer) {
|
||||
auto written = std::snprintf(buffer, 64, "%.*e",
|
||||
@ -45,25 +48,38 @@ bool basic_test_64bit(std::string vals, double val) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sweeps the whole 2^32 float space (widened to double), split across hardware
|
||||
// threads (the values are independent); stops at the first mismatch.
|
||||
void all_32bit_values() {
|
||||
char buffer[64];
|
||||
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) {
|
||||
float v32;
|
||||
if ((w % 1048576) == 0) {
|
||||
std::cout << ".";
|
||||
std::cout.flush();
|
||||
unsigned int nthreads = std::thread::hardware_concurrency();
|
||||
if (nthreads == 0) {
|
||||
nthreads = 1;
|
||||
}
|
||||
std::atomic<bool> ok{true};
|
||||
std::vector<std::thread> workers;
|
||||
workers.reserve(nthreads);
|
||||
for (unsigned int t = 0; t < nthreads; t++) {
|
||||
workers.emplace_back([t, nthreads, &ok]() {
|
||||
char buffer[64];
|
||||
for (uint64_t w = t;
|
||||
w <= 0xFFFFFFFF && ok.load(std::memory_order_relaxed);
|
||||
w += nthreads) {
|
||||
float v32;
|
||||
uint32_t word = uint32_t(w);
|
||||
memcpy(&v32, &word, sizeof(v32));
|
||||
double v = v32;
|
||||
|
||||
{
|
||||
char const *string_end = to_string(v, buffer);
|
||||
std::string s(buffer, size_t(string_end - buffer));
|
||||
if (!basic_test_64bit(s, v)) {
|
||||
ok.store(false, std::memory_order_relaxed);
|
||||
return;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
for (std::thread &worker : workers) {
|
||||
worker.join();
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
#include "fast_float/fast_float.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
@ -7,6 +8,8 @@
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <stdexcept>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__)
|
||||
// Anything at all that is related to cygwin, msys and so forth will
|
||||
@ -74,23 +77,21 @@ void strtof_from_string(char const *st, float &d) {
|
||||
}
|
||||
}
|
||||
|
||||
bool allvalues() {
|
||||
// Checks a single 32-bit word (interpreted as a float). Returns true if the
|
||||
// parser agrees with the reference, false (after logging) on a mismatch.
|
||||
bool check_word(uint32_t word) {
|
||||
char buffer[64];
|
||||
for (uint64_t w = 0; w <= 0xFFFFFFFF; w++) {
|
||||
float v;
|
||||
if ((w % 1048576) == 0) {
|
||||
std::cout << ".";
|
||||
std::cout.flush();
|
||||
}
|
||||
uint32_t word = uint32_t(w);
|
||||
memcpy(&v, &word, sizeof(v));
|
||||
if (std::isfinite(v)) {
|
||||
if (!std::isfinite(v)) {
|
||||
return true;
|
||||
}
|
||||
float nextf = std::nextafterf(v, INFINITY);
|
||||
if (copysign(1, v) != copysign(1, nextf)) {
|
||||
continue;
|
||||
return true;
|
||||
}
|
||||
if (!std::isfinite(nextf)) {
|
||||
continue;
|
||||
return true;
|
||||
}
|
||||
double v1{v};
|
||||
assert(float(v1) == v);
|
||||
@ -109,8 +110,7 @@ bool allvalues() {
|
||||
// if the value is either too small (too close to zero) or too large
|
||||
// (effectively infinity). So std::errc::result_out_of_range is normal for
|
||||
// well-formed input strings.
|
||||
if (result.ec != std::errc() &&
|
||||
result.ec != std::errc::result_out_of_range) {
|
||||
if (result.ec != std::errc() && result.ec != std::errc::result_out_of_range) {
|
||||
std::cerr << "parsing error ? " << buffer << std::endl;
|
||||
return false;
|
||||
}
|
||||
@ -142,18 +142,42 @@ bool allvalues() {
|
||||
std::cerr << "expected_midv " << std::hexfloat << expected_midv
|
||||
<< std::endl;
|
||||
std::cout << "started with " << std::hexfloat << midv << std::endl;
|
||||
std::cout << "round down to " << std::hexfloat << str_answer
|
||||
<< std::endl;
|
||||
std::cout << "round down to " << std::hexfloat << str_answer << std::endl;
|
||||
std::cout << "got back " << std::hexfloat << result_value << std::endl;
|
||||
std::cout << std::dec;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sweeps the whole 2^32 float space, split across hardware threads (the values
|
||||
// are independent). Returns false as soon as any word mismatches.
|
||||
bool allvalues() {
|
||||
unsigned int nthreads = std::thread::hardware_concurrency();
|
||||
if (nthreads == 0) {
|
||||
nthreads = 1;
|
||||
}
|
||||
std::atomic<bool> ok{true};
|
||||
std::vector<std::thread> workers;
|
||||
workers.reserve(nthreads);
|
||||
for (unsigned int t = 0; t < nthreads; t++) {
|
||||
workers.emplace_back([t, nthreads, &ok]() {
|
||||
for (uint64_t w = t;
|
||||
w <= 0xFFFFFFFF && ok.load(std::memory_order_relaxed);
|
||||
w += nthreads) {
|
||||
if (!check_word(uint32_t(w))) {
|
||||
ok.store(false, std::memory_order_relaxed);
|
||||
return;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
for (std::thread &worker : workers) {
|
||||
worker.join();
|
||||
}
|
||||
return ok.load();
|
||||
}
|
||||
|
||||
inline void Assert(bool Assertion) {
|
||||
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) || \
|
||||
defined(sun) || defined(__sun)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user