Merge branch 'fastfloat:main' into main

This commit is contained in:
RealTimeChris 2024-11-25 18:32:50 -05:00 committed by GitHub
commit 3d83d19437
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 1718 additions and 2 deletions

View File

@ -11,7 +11,7 @@ jobs:
run: |
mkdir build &&
cd build &&
CXXFLAGS=-Werror cmake -DFASTFLOAT_TEST=ON .. &&
CXXFLAGS=-Werror cmake -DFASTFLOAT_TEST=ON -D FASTFLOAT_BENCHMARKS=ON .. &&
cmake --build . &&
ctest --output-on-failure
- name: Use cmake CXX23

View File

@ -4,6 +4,7 @@ project(fast_float VERSION 7.0.0 LANGUAGES CXX)
set(FASTFLOAT_CXX_STANDARD 11 CACHE STRING "the C++ standard to use for fastfloat")
set(CMAKE_CXX_STANDARD ${FASTFLOAT_CXX_STANDARD})
option(FASTFLOAT_TEST "Enable tests" OFF)
if(FASTFLOAT_TEST)
enable_testing()
add_subdirectory(tests)
@ -29,6 +30,16 @@ if(FASTFLOAT_INSTALL)
endif()
add_library(fast_float INTERFACE)
option(FASTFLOAT_BENCHMARKS "Enable benchmarks" OFF)
if(FASTFLOAT_BENCHMARKS)
add_subdirectory(benchmarks)
else(FASTFLOAT_BENCHMARKS)
message(STATUS "Benchmarks are disabled. Set FASTFLOAT_BENCHMARKS to ON to build benchmarks (assumes C++17).")
endif(FASTFLOAT_BENCHMARKS)
add_library(FastFloat::fast_float ALIAS fast_float)
target_include_directories(
fast_float

View File

@ -1,7 +1,6 @@
## fast_float number parsing library: 4x faster than strtod
[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/fast_float.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:fast_float)
[![Ubuntu 22.04 CI (GCC 11)](https://github.com/fastfloat/fast_float/actions/workflows/ubuntu22.yml/badge.svg)](https://github.com/fastfloat/fast_float/actions/workflows/ubuntu22.yml)
The fast_float library provides fast header-only implementations for the C++

26
benchmarks/CMakeLists.txt Normal file
View File

@ -0,0 +1,26 @@
# Benchmark executable built from benchmark.cpp. It is compiled as C++17
# regardless of the standard selected for the rest of the project.
add_executable(realbenchmark benchmark.cpp)
set_property(
TARGET realbenchmark
PROPERTY CXX_STANDARD 17)
target_link_libraries(realbenchmark PUBLIC fast_float)
include(ExternalProject)
# Define the external project
# The configure, build and install commands are all empty strings, so the
# repository is presumably only fetched (for its data directory) and never built.
ExternalProject_Add(simple_fastfloat_benchmark
GIT_REPOSITORY https://github.com/lemire/simple_fastfloat_benchmark.git
GIT_TAG master # or specify a particular commit/tag/branch
SOURCE_DIR ${CMAKE_BINARY_DIR}/simple_fastfloat_benchmark
BINARY_DIR ${CMAKE_BINARY_DIR}/simple_fastfloat_benchmark-build
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
)
# Location of the data files inside the fetched source tree.
set(DATA_DIR ${CMAKE_BINARY_DIR}/simple_fastfloat_benchmark/data)
# Copy the data next to the benchmark binary; runs as part of the default build
# (ALL) and is ordered after the external project.
add_custom_target(CopyData ALL
COMMAND ${CMAKE_COMMAND} -E copy_directory ${DATA_DIR} ${CMAKE_CURRENT_BINARY_DIR}/data
DEPENDS simple_fastfloat_benchmark
)
# The benchmark depends on the copied data, and receives its directory through
# the BENCHMARK_DATA_DIR compile definition.
add_dependencies(realbenchmark CopyData)
target_compile_definitions(realbenchmark PUBLIC BENCHMARK_DATA_DIR="${CMAKE_CURRENT_BINARY_DIR}/data")

File diff suppressed because it is too large Load Diff

241
benchmarks/benchmark.cpp Normal file
View File

@ -0,0 +1,241 @@
#if defined(__linux__) || (__APPLE__ && __aarch64__)
#define USING_COUNTERS
#endif
// Included on every platform: event_collector is used unconditionally further
// down, and event_counter.h falls back to a collector without counters.
#include "event_counter.h"
#include <algorithm>
#include "fast_float/fast_float.h"
#include <cfloat>
#include <chrono>
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctype.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <random>
#include <sstream>
#include <stdexcept>
#include <stdio.h>
#include <string>
#include <utility>
#include <vector>
#include <locale.h>
/// Parses every entry of `s` as a double with fast_float::from_chars and
/// returns the largest value seen, starting from 0.
/// Throws std::runtime_error when the parser consumes nothing from an entry.
template <typename CharT>
double findmax_fastfloat64(std::vector<std::basic_string<CharT>> &s) {
  double largest = 0;
  double parsed = 0;
  for (auto &text : s) {
    const CharT *const first = text.data();
    const CharT *const last = first + text.size();
    const auto outcome = fast_float::from_chars(first, last, parsed);
    // A result pointer equal to the start means no character was consumed.
    if (outcome.ptr == first) {
      throw std::runtime_error("bug in findmax_fastfloat");
    }
    if (!(largest > parsed)) {
      largest = parsed;
    }
  }
  return largest;
}
/// Single-precision counterpart of findmax_fastfloat64: parses every entry of
/// `s` as a float and returns the largest value seen (widened to double),
/// starting from 0.
/// Throws std::runtime_error when the parser consumes nothing from an entry.
template <typename CharT>
double findmax_fastfloat32(std::vector<std::basic_string<CharT>> &s) {
  float largest = 0;
  float parsed = 0;
  for (auto &text : s) {
    const CharT *const first = text.data();
    const CharT *const last = first + text.size();
    const auto outcome = fast_float::from_chars(first, last, parsed);
    // A result pointer equal to the start means no character was consumed.
    if (outcome.ptr == first) {
      throw std::runtime_error("bug in findmax_fastfloat");
    }
    if (!(largest > parsed)) {
      largest = parsed;
    }
  }
  return largest;
}
// Single collector shared by the counter-based time_it_ns and by main().
// It sits outside the USING_COUNTERS guard, and main() calls
// collector.has_events() unconditionally, so event_collector has to be
// declared on every platform this file is built for.
event_collector collector{};
#ifdef USING_COUNTERS
template <class T, class CharT>
std::vector<event_count>
time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
size_t repeat) {
std::vector<event_count> aggregate;
bool printed_bug = false;
for (size_t i = 0; i < repeat; i++) {
collector.start();
double ts = function(lines);
if (ts == 0 && !printed_bug) {
printf("bug\n");
printed_bug = true;
}
aggregate.push_back(collector.end());
}
return aggregate;
}
void pretty_print(double volume, size_t number_of_floats, std::string name,
std::vector<event_count> events) {
double volumeMB = volume / (1024. * 1024.);
double average_ns{0};
double min_ns{DBL_MAX};
double cycles_min{DBL_MAX};
double instructions_min{DBL_MAX};
double cycles_avg{0};
double instructions_avg{0};
double branches_min{0};
double branches_avg{0};
double branch_misses_min{0};
double branch_misses_avg{0};
for (event_count e : events) {
double ns = e.elapsed_ns();
average_ns += ns;
min_ns = min_ns < ns ? min_ns : ns;
double cycles = e.cycles();
cycles_avg += cycles;
cycles_min = cycles_min < cycles ? cycles_min : cycles;
double instructions = e.instructions();
instructions_avg += instructions;
instructions_min =
instructions_min < instructions ? instructions_min : instructions;
double branches = e.branches();
branches_avg += branches;
branches_min = branches_min < branches ? branches_min : branches;
double branch_misses = e.missed_branches();
branch_misses_avg += branch_misses;
branch_misses_min =
branch_misses_min < branch_misses ? branch_misses_min : branch_misses;
}
cycles_avg /= events.size();
instructions_avg /= events.size();
average_ns /= events.size();
branches_avg /= events.size();
printf("%-40s: %8.2f MB/s (+/- %.1f %%) ", name.data(),
volumeMB * 1000000000 / min_ns,
(average_ns - min_ns) * 100.0 / average_ns);
printf("%8.2f Mfloat/s ", number_of_floats * 1000 / min_ns);
if (instructions_min > 0) {
printf(" %8.2f i/B %8.2f i/f (+/- %.1f %%) ", instructions_min / volume,
instructions_min / number_of_floats,
(instructions_avg - instructions_min) * 100.0 / instructions_avg);
printf(" %8.2f c/B %8.2f c/f (+/- %.1f %%) ", cycles_min / volume,
cycles_min / number_of_floats,
(cycles_avg - cycles_min) * 100.0 / cycles_avg);
printf(" %8.2f i/c ", instructions_min / cycles_min);
printf(" %8.2f b/f ", branches_avg / number_of_floats);
printf(" %8.2f bm/f ", branch_misses_avg / number_of_floats);
printf(" %8.2f GHz ", cycles_min / min_ns);
}
printf("\n");
}
#else
/// Runs `function(lines)` `repeat` times, timing each run with
/// std::chrono::high_resolution_clock.
/// Returns {fastest run, average run}, both in nanoseconds.
/// Prints "bug" once if a run returns exactly 0.
template <class T, class CharT>
std::pair<double, double>
time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
           size_t repeat) {
  using timer = std::chrono::high_resolution_clock;
  double total_ns = 0;
  double fastest_ns = DBL_MAX;
  bool warned = false;
  for (size_t left = repeat; left != 0; --left) {
    const timer::time_point begin = timer::now();
    const double outcome = function(lines);
    if (!warned && outcome == 0) {
      printf("bug\n");
      warned = true;
    }
    const timer::time_point finish = timer::now();
    const double elapsed_ns =
        std::chrono::duration_cast<std::chrono::nanoseconds>(finish - begin)
            .count();
    total_ns += elapsed_ns;
    if (!(fastest_ns < elapsed_ns)) {
      fastest_ns = elapsed_ns;
    }
  }
  total_ns /= repeat;
  return std::make_pair(fastest_ns, total_ns);
}
/// Prints one summary line for a benchmark timed without hardware counters.
/// `result` holds {fastest run, average run} in nanoseconds, `volume` is the
/// number of input bytes and `number_of_floats` the number of parsed values.
void pretty_print(double volume, size_t number_of_floats, std::string name,
                  std::pair<double, double> result) {
  const double fastest_ns = result.first;
  const double mean_ns = result.second;
  const double volumeMB = volume / (1024. * 1024.);
  const double spread_percent = (mean_ns - fastest_ns) * 100.0 / mean_ns;
  printf("%-40s: %8.2f MB/s (+/- %.1f %%) ", name.data(),
         volumeMB * 1000000000 / fastest_ns, spread_percent);
  printf("%8.2f Mfloat/s ", number_of_floats * 1000 / fastest_ns);
  printf(" %8.2f ns/f \n", fastest_ns / number_of_floats);
}
#endif
// Converts a narrow string to UTF-16 one code unit per char.
// this is okay, all chars are ASCII
inline std::u16string widen(std::string line) {
  return std::u16string(line.begin(), line.end());
}
std::vector<std::u16string> widen(const std::vector<std::string> &lines) {
std::vector<std::u16string> u16lines;
u16lines.reserve(lines.size());
for (auto const &line : lines) {
u16lines.push_back(widen(line));
}
return u16lines;
}
// Benchmarks double and float parsing over `lines`, first as narrow strings
// and then as UTF-16 copies, printing one summary line per run.
// `volume` is the byte count of the narrow input; it is doubled for UTF-16.
void process(std::vector<std::string> &lines, size_t volume) {
  const size_t repeat = 100;
  const double bytes_per_mb = 1024. * 1024.;

  // Narrow (char) input.
  double volumeMB = volume / bytes_per_mb;
  std::cout << "ASCII volume = " << volumeMB << " MB " << std::endl;
  pretty_print(volume, lines.size(), "fastfloat (64)",
               time_it_ns(lines, findmax_fastfloat64<char>, repeat));
  pretty_print(volume, lines.size(), "fastfloat (32)",
               time_it_ns(lines, findmax_fastfloat32<char>, repeat));

  // Same content as char16_t: every character now takes two bytes.
  std::vector<std::u16string> lines16 = widen(lines);
  volume = 2 * volume;
  volumeMB = volume / bytes_per_mb;
  std::cout << "UTF-16 volume = " << volumeMB << " MB " << std::endl;
  pretty_print(volume, lines.size(), "fastfloat (64)",
               time_it_ns(lines16, findmax_fastfloat64<char16_t>, repeat));
  pretty_print(volume, lines.size(), "fastfloat (32)",
               time_it_ns(lines16, findmax_fastfloat32<char16_t>, repeat));
}
// Reads `filename` line by line and hands the lines, together with their
// total length in bytes, to process(). Reports on std::cerr and returns when
// the file cannot be opened.
void fileload(std::string filename) {
  std::ifstream inputfile(filename);
  if (inputfile) {
    std::cout << "#### " << std::endl;
    std::cout << "# reading " << filename << std::endl;
    std::cout << "#### " << std::endl;
    std::vector<std::string> lines;
    lines.reserve(10000); // let us reserve plenty of memory.
    size_t volume = 0;
    for (std::string line; getline(inputfile, line);) {
      volume += line.size();
      lines.push_back(line);
    }
    std::cout << "# read " << lines.size() << " lines " << std::endl;
    process(lines, volume);
  } else {
    std::cerr << "can't open " << filename << std::endl;
  }
}
// Entry point: reports whether hardware counters are usable, then benchmarks
// the canada.txt and mesh.txt files found under BENCHMARK_DATA_DIR.
int main(int argc, char **argv) {
  if (!collector.has_events()) {
#if defined(__linux__) || (__APPLE__ && __aarch64__)
    std::cout << "# Hardware counters not available, try to run in privileged "
                 "mode (e.g., sudo)."
              << std::endl;
#endif
  } else {
    std::cout << "# Using hardware counters" << std::endl;
  }
  const std::string data_dir(BENCHMARK_DATA_DIR);
  fileload(data_dir + "/canada.txt");
  fileload(data_dir + "/mesh.txt");
  return 0;
}

161
benchmarks/event_counter.h Normal file
View File

@ -0,0 +1,161 @@
#ifndef __EVENT_COUNTER_H
#define __EVENT_COUNTER_H
#include <cctype>
#ifndef _MSC_VER
#include <dirent.h>
#endif
#include <cinttypes>
#include <cstring>
#include <chrono>
#include <vector>
#include "linux-perf-events.h"
#ifdef __linux__
#include <libgen.h>
#endif
#if __APPLE__ && __aarch64__
#include "apple_arm_events.h"
#endif
// One measurement: the elapsed wall-clock time plus a vector of raw counter
// values. The default constructor creates five zeroed counters; the first
// four are addressed through event_counter_types, the fifth has no name here.
//
// Copy and move operations are intentionally left to the compiler: declaring
// the copy operations by hand would suppress the implicit moves, which makes
// containers of event_count copy every inner vector when they grow.
struct event_count {
  std::chrono::duration<double> elapsed;
  std::vector<unsigned long long> event_counts;

  event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {}

  // Builds a measurement from an elapsed time and a set of counter values.
  // operator+ reads indices 0..4, so `_event_counts` should hold at least
  // five entries if the result is going to be summed.
  event_count(const std::chrono::duration<double> &_elapsed,
              const std::vector<unsigned long long> &_event_counts)
      : elapsed(_elapsed), event_counts(_event_counts) {}

  // The types of counters (so we can read the getter more easily)
  enum event_counter_types {
    CPU_CYCLES = 0,
    INSTRUCTIONS = 1,
    BRANCHES = 2,
    MISSED_BRANCHES = 3
  };

  // Elapsed time in seconds.
  double elapsed_sec() const {
    return std::chrono::duration<double>(elapsed).count();
  }

  // Elapsed time in nanoseconds.
  double elapsed_ns() const {
    return std::chrono::duration<double, std::nano>(elapsed).count();
  }

  double cycles() const {
    return static_cast<double>(event_counts[CPU_CYCLES]);
  }

  double instructions() const {
    return static_cast<double>(event_counts[INSTRUCTIONS]);
  }

  double branches() const {
    return static_cast<double>(event_counts[BRANCHES]);
  }

  double missed_branches() const {
    return static_cast<double>(event_counts[MISSED_BRANCHES]);
  }

  // Element-wise sum of the elapsed time and of the five counters.
  event_count operator+(const event_count &other) const {
    return event_count(elapsed + other.elapsed,
                       {
                           event_counts[0] + other.event_counts[0],
                           event_counts[1] + other.event_counts[1],
                           event_counts[2] + other.event_counts[2],
                           event_counts[3] + other.event_counts[3],
                           event_counts[4] + other.event_counts[4],
                       });
  }

  void operator+=(const event_count &other) { *this = *this + other; }
};
// Running summary over a series of event_count samples: the sum of all
// samples plus the samples with the shortest and longest elapsed time.
// The averaging getters divide by `iterations`, so they are only meaningful
// once at least one sample has been added.
struct event_aggregate {
  bool has_events = false;
  int iterations = 0;
  event_count total{};
  event_count best{};
  event_count worst{};

  event_aggregate() = default;

  // Adds one sample to the summary.
  void operator<<(const event_count &other) {
    const bool first_sample = (iterations == 0);
    if (first_sample || other.elapsed < best.elapsed) {
      best = other;
    }
    if (first_sample || other.elapsed > worst.elapsed) {
      worst = other;
    }
    ++iterations;
    total += other;
  }

  // Per-sample averages of the accumulated totals.
  double elapsed_sec() const { return per_sample(total.elapsed_sec()); }
  double elapsed_ns() const { return per_sample(total.elapsed_ns()); }
  double cycles() const { return per_sample(total.cycles()); }
  double instructions() const { return per_sample(total.instructions()); }
  double branches() const { return per_sample(total.branches()); }
  double missed_branches() const {
    return per_sample(total.missed_branches());
  }

private:
  // Divides an accumulated value by the number of samples seen so far.
  double per_sample(double accumulated) const {
    return accumulated / iterations;
  }
};
// Measures one region of code: start() records the starting time (and starts
// the platform counters when there are any), end() stores the elapsed time and
// counter values in `count` and returns a reference to it.
//
// Three variants are selected at compile time: Linux (LinuxEvents), Apple on
// aarch64 (performance_counters), and a fallback that only measures time.
// Every platform check uses the same macro spelling so that the constructor,
// start() and end() always agree on which variant is active.
struct event_collector {
  event_count count{};
  std::chrono::time_point<std::chrono::steady_clock> start_clock{};

#if defined(__linux__)
  LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
  // The order of the events matches event_count::event_counter_types.
  event_collector()
      : linux_events(std::vector<int>{
            PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS,
            PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions
            PERF_COUNT_HW_BRANCH_MISSES}) {}
  bool has_events() { return linux_events.is_working(); }
#elif __APPLE__ && __aarch64__
  performance_counters diff;
  event_collector() : diff(0) { setup_performance_counters(); }
  bool has_events() { return setup_performance_counters(); }
#else
  event_collector() {}
  bool has_events() { return false; }
#endif

  // Begins a measurement.
  inline void start() {
#if defined(__linux__)
    linux_events.start();
#elif __APPLE__ && __aarch64__
    if (has_events()) {
      diff = get_counters();
    }
#endif
    start_clock = std::chrono::steady_clock::now();
  }

  // Ends the measurement begun by start(). The returned reference points at
  // the `count` member, which the next end() call overwrites.
  inline event_count &end() {
    // Read the clock first so that collecting the counters is not timed.
    const auto end_clock = std::chrono::steady_clock::now();
#if defined(__linux__)
    linux_events.end(count.event_counts);
#elif __APPLE__ && __aarch64__
    if (has_events()) {
      performance_counters end = get_counters();
      diff = end - diff;
    }
    count.event_counts[0] = diff.cycles;
    count.event_counts[1] = diff.instructions;
    count.event_counts[2] = diff.branches;
    count.event_counts[3] = diff.missed_branches;
    count.event_counts[4] = 0;
#endif
    count.elapsed = end_clock - start_clock;
    return count;
  }
};
#endif

View File

@ -0,0 +1,104 @@
#pragma once
#ifdef __linux__
#include <asm/unistd.h> // for __NR_perf_event_open
#include <linux/perf_event.h> // for perf event constants
#include <sys/ioctl.h> // for ioctl
#include <unistd.h> // for syscall
#include <cerrno> // for errno
#include <cstring> // for memset
#include <stdexcept>
#include <iostream>
#include <vector>
// Thin wrapper around a group of Linux perf events of type TYPE.
//
// The constructor opens one event per entry of `config_vec`; the first event
// that opens becomes the group leader and the rest join its group. start()
// resets and enables the group, end() disables it and reads every counter.
//
// Failures never throw: they clear the flag returned by is_working().
//
// The object owns the descriptors it opened and closes all of them in the
// destructor, so it cannot be copied.
template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
  int fd; // group leader, or -1 when no event could be opened
  bool working;
  perf_event_attr attribs{};
  size_t num_events{};
  std::vector<uint64_t> temp_result_vec{};
  std::vector<uint64_t> ids{};
  std::vector<int> opened_fds{}; // every descriptor opened by the constructor

public:
  explicit LinuxEvents(std::vector<int> config_vec) : fd(-1), working(true) {
    memset(&attribs, 0, sizeof(attribs));
    attribs.type = TYPE;
    attribs.size = sizeof(attribs);
    attribs.disabled = 1;
    attribs.exclude_kernel = 1;
    attribs.exclude_hv = 1;

    attribs.sample_period = 0;
    attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
    const int pid = 0;  // the current process
    const int cpu = -1; // all CPUs
    const unsigned long flags = 0;

    int group = -1; // no group
    num_events = config_vec.size();
    ids.resize(config_vec.size());
    opened_fds.reserve(config_vec.size());
    size_t slot = 0;
    for (auto config : config_vec) {
      attribs.config = config;
      const int event_fd = static_cast<int>(
          syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags));
      if (event_fd == -1) {
        // Nothing was opened for this slot: leave its id at 0 and move on.
        report_error("perf_event_open");
        ++slot;
        continue;
      }
      opened_fds.push_back(event_fd);
      if (ioctl(event_fd, PERF_EVENT_IOC_ID, &ids[slot]) == -1) {
        report_error("ioctl(PERF_EVENT_IOC_ID)");
      }
      ++slot;
      if (group == -1) {
        group = event_fd;
        fd = event_fd;
      }
    }

    // Layout read back in end(): one leading entry, then a (value, id) pair
    // per event.
    temp_result_vec.resize(num_events * 2 + 1);
  }

  LinuxEvents(const LinuxEvents &) = delete;
  LinuxEvents &operator=(const LinuxEvents &) = delete;

  ~LinuxEvents() {
    // Close the group members before the leader (reverse opening order).
    for (size_t i = opened_fds.size(); i != 0; --i) {
      close(opened_fds[i - 1]);
    }
  }

  // Resets and enables every counter of the group.
  inline void start() {
    if (fd != -1) {
      if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
        report_error("ioctl(PERF_EVENT_IOC_RESET)");
      }

      if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
        report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
      }
    }
  }

  // Disables the group and writes counter k into results[k]. Entries beyond
  // the end of `results` are dropped rather than written out of bounds.
  inline void end(std::vector<unsigned long long> &results) {
    if (fd != -1) {
      if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
        report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
      }

      if (read(fd, temp_result_vec.data(),
               temp_result_vec.size() * sizeof(uint64_t)) == -1) {
        report_error("read");
      }
    }
    // our actual results are in slots 1,3,5, ... of this structure
    for (size_t i = 1; i < temp_result_vec.size(); i += 2) {
      const size_t target = i / 2;
      if (target < results.size()) {
        results[target] = temp_result_vec[i];
      }
    }
    // slots 2,4,6, ... carry the event ids; they must match the ids recorded
    // when the events were opened
    for (size_t i = 2; i < temp_result_vec.size(); i += 2) {
      if (ids[i / 2 - 1] != temp_result_vec[i]) {
        report_error("event mismatch");
      }
    }
  }

  // False once any system call or consistency check has failed.
  bool is_working() const { return working; }

private:
  void report_error(const std::string &) { working = false; }
};
#endif

View File

@ -97,6 +97,15 @@ cc_test(
],
)
cc_test(
name = "supported_chars_test",
srcs = ["supported_chars_test.cpp"],
deps = [
"//:fast_float",
"@doctest//doctest",
],
)
cc_test(
name = "string_test",
srcs = ["string_test.cpp"],

View File

@ -68,6 +68,7 @@ endfunction(fast_float_add_cpp_test)
fast_float_add_cpp_test(rcppfastfloat_test)
fast_float_add_cpp_test(wide_char_test)
fast_float_add_cpp_test(supported_chars_test)
fast_float_add_cpp_test(example_test)
fast_float_add_cpp_test(example_comma_test)
fast_float_add_cpp_test(basictest)

View File

@ -0,0 +1,53 @@
#include "fast_float/fast_float.h"
#include <iostream>
#include <string>
#include <system_error>
// Converts `s` to a string of character type UC, parses it as a double with
// fast_float::from_chars and compares the result with `expected` (two NaNs
// count as equal). Writes a diagnostic to std::cerr and returns false on a
// parse error or a mismatch.
template <typename UC> bool test(std::string s, double expected) {
  const std::basic_string<UC> input(s.begin(), s.end());
  const UC *const first = input.data();
  const UC *const last = first + input.size();
  double result;
  const auto answer = fast_float::from_chars(first, last, result);
  const bool parsed = (answer.ec == std::errc());
  if (!parsed) {
    std::cerr << "parsing of \"" << s << "\" should succeed\n";
    return false;
  }
  const bool both_nan = std::isnan(result) && std::isnan(expected);
  if (!both_nan && result != expected) {
    std::cerr << "parsing of \"" << s << "\" succeeded, expected " << expected
              << " got " << result << "\n";
    return false;
  }
  return true;
}
// Prints `message` on std::cout and yields the failing exit status.
static int report_failure(const char *message) {
  std::cout << message << std::endl;
  return EXIT_FAILURE;
}

// Parses "4.2" once per supported character type and stops at the first
// failure.
int main() {
  if (!test<char>("4.2", 4.2)) {
    return report_failure("test failure for char");
  }
  if (!test<wchar_t>("4.2", 4.2)) {
    return report_failure("test failure for wchar_t");
  }
#ifdef __cpp_char8_t
  if (!test<char8_t>("4.2", 4.2)) {
    return report_failure("test failure for char8_t");
  }
#endif
  if (!test<char16_t>("4.2", 4.2)) {
    return report_failure("test failure for char16_t");
  }
  if (!test<char32_t>("4.2", 4.2)) {
    return report_failure("test failure for char32_t");
  }

  std::cout << "all ok" << std::endl;
  return EXIT_SUCCESS;
}