mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-24 04:24:52 +08:00
Merge pull request #350 from fastfloat/add_bench_ip
adding IP address benchmark
This commit is contained in:
commit
62fb615be9
@ -1,9 +1,27 @@
|
||||
include(FetchContent)
|
||||
|
||||
FetchContent_Declare(
|
||||
counters
|
||||
GIT_REPOSITORY https://github.com/lemire/counters.git
|
||||
GIT_TAG v2.0.0
|
||||
)
|
||||
|
||||
FetchContent_MakeAvailable(counters)
|
||||
|
||||
add_executable(realbenchmark benchmark.cpp)
|
||||
target_link_libraries(realbenchmark PRIVATE counters::counters)
|
||||
add_executable(bench_ip bench_ip.cpp)
|
||||
target_link_libraries(bench_ip PRIVATE counters::counters)
|
||||
|
||||
set_property(
|
||||
TARGET realbenchmark
|
||||
PROPERTY CXX_STANDARD 17)
|
||||
|
||||
set_property(
|
||||
TARGET bench_ip
|
||||
PROPERTY CXX_STANDARD 17)
|
||||
target_link_libraries(realbenchmark PUBLIC fast_float)
|
||||
target_link_libraries(bench_ip PUBLIC fast_float)
|
||||
|
||||
include(ExternalProject)
|
||||
|
||||
# Define the external project
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
164
benchmarks/bench_ip.cpp
Normal file
164
benchmarks/bench_ip.cpp
Normal file
@ -0,0 +1,164 @@
|
||||
#include "counters/bench.h"
|
||||
#include "fast_float/fast_float.h"
|
||||
#include <charconv>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <random>
|
||||
#include <atomic>
|
||||
#include <string>
|
||||
|
||||
void pretty_print(size_t volume, size_t bytes, std::string name,
|
||||
counters::event_aggregate agg) {
|
||||
printf("%-40s : ", name.c_str());
|
||||
printf(" %5.2f GB/s ", bytes / agg.fastest_elapsed_ns());
|
||||
printf(" %5.1f Ma/s ", volume * 1000.0 / agg.fastest_elapsed_ns());
|
||||
printf(" %5.2f ns/d ", agg.fastest_elapsed_ns() / volume);
|
||||
if (counters::event_collector().has_events()) {
|
||||
printf(" %5.2f GHz ", agg.fastest_cycles() / agg.fastest_elapsed_ns());
|
||||
printf(" %5.2f c/d ", agg.fastest_cycles() / volume);
|
||||
printf(" %5.2f i/d ", agg.fastest_instructions() / volume);
|
||||
printf(" %5.2f c/b ", agg.fastest_cycles() / bytes);
|
||||
printf(" %5.2f i/b ", agg.fastest_instructions() / bytes);
|
||||
printf(" %5.2f i/c ", agg.fastest_instructions() / agg.fastest_cycles());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) {
|
||||
if (p == pend)
|
||||
return 0;
|
||||
auto r = fast_float::from_chars(p, pend, *out);
|
||||
if (r.ec == std::errc()) {
|
||||
p = r.ptr;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Parses one unsigned 8-bit integer from [p, pend) with std::from_chars.
///
/// @param p    cursor; advanced past the parsed text on success only
/// @param pend one past the last readable character
/// @param out  receives the parsed value
/// @return 1 on success, 0 when the range is empty or std::from_chars reports
///         an error (no digits, or value out of range for uint8_t)
static inline int parse_u8_fromchars(const char *&p, const char *pend,
                                     uint8_t *out) {
  if (p == pend) {
    return 0;
  }
  const std::from_chars_result res = std::from_chars(p, pend, *out);
  const bool parsed = (res.ec == std::errc());
  if (parsed) {
    p = res.ptr;
  }
  return parsed ? 1 : 0;
}
|
||||
|
||||
/// Parses one dotted-quad line ("a.b.c.d", optionally followed by "\r" and
/// then "\n" or end of input) starting at `p`.
///
/// @tparam Parser callable `int(const char *&, const char *, uint8_t *)` that
///         parses one octet, advances the cursor and returns non-zero on
///         success
/// @param p    cursor; on success it points just past the line terminator
/// @param pend one past the last readable character
/// @param sum  accumulator; every successfully parsed octet is added to it,
///             including octets parsed before a later failure
/// @param parse_uint8 the octet parser
/// @return 1 when a complete, correctly terminated line was consumed, else 0
template <typename Parser>
static inline int parse_ip_line(const char *&p, const char *pend, uint32_t &sum,
                                Parser parse_uint8) {
  uint8_t o = 0;
  for (int i = 0; i < 4; ++i) {
    if (!parse_uint8(p, pend, &o))
      return 0;
    sum += o;
    // The first three octets must each be followed by a '.' separator.
    if (i != 3) {
      if (p == pend || *p != '.')
        return 0;
      ++p;
    }
  }
  // consume optional '\r'
  if (p != pend && *p == '\r')
    ++p;
  // The line must end here: either at end of input or with a '\n'.
  // Anything else (e.g. "1.2.3.4x") is trailing garbage and is rejected.
  if (p == pend)
    return 1;
  if (*p != '\n')
    return 0;
  ++p;
  return 1;
}
|
||||
|
||||
/// Formats four octets as a dotted-quad text line terminated by '\n',
/// e.g. (192, 168, 0, 1) -> "192.168.0.1\n".
static std::string make_ip_line(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
  const uint8_t octets[4] = {a, b, c, d};
  std::string line;
  line.reserve(16); // longest result is "255.255.255.255\n" (16 characters)
  for (int k = 0; k < 4; ++k) {
    line += std::to_string(octets[k]);
    line += (k == 3) ? '\n' : '.';
  }
  return line;
}
|
||||
|
||||
int main() {
|
||||
constexpr size_t N = 500000;
|
||||
std::mt19937 rng(1234);
|
||||
std::uniform_int_distribution<int> dist(0, 255);
|
||||
|
||||
std::string buf;
|
||||
buf.reserve(N * 16);
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
uint8_t a = (uint8_t)dist(rng);
|
||||
uint8_t b = (uint8_t)dist(rng);
|
||||
uint8_t c = (uint8_t)dist(rng);
|
||||
uint8_t d = (uint8_t)dist(rng);
|
||||
buf += make_ip_line(a, b, c, d);
|
||||
}
|
||||
|
||||
// sentinel to allow 4-byte loads at end
|
||||
buf.append(4, '\0');
|
||||
|
||||
const size_t bytes = buf.size() - 4; // exclude sentinel from throughput
|
||||
const size_t volume = N;
|
||||
|
||||
// validate correctness
|
||||
{
|
||||
const char *start = buf.data();
|
||||
const char *end = buf.data() + bytes;
|
||||
const char *p = start;
|
||||
const char *pend = end;
|
||||
uint32_t sum = 0;
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
int ok = parse_ip_line(p, pend, sum, parse_u8_fromchars);
|
||||
if (!ok) {
|
||||
std::fprintf(stderr, "fromchars parse failed at line %zu\n", i);
|
||||
std::abort();
|
||||
}
|
||||
p = start;
|
||||
pend = end;
|
||||
ok = parse_ip_line(p, pend, sum, parse_u8_fastfloat);
|
||||
if (!ok) {
|
||||
std::fprintf(stderr, "fastswar parse failed at line %zu\n", i);
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t sink = 0;
|
||||
|
||||
pretty_print(volume, bytes, "parse_ip_std_fromchars", counters::bench([&]() {
|
||||
const char *p = buf.data();
|
||||
const char *pend = buf.data() + bytes;
|
||||
uint32_t sum = 0;
|
||||
int ok = 0;
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
ok = parse_ip_line(p, pend, sum, parse_u8_fromchars);
|
||||
if (!ok)
|
||||
std::abort();
|
||||
}
|
||||
sink += sum;
|
||||
}));
|
||||
|
||||
pretty_print(volume, bytes, "parse_ip_fastfloat", counters::bench([&]() {
|
||||
const char *p = buf.data();
|
||||
const char *pend = buf.data() + bytes;
|
||||
uint32_t sum = 0;
|
||||
int ok = 0;
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
ok = parse_ip_line(p, pend, sum, parse_u8_fastfloat);
|
||||
if (!ok)
|
||||
std::abort();
|
||||
}
|
||||
sink += sum;
|
||||
}));
|
||||
|
||||
std::printf("sink=%u\n", sink);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
#if defined(__linux__) || (__APPLE__ && __aarch64__)
|
||||
#define USING_COUNTERS
|
||||
#endif
|
||||
#include "event_counter.h"
|
||||
#include "counters/event_counter.h"
|
||||
#include <algorithm>
|
||||
#include "fast_float/fast_float.h"
|
||||
#include <chrono>
|
||||
@ -50,14 +50,14 @@ double findmax_fastfloat32(std::vector<std::basic_string<CharT>> &s) {
|
||||
return answer;
|
||||
}
|
||||
|
||||
event_collector collector{};
|
||||
counters::event_collector collector{};
|
||||
|
||||
#ifdef USING_COUNTERS
|
||||
template <class T, class CharT>
|
||||
std::vector<event_count>
|
||||
std::vector<counters::event_count>
|
||||
time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
|
||||
size_t repeat) {
|
||||
std::vector<event_count> aggregate;
|
||||
std::vector<counters::event_count> aggregate;
|
||||
bool printed_bug = false;
|
||||
for (size_t i = 0; i < repeat; i++) {
|
||||
collector.start();
|
||||
@ -72,7 +72,7 @@ time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
|
||||
}
|
||||
|
||||
void pretty_print(double volume, size_t number_of_floats, std::string name,
|
||||
std::vector<event_count> events) {
|
||||
std::vector<counters::event_count> events) {
|
||||
double volumeMB = volume / (1024. * 1024.);
|
||||
double average_ns{0};
|
||||
double min_ns{DBL_MAX};
|
||||
@ -84,7 +84,7 @@ void pretty_print(double volume, size_t number_of_floats, std::string name,
|
||||
double branches_avg{0};
|
||||
double branch_misses_min{0};
|
||||
double branch_misses_avg{0};
|
||||
for (event_count e : events) {
|
||||
for (counters::event_count e : events) {
|
||||
double ns = e.elapsed_ns();
|
||||
average_ns += ns;
|
||||
min_ns = min_ns < ns ? min_ns : ns;
|
||||
@ -102,7 +102,7 @@ void pretty_print(double volume, size_t number_of_floats, std::string name,
|
||||
branches_avg += branches;
|
||||
branches_min = branches_min < branches ? branches_min : branches;
|
||||
|
||||
double branch_misses = e.missed_branches();
|
||||
double branch_misses = e.branch_misses();
|
||||
branch_misses_avg += branch_misses;
|
||||
branch_misses_min =
|
||||
branch_misses_min < branch_misses ? branch_misses_min : branch_misses;
|
||||
|
||||
@ -1,181 +0,0 @@
|
||||
#ifndef __EVENT_COUNTER_H
|
||||
#define __EVENT_COUNTER_H
|
||||
|
||||
#include <cctype>
|
||||
#ifndef _MSC_VER
|
||||
#include <dirent.h>
|
||||
#endif
|
||||
#include <cinttypes>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
|
||||
#include "linux-perf-events.h"
|
||||
#ifdef __linux__
|
||||
#include <libgen.h>
|
||||
#endif
|
||||
|
||||
#if (defined(__APPLE__) && __APPLE__) && (defined(__aarch64__) && __aarch64__)
|
||||
#include "apple_arm_events.h"
|
||||
#endif
|
||||
|
||||
/// One measurement: wall-clock time plus a vector of raw hardware counter
/// values, indexed by `event_counter_types`.
///
/// Copy and move operations are compiler-generated (Rule of Zero); both
/// members manage themselves.
struct event_count {
  std::chrono::duration<double> elapsed;
  std::vector<unsigned long long> event_counts;

  /// Zero elapsed time and five zeroed counter slots.
  event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {}

  /// Builds a measurement from an elapsed time and raw counter values.
  /// `_event_counts` may have any length; slots that are missing read as 0
  /// in the getters and in `operator+`.
  event_count(const std::chrono::duration<double> _elapsed,
              const std::vector<unsigned long long> &_event_counts)
      : elapsed(_elapsed), event_counts(_event_counts) {}

  // The types of counters (so we can read the getter more easily)
  enum event_counter_types {
    CPU_CYCLES = 0,
    INSTRUCTIONS = 1,
    BRANCHES = 2,
    MISSED_BRANCHES = 3
  };

  /// Raw counter at `idx`, or 0 when this measurement has no such slot.
  unsigned long long count_or_zero(std::size_t idx) const {
    return idx < event_counts.size() ? event_counts[idx] : 0ULL;
  }

  /// Elapsed time in seconds.
  double elapsed_sec() const {
    return std::chrono::duration<double>(elapsed).count();
  }

  /// Elapsed time in nanoseconds.
  double elapsed_ns() const {
    return std::chrono::duration<double, std::nano>(elapsed).count();
  }

  double cycles() const {
    return static_cast<double>(count_or_zero(CPU_CYCLES));
  }

  double instructions() const {
    return static_cast<double>(count_or_zero(INSTRUCTIONS));
  }

  double branches() const {
    return static_cast<double>(count_or_zero(BRANCHES));
  }

  double missed_branches() const {
    return static_cast<double>(count_or_zero(MISSED_BRANCHES));
  }

  /// Element-wise sum of times and counters. The result has as many counter
  /// slots as the longer operand; the shorter one is treated as zero-padded,
  /// so operands of different lengths never read out of bounds.
  event_count operator+(const event_count &other) const {
    const std::size_t slots = event_counts.size() > other.event_counts.size()
                                  ? event_counts.size()
                                  : other.event_counts.size();
    std::vector<unsigned long long> summed(slots, 0ULL);
    for (std::size_t i = 0; i < slots; ++i) {
      summed[i] = count_or_zero(i) + other.count_or_zero(i);
    }
    return event_count(elapsed + other.elapsed, summed);
  }

  /// Adds `other` into this measurement and returns `*this`.
  event_count &operator+=(const event_count &other) {
    *this = *this + other;
    return *this;
  }
};
|
||||
|
||||
struct event_aggregate {
|
||||
bool has_events = false;
|
||||
int iterations = 0;
|
||||
event_count total{};
|
||||
event_count best{};
|
||||
event_count worst{};
|
||||
|
||||
event_aggregate() = default;
|
||||
|
||||
void operator<<(const event_count &other) {
|
||||
if (iterations == 0 || other.elapsed < best.elapsed) {
|
||||
best = other;
|
||||
}
|
||||
if (iterations == 0 || other.elapsed > worst.elapsed) {
|
||||
worst = other;
|
||||
}
|
||||
iterations++;
|
||||
total += other;
|
||||
}
|
||||
|
||||
double elapsed_sec() const { return total.elapsed_sec() / iterations; }
|
||||
|
||||
double elapsed_ns() const { return total.elapsed_ns() / iterations; }
|
||||
|
||||
double cycles() const { return total.cycles() / iterations; }
|
||||
|
||||
double instructions() const { return total.instructions() / iterations; }
|
||||
|
||||
double branches() const { return total.branches() / iterations; }
|
||||
|
||||
double missed_branches() const {
|
||||
return total.missed_branches() / iterations;
|
||||
}
|
||||
};
|
||||
|
||||
struct event_collector {
|
||||
event_count count{};
|
||||
std::chrono::time_point<std::chrono::steady_clock> start_clock{};
|
||||
|
||||
#if defined(__linux__)
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
|
||||
|
||||
event_collector()
|
||||
: linux_events(std::vector<int>{
|
||||
PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS,
|
||||
PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions
|
||||
PERF_COUNT_HW_BRANCH_MISSES}) {}
|
||||
|
||||
bool has_events() { return linux_events.is_working(); }
|
||||
#elif __APPLE__ && __aarch64__
|
||||
performance_counters diff;
|
||||
|
||||
event_collector() : diff(0) { setup_performance_counters(); }
|
||||
|
||||
bool has_events() { return setup_performance_counters(); }
|
||||
#else
|
||||
event_collector() {}
|
||||
|
||||
bool has_events() { return false; }
|
||||
#endif
|
||||
|
||||
inline void start() {
|
||||
#if defined(__linux)
|
||||
linux_events.start();
|
||||
#elif __APPLE__ && __aarch64__
|
||||
if (has_events()) {
|
||||
diff = get_counters();
|
||||
}
|
||||
#endif
|
||||
start_clock = std::chrono::steady_clock::now();
|
||||
}
|
||||
|
||||
inline event_count &end() {
|
||||
const auto end_clock = std::chrono::steady_clock::now();
|
||||
#if defined(__linux)
|
||||
linux_events.end(count.event_counts);
|
||||
#elif __APPLE__ && __aarch64__
|
||||
if (has_events()) {
|
||||
performance_counters end = get_counters();
|
||||
diff = end - diff;
|
||||
}
|
||||
count.event_counts[0] = diff.cycles;
|
||||
count.event_counts[1] = diff.instructions;
|
||||
count.event_counts[2] = diff.branches;
|
||||
count.event_counts[3] = diff.missed_branches;
|
||||
count.event_counts[4] = 0;
|
||||
#endif
|
||||
count.elapsed = end_clock - start_clock;
|
||||
return count;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -1,104 +0,0 @@
|
||||
#pragma once
|
||||
#ifdef __linux__
|
||||
|
||||
#include <asm/unistd.h> // for __NR_perf_event_open
|
||||
#include <linux/perf_event.h> // for perf event constants
|
||||
#include <sys/ioctl.h> // for ioctl
|
||||
#include <unistd.h> // for syscall
|
||||
|
||||
#include <cerrno> // for errno
|
||||
#include <cstring> // for memset
|
||||
#include <stdexcept>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
|
||||
int fd;
|
||||
bool working;
|
||||
perf_event_attr attribs{};
|
||||
size_t num_events{};
|
||||
std::vector<uint64_t> temp_result_vec{};
|
||||
std::vector<uint64_t> ids{};
|
||||
|
||||
public:
|
||||
explicit LinuxEvents(std::vector<int> config_vec) : fd(0), working(true) {
|
||||
memset(&attribs, 0, sizeof(attribs));
|
||||
attribs.type = TYPE;
|
||||
attribs.size = sizeof(attribs);
|
||||
attribs.disabled = 1;
|
||||
attribs.exclude_kernel = 1;
|
||||
attribs.exclude_hv = 1;
|
||||
|
||||
attribs.sample_period = 0;
|
||||
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
|
||||
const int pid = 0; // the current process
|
||||
const int cpu = -1; // all CPUs
|
||||
const unsigned long flags = 0;
|
||||
|
||||
int group = -1; // no group
|
||||
num_events = config_vec.size();
|
||||
ids.resize(config_vec.size());
|
||||
uint32_t i = 0;
|
||||
for (auto config : config_vec) {
|
||||
attribs.config = config;
|
||||
int _fd = static_cast<int>(
|
||||
syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags));
|
||||
if (_fd == -1) {
|
||||
report_error("perf_event_open");
|
||||
}
|
||||
ioctl(_fd, PERF_EVENT_IOC_ID, &ids[i++]);
|
||||
if (group == -1) {
|
||||
group = _fd;
|
||||
fd = _fd;
|
||||
}
|
||||
}
|
||||
|
||||
temp_result_vec.resize(num_events * 2 + 1);
|
||||
}
|
||||
|
||||
~LinuxEvents() {
|
||||
if (fd != -1) {
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
|
||||
inline void start() {
|
||||
if (fd != -1) {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_RESET)");
|
||||
}
|
||||
|
||||
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void end(std::vector<unsigned long long> &results) {
|
||||
if (fd != -1) {
|
||||
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
|
||||
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
|
||||
}
|
||||
|
||||
if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) {
|
||||
report_error("read");
|
||||
}
|
||||
}
|
||||
// our actual results are in slots 1,3,5, ... of this structure
|
||||
for (uint32_t i = 1; i < temp_result_vec.size(); i += 2) {
|
||||
results[i / 2] = temp_result_vec[i];
|
||||
}
|
||||
for (uint32_t i = 2; i < temp_result_vec.size(); i += 2) {
|
||||
if (ids[i / 2 - 1] != temp_result_vec[i]) {
|
||||
report_error("event mismatch");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool is_working() { return working; }
|
||||
|
||||
private:
|
||||
void report_error(const std::string &) { working = false; }
|
||||
};
|
||||
#endif
|
||||
Loading…
x
Reference in New Issue
Block a user