fast_float/benchmarks/event_counter.h
2024-11-25 11:59:03 -05:00

153 lines
4.4 KiB
C++

#ifndef __EVENT_COUNTER_H
#define __EVENT_COUNTER_H
#include <cctype>
#ifndef _MSC_VER
#include <dirent.h>
#endif
#include <cinttypes>
#include <cstring>
#include <chrono>
#include <vector>
#include "linux-perf-events.h"
#ifdef __linux__
#include <libgen.h>
#endif
#if __APPLE__ && __aarch64__
#include "apple_arm_events.h"
#endif
struct event_count {
std::chrono::duration<double> elapsed;
std::vector<unsigned long long> event_counts;
event_count() : elapsed(0), event_counts{0,0,0,0,0} {}
event_count(const std::chrono::duration<double> _elapsed, const std::vector<unsigned long long> _event_counts) : elapsed(_elapsed), event_counts(_event_counts) {}
event_count(const event_count& other): elapsed(other.elapsed), event_counts(other.event_counts) { }
// The types of counters (so we can read the getter more easily)
enum event_counter_types {
CPU_CYCLES = 0,
INSTRUCTIONS = 1,
BRANCHES = 2,
MISSED_BRANCHES = 3
};
double elapsed_sec() const { return std::chrono::duration<double>(elapsed).count(); }
double elapsed_ns() const { return std::chrono::duration<double, std::nano>(elapsed).count(); }
double cycles() const { return static_cast<double>(event_counts[CPU_CYCLES]); }
double instructions() const { return static_cast<double>(event_counts[INSTRUCTIONS]); }
double branches() const { return static_cast<double>(event_counts[BRANCHES]); }
double missed_branches() const { return static_cast<double>(event_counts[MISSED_BRANCHES]); }
event_count& operator=(const event_count& other) {
this->elapsed = other.elapsed;
this->event_counts = other.event_counts;
return *this;
}
event_count operator+(const event_count& other) const {
return event_count(elapsed+other.elapsed, {
event_counts[0]+other.event_counts[0],
event_counts[1]+other.event_counts[1],
event_counts[2]+other.event_counts[2],
event_counts[3]+other.event_counts[3],
event_counts[4]+other.event_counts[4],
});
}
void operator+=(const event_count& other) {
*this = *this + other;
}
};
struct event_aggregate {
bool has_events = false;
int iterations = 0;
event_count total{};
event_count best{};
event_count worst{};
event_aggregate() = default;
void operator<<(const event_count& other) {
if (iterations == 0 || other.elapsed < best.elapsed) {
best = other;
}
if (iterations == 0 || other.elapsed > worst.elapsed) {
worst = other;
}
iterations++;
total += other;
}
double elapsed_sec() const { return total.elapsed_sec() / iterations; }
double elapsed_ns() const { return total.elapsed_ns() / iterations; }
double cycles() const { return total.cycles() / iterations; }
double instructions() const { return total.instructions() / iterations; }
double branches() const { return total.branches() / iterations; }
double missed_branches() const { return total.missed_branches() / iterations; }
};
struct event_collector {
event_count count{};
std::chrono::time_point<std::chrono::steady_clock> start_clock{};
#if defined(__linux__)
LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
event_collector() : linux_events(std::vector<int>{
PERF_COUNT_HW_CPU_CYCLES,
PERF_COUNT_HW_INSTRUCTIONS,
PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions
PERF_COUNT_HW_BRANCH_MISSES
}) {}
bool has_events() {
return linux_events.is_working();
}
#elif __APPLE__ && __aarch64__
performance_counters diff;
event_collector() : diff(0) {
setup_performance_counters();
}
bool has_events() {
return setup_performance_counters();
}
#else
event_collector() {}
bool has_events() {
return false;
}
#endif
inline void start() {
#if defined(__linux)
linux_events.start();
#elif __APPLE__ && __aarch64__
if(has_events()) { diff = get_counters(); }
#endif
start_clock = std::chrono::steady_clock::now();
}
inline event_count& end() {
const auto end_clock = std::chrono::steady_clock::now();
#if defined(__linux)
linux_events.end(count.event_counts);
#elif __APPLE__ && __aarch64__
if(has_events()) {
performance_counters end = get_counters();
diff = end - diff;
}
count.event_counts[0] = diff.cycles;
count.event_counts[1] = diff.instructions;
count.event_counts[2] = diff.branches;
count.event_counts[3] = diff.missed_branches;
count.event_counts[4] = 0;
#endif
count.elapsed = end_clock - start_clock;
return count;
}
};
#endif