#ifndef __EVENT_COUNTER_H #define __EVENT_COUNTER_H #include #ifndef _MSC_VER #include #endif #include #include #include #include #include "linux-perf-events.h" #ifdef __linux__ #include #endif #if __APPLE__ && __aarch64__ #include "apple_arm_events.h" #endif struct event_count { std::chrono::duration elapsed; std::vector event_counts; event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {} event_count(const std::chrono::duration _elapsed, const std::vector _event_counts) : elapsed(_elapsed), event_counts(_event_counts) {} event_count(const event_count &other) : elapsed(other.elapsed), event_counts(other.event_counts) {} // The types of counters (so we can read the getter more easily) enum event_counter_types { CPU_CYCLES = 0, INSTRUCTIONS = 1, BRANCHES = 2, MISSED_BRANCHES = 3 }; double elapsed_sec() const { return std::chrono::duration(elapsed).count(); } double elapsed_ns() const { return std::chrono::duration(elapsed).count(); } double cycles() const { return static_cast(event_counts[CPU_CYCLES]); } double instructions() const { return static_cast(event_counts[INSTRUCTIONS]); } double branches() const { return static_cast(event_counts[BRANCHES]); } double missed_branches() const { return static_cast(event_counts[MISSED_BRANCHES]); } event_count &operator=(const event_count &other) { this->elapsed = other.elapsed; this->event_counts = other.event_counts; return *this; } event_count operator+(const event_count &other) const { return event_count(elapsed + other.elapsed, { event_counts[0] + other.event_counts[0], event_counts[1] + other.event_counts[1], event_counts[2] + other.event_counts[2], event_counts[3] + other.event_counts[3], event_counts[4] + other.event_counts[4], }); } void operator+=(const event_count &other) { *this = *this + other; } }; struct event_aggregate { bool has_events = false; int iterations = 0; event_count total{}; event_count best{}; event_count worst{}; event_aggregate() = default; void operator<<(const event_count &other) { if (iterations == 0 || other.elapsed < best.elapsed) { best = other; } if (iterations == 0 || other.elapsed > worst.elapsed) { worst = other; } iterations++; total += other; } double elapsed_sec() const { return total.elapsed_sec() / iterations; } double elapsed_ns() const { return total.elapsed_ns() / iterations; } double cycles() const { return total.cycles() / iterations; } double instructions() const { return total.instructions() / iterations; } double branches() const { return total.branches() / iterations; } double missed_branches() const { return total.missed_branches() / iterations; } }; struct event_collector { event_count count{}; std::chrono::time_point start_clock{}; #if defined(__linux__) LinuxEvents linux_events; event_collector() : linux_events(std::vector{ PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions PERF_COUNT_HW_BRANCH_MISSES}) {} bool has_events() { return linux_events.is_working(); } #elif __APPLE__ && __aarch64__ performance_counters diff; event_collector() : diff(0) { setup_performance_counters(); } bool has_events() { return setup_performance_counters(); } #else event_collector() {} bool has_events() { return false; } #endif inline void start() { #if defined(__linux) linux_events.start(); #elif __APPLE__ && __aarch64__ if (has_events()) { diff = get_counters(); } #endif start_clock = std::chrono::steady_clock::now(); } inline event_count &end() { const auto end_clock = std::chrono::steady_clock::now(); #if defined(__linux) linux_events.end(count.event_counts); #elif __APPLE__ && __aarch64__ if (has_events()) { performance_counters end = get_counters(); diff = end - diff; } count.event_counts[0] = diff.cycles; count.event_counts[1] = diff.instructions; count.event_counts[2] = diff.branches; count.event_counts[3] = diff.missed_branches; count.event_counts[4] = 0; #endif count.elapsed = end_clock - start_clock; return count; } }; #endif