mirror of
https://github.com/fastfloat/fast_float.git
synced 2025-12-06 16:56:57 +08:00
1112 lines
39 KiB
C
1112 lines
39 KiB
C
// Original design from:
|
|
// =============================================================================
|
|
// XNU kperf/kpc
|
|
// Available for 64-bit Intel/Apple Silicon, macOS/iOS, with root privileges
|
|
//
|
|
// References:
|
|
//
|
|
// XNU source (since xnu 2422.1.72):
|
|
// https://github.com/apple/darwin-xnu/blob/main/osfmk/kern/kpc.h
|
|
// https://github.com/apple/darwin-xnu/blob/main/bsd/kern/kern_kpc.c
|
|
//
|
|
// Lightweight PET (Profile Every Thread, since xnu 3789.1.32):
|
|
// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/pet.c
|
|
// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/kperf_kpc.c
|
|
//
|
|
// System Private frameworks (since macOS 10.11, iOS 8.0):
|
|
// /System/Library/PrivateFrameworks/kperf.framework
|
|
// /System/Library/PrivateFrameworks/kperfdata.framework
|
|
//
|
|
// Xcode framework (since Xcode 7.0):
|
|
// /Applications/Xcode.app/Contents/SharedFrameworks/DVTInstrumentsFoundation.framework
|
|
//
|
|
// CPU database (plist files)
|
|
// macOS (since macOS 10.11):
|
|
// /usr/share/kpep/<name>.plist
|
|
// iOS (copied from Xcode, since iOS 10.0, Xcode 8.0):
|
|
// /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
|
|
// /DeviceSupport/<version>/DeveloperDiskImage.dmg/usr/share/kpep/<name>.plist
|
|
//
|
|
//
|
|
// Created by YaoYuan <ibireme@gmail.com> on 2021.
|
|
// Released into the public domain (unlicense.org).
|
|
// =============================================================================
|
|
|
|
#ifndef M1CYCLES_H
|
|
#define M1CYCLES_H
|
|
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include <dlfcn.h> // for dlopen() and dlsym()
|
|
#include <mach/mach_time.h> // for mach_absolute_time()
|
|
#include <sys/kdebug.h> // for kdebug trace decode
|
|
#include <sys/sysctl.h> // for sysctl()
|
|
#include <unistd.h> // for usleep()
|
|
|
|
/// Bundle of four counter values (cycles, branches, missed branches and
/// instructions) held as doubles, with element-wise arithmetic helpers.
struct performance_counters {
  double cycles;
  double branches;
  double missed_branches;
  double instructions;

  /// Build from four integer readings; each one is converted to double.
  performance_counters(uint64_t c, uint64_t b, uint64_t m, uint64_t i)
      : cycles(static_cast<double>(c)), branches(static_cast<double>(b)),
        missed_branches(static_cast<double>(m)),
        instructions(static_cast<double>(i)) {}

  /// Build from four floating-point values.
  performance_counters(double c, double b, double m, double i)
      : cycles(c), branches(b), missed_branches(m), instructions(i) {}

  /// Build with every field set to the same value.
  performance_counters(double init)
      : cycles(init), branches(init), missed_branches(init),
        instructions(init) {}

  /// Element-wise subtraction of `other` from this object.
  inline performance_counters &operator-=(const performance_counters &other) {
    cycles = cycles - other.cycles;
    branches = branches - other.branches;
    missed_branches = missed_branches - other.missed_branches;
    instructions = instructions - other.instructions;
    return *this;
  }

  /// Element-wise minimum: each field keeps the smaller of its own value and
  /// the corresponding field of `other`.
  inline performance_counters &min(const performance_counters &other) {
    if (other.cycles < cycles) {
      cycles = other.cycles;
    }
    if (other.branches < branches) {
      branches = other.branches;
    }
    if (other.missed_branches < missed_branches) {
      missed_branches = other.missed_branches;
    }
    if (other.instructions < instructions) {
      instructions = other.instructions;
    }
    return *this;
  }

  /// Element-wise addition of `other` to this object.
  inline performance_counters &operator+=(const performance_counters &other) {
    cycles = cycles + other.cycles;
    branches = branches + other.branches;
    missed_branches = missed_branches + other.missed_branches;
    instructions = instructions + other.instructions;
    return *this;
  }

  /// Divide every field by the same value.
  /// @param numerator The divisor applied to each field (despite its name).
  inline performance_counters &operator/=(double numerator) {
    cycles = cycles / numerator;
    branches = branches / numerator;
    missed_branches = missed_branches / numerator;
    instructions = instructions / numerator;
    return *this;
  }
};
|
|
|
|
/// Element-wise difference `a - b`, returned as a new object.
inline performance_counters operator-(const performance_counters &a,
                                      const performance_counters &b) {
  performance_counters diff(a);
  diff -= b;
  return diff;
}
|
|
|
|
// Short aliases for the scalar types used throughout this header.

// Floating point.
typedef float f32;
typedef double f64;

// Fixed-width integers, signed and unsigned.
typedef int8_t i8;
typedef uint8_t u8;
typedef int16_t i16;
typedef uint16_t u16;
typedef int32_t i32;
typedef uint32_t u32;
typedef int64_t i64;
typedef uint64_t u64;

// Sizes and counts.
typedef size_t usize;
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// <kperf.framework> header (reverse engineered)
|
|
// This framework wraps some sysctl calls to communicate with the kpc in kernel.
|
|
// Most functions require root privileges, or the process must be "blessed".
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// Cross-platform class constants.
|
|
#define KPC_CLASS_FIXED (0)
|
|
#define KPC_CLASS_CONFIGURABLE (1)
|
|
#define KPC_CLASS_POWER (2)
|
|
#define KPC_CLASS_RAWPMU (3)
|
|
|
|
// Cross-platform class mask constants.
|
|
#define KPC_CLASS_FIXED_MASK (1u << KPC_CLASS_FIXED) // 1
|
|
#define KPC_CLASS_CONFIGURABLE_MASK (1u << KPC_CLASS_CONFIGURABLE) // 2
|
|
#define KPC_CLASS_POWER_MASK (1u << KPC_CLASS_POWER) // 4
|
|
#define KPC_CLASS_RAWPMU_MASK (1u << KPC_CLASS_RAWPMU) // 8
|
|
|
|
// PMU version constants.
|
|
#define KPC_PMU_ERROR (0) // Error
|
|
#define KPC_PMU_INTEL_V3 (1) // Intel
|
|
#define KPC_PMU_ARM_APPLE (2) // ARM64
|
|
#define KPC_PMU_INTEL_V2 (3) // Old Intel
|
|
#define KPC_PMU_ARM_V2 (4) // Old ARM
|
|
|
|
// The maximum number of counters we could read from every class in one go.
|
|
// ARMV7: FIXED: 1, CONFIGURABLE: 4
|
|
// ARM32: FIXED: 2, CONFIGURABLE: 6
|
|
// ARM64: FIXED: 2, CONFIGURABLE: CORE_NCTRS - FIXED (6 or 8)
|
|
// x86: 32
|
|
#define KPC_MAX_COUNTERS 32
|
|
|
|
// Bits for defining what to do on an action.
|
|
// Defined in https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/action.h
|
|
#define KPERF_SAMPLER_TH_INFO (1U << 0)
|
|
#define KPERF_SAMPLER_TH_SNAPSHOT (1U << 1)
|
|
#define KPERF_SAMPLER_KSTACK (1U << 2)
|
|
#define KPERF_SAMPLER_USTACK (1U << 3)
|
|
#define KPERF_SAMPLER_PMC_THREAD (1U << 4)
|
|
#define KPERF_SAMPLER_PMC_CPU (1U << 5)
|
|
#define KPERF_SAMPLER_PMC_CONFIG (1U << 6)
|
|
#define KPERF_SAMPLER_MEMINFO (1U << 7)
|
|
#define KPERF_SAMPLER_TH_SCHEDULING (1U << 8)
|
|
#define KPERF_SAMPLER_TH_DISPATCH (1U << 9)
|
|
#define KPERF_SAMPLER_TK_SNAPSHOT (1U << 10)
|
|
#define KPERF_SAMPLER_SYS_MEM (1U << 11)
|
|
#define KPERF_SAMPLER_TH_INSCYC (1U << 12)
|
|
#define KPERF_SAMPLER_TK_INFO (1U << 13)
|
|
|
|
// Maximum number of kperf action ids.
|
|
#define KPERF_ACTION_MAX (32)
|
|
|
|
// Maximum number of kperf timer ids.
|
|
#define KPERF_TIMER_MAX (8)
|
|
|
|
// x86/arm config registers are 64-bit
|
|
typedef u64 kpc_config_t;
|
|
|
|
/// Print current CPU identification string to the buffer (same as snprintf),
|
|
/// such as "cpu_7_8_10b282dc_46". This string can be used to locate the PMC
|
|
/// database in /usr/share/kpep.
|
|
/// @return string's length, or negative value if error occurs.
|
|
/// @note This method does not require root privileges.
|
|
/// @details sysctl get(hw.cputype), get(hw.cpusubtype),
|
|
/// get(hw.cpufamily), get(machdep.cpu.model)
|
|
static int (*kpc_cpu_string)(char *buf, usize buf_size);
|
|
|
|
/// Get the version of KPC that's being run.
|
|
/// @return See `PMU version constants` above.
|
|
/// @details sysctl get(kpc.pmu_version)
|
|
static u32 (*kpc_pmu_version)(void);
|
|
|
|
/// Get running PMC classes.
|
|
/// @return See `class mask constants` above,
|
|
/// 0 if error occurs or no class is set.
|
|
/// @details sysctl get(kpc.counting)
|
|
static u32 (*kpc_get_counting)(void);
|
|
|
|
/// Set PMC classes to enable counting.
|
|
/// @param classes See `class mask constants` above, set 0 to shutdown counting.
|
|
/// @return 0 for success.
|
|
/// @details sysctl set(kpc.counting)
|
|
static int (*kpc_set_counting)(u32 classes);
|
|
|
|
/// Get running PMC classes for current thread.
|
|
/// @return See `class mask constants` above,
|
|
/// 0 if error occurs or no class is set.
|
|
/// @details sysctl get(kpc.thread_counting)
|
|
static u32 (*kpc_get_thread_counting)(void);
|
|
|
|
/// Set PMC classes to enable counting for current thread.
|
|
/// @param classes See `class mask constants` above, set 0 to shutdown counting.
|
|
/// @return 0 for success.
|
|
/// @details sysctl set(kpc.thread_counting)
|
|
static int (*kpc_set_thread_counting)(u32 classes);
|
|
|
|
/// Get how many config registers there are for a given mask.
|
|
/// For example: Intel may returns 1 for `KPC_CLASS_FIXED_MASK`,
|
|
/// returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`.
|
|
/// @param classes See `class mask constants` above.
|
|
/// @return 0 if error occurs or no class is set.
|
|
/// @note This method does not require root privileges.
|
|
/// @details sysctl get(kpc.config_count)
|
|
static u32 (*kpc_get_config_count)(u32 classes);
|
|
|
|
/// Get config registers.
|
|
/// @param classes see `class mask constants` above.
|
|
/// @param config Config buffer to receive values, should not smaller than
|
|
/// kpc_get_config_count(classes) * sizeof(kpc_config_t).
|
|
/// @return 0 for success.
|
|
/// @details sysctl get(kpc.config_count), get(kpc.config)
|
|
static int (*kpc_get_config)(u32 classes, kpc_config_t *config);
|
|
|
|
/// Set config registers.
|
|
/// @param classes see `class mask constants` above.
|
|
/// @param config Config buffer, should not smaller than
|
|
/// kpc_get_config_count(classes) * sizeof(kpc_config_t).
|
|
/// @return 0 for success.
|
|
/// @details sysctl get(kpc.config_count), set(kpc.config)
|
|
static int (*kpc_set_config)(u32 classes, kpc_config_t *config);
|
|
|
|
/// Get how many counters there are for a given mask.
|
|
/// For example: Intel may returns 3 for `KPC_CLASS_FIXED_MASK`,
|
|
/// returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`.
|
|
/// @param classes See `class mask constants` above.
|
|
/// @note This method does not require root privileges.
|
|
/// @details sysctl get(kpc.counter_count)
|
|
static u32 (*kpc_get_counter_count)(u32 classes);
|
|
|
|
/// Get counter accumulations.
|
|
/// If `all_cpus` is true, the buffer count should not be smaller than
/// (cpu_count * counter_count). Otherwise, the buffer count should not be
/// smaller than (counter_count).
|
|
/// @see kpc_get_counter_count(), kpc_cpu_count().
|
|
/// @param all_cpus true for all CPUs, false for current cpu.
|
|
/// @param classes See `class mask constants` above.
|
|
/// @param curcpu A pointer to receive current cpu id, can be NULL.
|
|
/// @param buf Buffer to receive counter's value.
|
|
/// @return 0 for success.
|
|
/// @details sysctl get(hw.ncpu), get(kpc.counter_count), get(kpc.counters)
|
|
static int (*kpc_get_cpu_counters)(bool all_cpus, u32 classes, int *curcpu,
|
|
u64 *buf);
|
|
|
|
/// Get counter accumulations for current thread.
|
|
/// @param tid Thread id, should be 0.
|
|
/// @param buf_count The number of buf's elements (not bytes),
|
|
/// should not smaller than kpc_get_counter_count().
|
|
/// @param buf Buffer to receive counter's value.
|
|
/// @return 0 for success.
|
|
/// @details sysctl get(kpc.thread_counters)
|
|
static int (*kpc_get_thread_counters)(u32 tid, u32 buf_count, u64 *buf);
|
|
|
|
/// Acquire/release the counters used by the Power Manager.
|
|
/// @param val 1:acquire, 0:release
|
|
/// @return 0 for success.
|
|
/// @details sysctl set(kpc.force_all_ctrs)
|
|
static int (*kpc_force_all_ctrs_set)(int val);
|
|
|
|
/// Get the state of all_ctrs.
|
|
/// @return 0 for success.
|
|
/// @details sysctl get(kpc.force_all_ctrs)
|
|
static int (*kpc_force_all_ctrs_get)(int *val_out);
|
|
|
|
/// Set number of actions, should be `KPERF_ACTION_MAX`.
|
|
/// @details sysctl set(kperf.action.count)
|
|
static int (*kperf_action_count_set)(u32 count);
|
|
|
|
/// Get number of actions.
|
|
/// @details sysctl get(kperf.action.count)
|
|
static int (*kperf_action_count_get)(u32 *count);
|
|
|
|
/// Set what to sample when a trigger fires an action, e.g.
|
|
/// `KPERF_SAMPLER_PMC_CPU`.
|
|
/// @details sysctl set(kperf.action.samplers)
|
|
static int (*kperf_action_samplers_set)(u32 actionid, u32 sample);
|
|
|
|
/// Get what to sample when a trigger fires an action.
|
|
/// @details sysctl get(kperf.action.samplers)
|
|
static int (*kperf_action_samplers_get)(u32 actionid, u32 *sample);
|
|
|
|
/// Apply a task filter to the action, -1 to disable filter.
|
|
/// @details sysctl set(kperf.action.filter_by_task)
|
|
static int (*kperf_action_filter_set_by_task)(u32 actionid, i32 port);
|
|
|
|
/// Apply a pid filter to the action, -1 to disable filter.
|
|
/// @details sysctl set(kperf.action.filter_by_pid)
|
|
static int (*kperf_action_filter_set_by_pid)(u32 actionid, i32 pid);
|
|
|
|
/// Set number of time triggers, should be `KPERF_TIMER_MAX`.
|
|
/// @details sysctl set(kperf.timer.count)
|
|
static int (*kperf_timer_count_set)(u32 count);
|
|
|
|
/// Get number of time triggers.
|
|
/// @details sysctl get(kperf.timer.count)
|
|
static int (*kperf_timer_count_get)(u32 *count);
|
|
|
|
/// Set timer number and period.
|
|
/// @details sysctl set(kperf.timer.period)
|
|
static int (*kperf_timer_period_set)(u32 actionid, u64 tick);
|
|
|
|
/// Get timer number and period.
|
|
/// @details sysctl get(kperf.timer.period)
|
|
static int (*kperf_timer_period_get)(u32 actionid, u64 *tick);
|
|
|
|
/// Set timer number and actionid.
|
|
/// @details sysctl set(kperf.timer.action)
|
|
static int (*kperf_timer_action_set)(u32 actionid, u32 timerid);
|
|
|
|
/// Get timer number and actionid.
|
|
/// @details sysctl get(kperf.timer.action)
|
|
static int (*kperf_timer_action_get)(u32 actionid, u32 *timerid);
|
|
|
|
/// Set which timer ID does PET (Profile Every Thread).
|
|
/// @details sysctl set(kperf.timer.pet_timer)
|
|
static int (*kperf_timer_pet_set)(u32 timerid);
|
|
|
|
/// Get which timer ID does PET (Profile Every Thread).
|
|
/// @details sysctl get(kperf.timer.pet_timer)
|
|
static int (*kperf_timer_pet_get)(u32 *timerid);
|
|
|
|
/// Enable or disable sampling.
|
|
/// @details sysctl set(kperf.sampling)
|
|
static int (*kperf_sample_set)(u32 enabled);
|
|
|
|
/// Get is currently sampling.
|
|
/// @details sysctl get(kperf.sampling)
|
|
static int (*kperf_sample_get)(u32 *enabled);
|
|
|
|
/// Reset kperf: stop sampling, kdebug, timers and actions.
|
|
/// @return 0 for success.
|
|
static int (*kperf_reset)(void);
|
|
|
|
/// Nanoseconds to CPU ticks.
|
|
static u64 (*kperf_ns_to_ticks)(u64 ns);
|
|
|
|
/// CPU ticks to nanoseconds.
|
|
static u64 (*kperf_ticks_to_ns)(u64 ticks);
|
|
|
|
/// CPU ticks frequency (mach_absolute_time).
|
|
static u64 (*kperf_tick_frequency)(void);
|
|
|
|
/// Get lightweight PET mode (not in kperf.framework).
/// @param enabled Receives the value of the `kperf.lightweight_pet` sysctl;
///                must not be NULL.
/// @return -1 if `enabled` is NULL, otherwise the result of sysctlbyname().
static int kperf_lightweight_pet_get(u32 *enabled) {
  if (!enabled)
    return -1;
  // Size the output buffer from the pointee type instead of a hard-coded 4.
  usize size = sizeof(*enabled);
  return sysctlbyname("kperf.lightweight_pet", enabled, &size, NULL, 0);
}
|
|
|
|
/// Set lightweight PET mode (not in kperf.framework).
/// @param enabled New value written to the `kperf.lightweight_pet` sysctl.
/// @return The result of sysctlbyname().
static int kperf_lightweight_pet_set(u32 enabled) {
  // Pass the size of the value itself instead of a hard-coded 4.
  return sysctlbyname("kperf.lightweight_pet", NULL, NULL, &enabled,
                      sizeof(enabled));
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// <kperfdata.framework> header (reverse engineered)
|
|
// This framework provides some functions to access the local CPU database.
|
|
// These functions do not require root privileges.
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// KPEP CPU architecture constants.
|
|
#define KPEP_ARCH_I386 0
|
|
#define KPEP_ARCH_X86_64 1
|
|
#define KPEP_ARCH_ARM 2
|
|
#define KPEP_ARCH_ARM64 3
|
|
|
|
/// KPEP event (size: 48/28 bytes on 64/32 bit OS)
|
|
typedef struct kpep_event {
|
|
const char *name; ///< Unique name of a event, such as "INST_RETIRED.ANY".
|
|
const char *description; ///< Description for this event.
|
|
const char *errata; ///< Errata, currently NULL.
|
|
const char *alias; ///< Alias name, such as "Instructions", "Cycles".
|
|
const char *fallback; ///< Fallback event name for fixed counter.
|
|
u32 mask;
|
|
u8 number;
|
|
u8 umask;
|
|
u8 reserved;
|
|
u8 is_fixed;
|
|
} kpep_event;
|
|
|
|
/// KPEP database (size: 144/80 bytes on 64/32 bit OS)
|
|
typedef struct kpep_db {
|
|
const char *name; ///< Database name, such as "haswell".
|
|
const char *cpu_id; ///< Plist name, such as "cpu_7_8_10b282dc".
|
|
const char *marketing_name; ///< Marketing name, such as "Intel Haswell".
|
|
void *plist_data; ///< Plist data (CFDataRef), currently NULL.
|
|
void *event_map; ///< All events (CFDict<CFSTR(event_name), kpep_event *>).
|
|
kpep_event
|
|
*event_arr; ///< Event struct buffer (sizeof(kpep_event) * events_count).
|
|
kpep_event **fixed_event_arr; ///< Fixed counter events (sizeof(kpep_event *)
|
|
///< * fixed_counter_count)
|
|
void *alias_map; ///< All aliases (CFDict<CFSTR(event_name), kpep_event *>).
|
|
usize reserved_1;
|
|
usize reserved_2;
|
|
usize reserved_3;
|
|
usize event_count; ///< All events count.
|
|
usize alias_count;
|
|
usize fixed_counter_count;
|
|
usize config_counter_count;
|
|
usize power_counter_count;
|
|
u32 archtecture; ///< see `KPEP CPU archtecture constants` above.
|
|
u32 fixed_counter_bits;
|
|
u32 config_counter_bits;
|
|
u32 power_counter_bits;
|
|
} kpep_db;
|
|
|
|
/// KPEP config (size: 80/44 bytes on 64/32 bit OS)
|
|
typedef struct kpep_config {
|
|
kpep_db *db;
|
|
kpep_event **ev_arr; ///< (sizeof(kpep_event *) * counter_count), init NULL
|
|
usize *ev_map; ///< (sizeof(usize *) * counter_count), init 0
|
|
usize *ev_idx; ///< (sizeof(usize *) * counter_count), init -1
|
|
u32 *flags; ///< (sizeof(u32 *) * counter_count), init 0
|
|
u64 *kpc_periods; ///< (sizeof(u64 *) * counter_count), init 0
|
|
usize event_count; /// kpep_config_events_count()
|
|
usize counter_count;
|
|
u32 classes; ///< See `class mask constants` above.
|
|
u32 config_counter;
|
|
u32 power_counter;
|
|
u32 reserved;
|
|
} kpep_config;
|
|
|
|
/// Error code for kpep_config_xxx() and kpep_db_xxx() functions.
/// The trailing comment on each entry is its text in kpep_config_error_names.
typedef enum {
  KPEP_CONFIG_ERROR_NONE = 0,                    // "none"
  KPEP_CONFIG_ERROR_INVALID_ARGUMENT = 1,        // "invalid argument"
  KPEP_CONFIG_ERROR_OUT_OF_MEMORY = 2,           // "out of memory"
  KPEP_CONFIG_ERROR_IO = 3,                      // "I/O"
  KPEP_CONFIG_ERROR_BUFFER_TOO_SMALL = 4,        // "buffer too small"
  KPEP_CONFIG_ERROR_CUR_SYSTEM_UNKNOWN = 5,      // "current system unknown"
  KPEP_CONFIG_ERROR_DB_PATH_INVALID = 6,         // "database path invalid"
  KPEP_CONFIG_ERROR_DB_NOT_FOUND = 7,            // "database not found"
  KPEP_CONFIG_ERROR_DB_ARCH_UNSUPPORTED = 8,     // "database architecture unsupported"
  KPEP_CONFIG_ERROR_DB_VERSION_UNSUPPORTED = 9,  // "database version unsupported"
  KPEP_CONFIG_ERROR_DB_CORRUPT = 10,             // "database corrupt"
  KPEP_CONFIG_ERROR_EVENT_NOT_FOUND = 11,        // "event not found"
  KPEP_CONFIG_ERROR_CONFLICTING_EVENTS = 12,     // "conflicting events"
  KPEP_CONFIG_ERROR_COUNTERS_NOT_FORCED = 13,    // "all counters must be forced"
  KPEP_CONFIG_ERROR_EVENT_UNAVAILABLE = 14,      // "event unavailable"
  KPEP_CONFIG_ERROR_ERRNO = 15,                  // "check errno"
  KPEP_CONFIG_ERROR_MAX  // Number of codes; not an error itself.
} kpep_config_error_code;
|
|
|
|
/// Error description for kpep_config_error_code.
|
|
static const char *kpep_config_error_names[KPEP_CONFIG_ERROR_MAX] = {
|
|
"none",
|
|
"invalid argument",
|
|
"out of memory",
|
|
"I/O",
|
|
"buffer too small",
|
|
"current system unknown",
|
|
"database path invalid",
|
|
"database not found",
|
|
"database architecture unsupported",
|
|
"database version unsupported",
|
|
"database corrupt",
|
|
"event not found",
|
|
"conflicting events",
|
|
"all counters must be forced",
|
|
"event unavailable",
|
|
"check errno"};
|
|
|
|
/// Error description.
|
|
static const char *kpep_config_error_desc(int code) {
|
|
if (0 <= code && code < KPEP_CONFIG_ERROR_MAX) {
|
|
return kpep_config_error_names[code];
|
|
}
|
|
return "unknown error";
|
|
}
|
|
|
|
/// Create a config.
|
|
/// @param db A kpep db, see kpep_db_create()
|
|
/// @param cfg_ptr A pointer to receive the new config.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_config_create)(kpep_db *db, kpep_config **cfg_ptr);
|
|
|
|
/// Free the config.
|
|
static void (*kpep_config_free)(kpep_config *cfg);
|
|
|
|
/// Add an event to config.
|
|
/// @param cfg The config.
|
|
/// @param ev_ptr A event pointer.
|
|
/// @param flag 0: all, 1: user space only
|
|
/// @param err Error bitmap pointer, can be NULL.
|
|
/// If return value is `CONFLICTING_EVENTS`, this bitmap contains
|
|
/// the conflicted event indices, e.g. "1 << 2" means index 2.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_config_add_event)(kpep_config *cfg, kpep_event **ev_ptr,
|
|
u32 flag, u32 *err);
|
|
|
|
/// Remove event at index.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_config_remove_event)(kpep_config *cfg, usize idx);
|
|
|
|
/// Force all counters.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_config_force_counters)(kpep_config *cfg);
|
|
|
|
/// Get events count.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_config_events_count)(kpep_config *cfg, usize *count_ptr);
|
|
|
|
/// Get all event pointers.
|
|
/// @param buf A buffer to receive event pointers.
|
|
/// @param buf_size The buffer's size in bytes, should not smaller than
|
|
/// kpep_config_events_count() * sizeof(void *).
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_config_events)(kpep_config *cfg, kpep_event **buf,
|
|
usize buf_size);
|
|
|
|
/// Get kpc register configs.
|
|
/// @param buf A buffer to receive kpc register configs.
|
|
/// @param buf_size The buffer's size in bytes, should not smaller than
|
|
/// kpep_config_kpc_count() * sizeof(kpc_config_t).
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_config_kpc)(kpep_config *cfg, kpc_config_t *buf,
|
|
usize buf_size);
|
|
|
|
/// Get kpc register config count.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_config_kpc_count)(kpep_config *cfg, usize *count_ptr);
|
|
|
|
/// Get kpc classes.
|
|
/// @param classes See `class mask constants` above.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_config_kpc_classes)(kpep_config *cfg, u32 *classes_ptr);
|
|
|
|
/// Get the index mapping from event to counter.
|
|
/// @param buf A buffer to receive indexes.
|
|
/// @param buf_size The buffer's size in bytes, should not smaller than
|
|
/// kpep_config_events_count() * sizeof(kpc_config_t).
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_config_kpc_map)(kpep_config *cfg, usize *buf, usize buf_size);
|
|
|
|
/// Open a kpep database file in "/usr/share/kpep/" or "/usr/local/share/kpep/".
|
|
/// @param name File name, for example "haswell", "cpu_100000c_1_92fb37c8".
|
|
/// Pass NULL for current CPU.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_db_create)(const char *name, kpep_db **db_ptr);
|
|
|
|
/// Free the kpep database.
|
|
static void (*kpep_db_free)(kpep_db *db);
|
|
|
|
/// Get the database's name.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_db_name)(kpep_db *db, const char **name);
|
|
|
|
/// Get the event alias count.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_db_aliases_count)(kpep_db *db, usize *count);
|
|
|
|
/// Get all alias.
|
|
/// @param buf A buffer to receive all alias strings.
|
|
/// @param buf_size The buffer's size in bytes,
|
|
/// should not smaller than kpep_db_aliases_count() * sizeof(void *).
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_db_aliases)(kpep_db *db, const char **buf, usize buf_size);
|
|
|
|
/// Get counters count for given classes.
|
|
/// @param classes 1: Fixed, 2: Configurable.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_db_counters_count)(kpep_db *db, u8 classes, usize *count);
|
|
|
|
/// Get all event count.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_db_events_count)(kpep_db *db, usize *count);
|
|
|
|
/// Get all events.
|
|
/// @param buf A buffer to receive all event pointers.
|
|
/// @param buf_size The buffer's size in bytes,
|
|
/// should not smaller than kpep_db_events_count() * sizeof(void *).
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_db_events)(kpep_db *db, kpep_event **buf, usize buf_size);
|
|
|
|
/// Get one event by name.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_db_event)(kpep_db *db, const char *name, kpep_event **ev_ptr);
|
|
|
|
/// Get event's name.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_event_name)(kpep_event *ev, const char **name_ptr);
|
|
|
|
/// Get event's alias.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_event_alias)(kpep_event *ev, const char **alias_ptr);
|
|
|
|
/// Get event's description.
|
|
/// @return kpep_config_error_code, 0 for success.
|
|
static int (*kpep_event_description)(kpep_event *ev, const char **str_ptr);
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// load kperf/kperfdata dynamic library
|
|
// -----------------------------------------------------------------------------
|
|
|
|
/// One entry of a symbol table: a symbol name paired with the address of the
/// pointer variable that holds the resolved symbol.
typedef struct {
  const char *name; ///< Symbol name looked up with dlsym().
  void **impl;      ///< Where the resolved address (or NULL) is stored.
} lib_symbol;
|
|
|
|
#define lib_nelems(x) (sizeof(x) / sizeof((x)[0]))
|
|
#define lib_symbol_def(name) \
|
|
{ #name, (void **)&name }
|
|
|
|
static const lib_symbol lib_symbols_kperf[] = {
|
|
lib_symbol_def(kpc_pmu_version),
|
|
lib_symbol_def(kpc_cpu_string),
|
|
lib_symbol_def(kpc_set_counting),
|
|
lib_symbol_def(kpc_get_counting),
|
|
lib_symbol_def(kpc_set_thread_counting),
|
|
lib_symbol_def(kpc_get_thread_counting),
|
|
lib_symbol_def(kpc_get_config_count),
|
|
lib_symbol_def(kpc_get_counter_count),
|
|
lib_symbol_def(kpc_set_config),
|
|
lib_symbol_def(kpc_get_config),
|
|
lib_symbol_def(kpc_get_cpu_counters),
|
|
lib_symbol_def(kpc_get_thread_counters),
|
|
lib_symbol_def(kpc_force_all_ctrs_set),
|
|
lib_symbol_def(kpc_force_all_ctrs_get),
|
|
lib_symbol_def(kperf_action_count_set),
|
|
lib_symbol_def(kperf_action_count_get),
|
|
lib_symbol_def(kperf_action_samplers_set),
|
|
lib_symbol_def(kperf_action_samplers_get),
|
|
lib_symbol_def(kperf_action_filter_set_by_task),
|
|
lib_symbol_def(kperf_action_filter_set_by_pid),
|
|
lib_symbol_def(kperf_timer_count_set),
|
|
lib_symbol_def(kperf_timer_count_get),
|
|
lib_symbol_def(kperf_timer_period_set),
|
|
lib_symbol_def(kperf_timer_period_get),
|
|
lib_symbol_def(kperf_timer_action_set),
|
|
lib_symbol_def(kperf_timer_action_get),
|
|
lib_symbol_def(kperf_sample_set),
|
|
lib_symbol_def(kperf_sample_get),
|
|
lib_symbol_def(kperf_reset),
|
|
lib_symbol_def(kperf_timer_pet_set),
|
|
lib_symbol_def(kperf_timer_pet_get),
|
|
lib_symbol_def(kperf_ns_to_ticks),
|
|
lib_symbol_def(kperf_ticks_to_ns),
|
|
lib_symbol_def(kperf_tick_frequency),
|
|
};
|
|
|
|
static const lib_symbol lib_symbols_kperfdata[] = {
|
|
lib_symbol_def(kpep_config_create),
|
|
lib_symbol_def(kpep_config_free),
|
|
lib_symbol_def(kpep_config_add_event),
|
|
lib_symbol_def(kpep_config_remove_event),
|
|
lib_symbol_def(kpep_config_force_counters),
|
|
lib_symbol_def(kpep_config_events_count),
|
|
lib_symbol_def(kpep_config_events),
|
|
lib_symbol_def(kpep_config_kpc),
|
|
lib_symbol_def(kpep_config_kpc_count),
|
|
lib_symbol_def(kpep_config_kpc_classes),
|
|
lib_symbol_def(kpep_config_kpc_map),
|
|
lib_symbol_def(kpep_db_create),
|
|
lib_symbol_def(kpep_db_free),
|
|
lib_symbol_def(kpep_db_name),
|
|
lib_symbol_def(kpep_db_aliases_count),
|
|
lib_symbol_def(kpep_db_aliases),
|
|
lib_symbol_def(kpep_db_counters_count),
|
|
lib_symbol_def(kpep_db_events_count),
|
|
lib_symbol_def(kpep_db_events),
|
|
lib_symbol_def(kpep_db_event),
|
|
lib_symbol_def(kpep_event_name),
|
|
lib_symbol_def(kpep_event_alias),
|
|
lib_symbol_def(kpep_event_description),
|
|
};
|
|
|
|
#define lib_path_kperf "/System/Library/PrivateFrameworks/kperf.framework/kperf"
|
|
#define lib_path_kperfdata \
|
|
"/System/Library/PrivateFrameworks/kperfdata.framework/kperfdata"
|
|
|
|
static bool lib_inited = false;
|
|
static bool lib_has_err = false;
|
|
static char lib_err_msg[256];
|
|
|
|
static void *lib_handle_kperf = NULL;
|
|
static void *lib_handle_kperfdata = NULL;
|
|
|
|
static void lib_deinit(void) {
|
|
lib_inited = false;
|
|
lib_has_err = false;
|
|
if (lib_handle_kperf)
|
|
dlclose(lib_handle_kperf);
|
|
if (lib_handle_kperfdata)
|
|
dlclose(lib_handle_kperfdata);
|
|
lib_handle_kperf = NULL;
|
|
lib_handle_kperfdata = NULL;
|
|
for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) {
|
|
const lib_symbol *symbol = &lib_symbols_kperf[i];
|
|
*symbol->impl = NULL;
|
|
}
|
|
for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) {
|
|
const lib_symbol *symbol = &lib_symbols_kperfdata[i];
|
|
*symbol->impl = NULL;
|
|
}
|
|
}
|
|
|
|
static bool lib_init(void) {
|
|
#define return_err() \
|
|
do { \
|
|
lib_deinit(); \
|
|
lib_inited = true; \
|
|
lib_has_err = true; \
|
|
return false; \
|
|
} while (false)
|
|
|
|
if (lib_inited)
|
|
return !lib_has_err;
|
|
|
|
// load dynamic library
|
|
lib_handle_kperf = dlopen(lib_path_kperf, RTLD_LAZY);
|
|
if (!lib_handle_kperf) {
|
|
snprintf(lib_err_msg, sizeof(lib_err_msg),
|
|
"Failed to load kperf.framework, message: %s.", dlerror());
|
|
return_err();
|
|
}
|
|
lib_handle_kperfdata = dlopen(lib_path_kperfdata, RTLD_LAZY);
|
|
if (!lib_handle_kperfdata) {
|
|
snprintf(lib_err_msg, sizeof(lib_err_msg),
|
|
"Failed to load kperfdata.framework, message: %s.", dlerror());
|
|
return_err();
|
|
}
|
|
|
|
// load symbol address from dynamic library
|
|
for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) {
|
|
const lib_symbol *symbol = &lib_symbols_kperf[i];
|
|
*symbol->impl = dlsym(lib_handle_kperf, symbol->name);
|
|
if (!*symbol->impl) {
|
|
snprintf(lib_err_msg, sizeof(lib_err_msg),
|
|
"Failed to load kperf function: %s.", symbol->name);
|
|
return_err();
|
|
}
|
|
}
|
|
for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) {
|
|
const lib_symbol *symbol = &lib_symbols_kperfdata[i];
|
|
*symbol->impl = dlsym(lib_handle_kperfdata, symbol->name);
|
|
if (!*symbol->impl) {
|
|
snprintf(lib_err_msg, sizeof(lib_err_msg),
|
|
"Failed to load kperfdata function: %s.", symbol->name);
|
|
return_err();
|
|
}
|
|
}
|
|
|
|
lib_inited = true;
|
|
lib_has_err = false;
|
|
return true;
|
|
|
|
#undef return_err
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// kdebug private structs
|
|
// https://github.com/apple/darwin-xnu/blob/main/bsd/sys_private/kdebug_private.h
|
|
// -----------------------------------------------------------------------------
|
|
|
|
/*
|
|
* Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf
|
|
* structure.
|
|
*/
|
|
#if defined(__arm64__)
|
|
typedef uint64_t kd_buf_argtype;
|
|
#else
|
|
typedef uintptr_t kd_buf_argtype;
|
|
#endif
|
|
|
|
typedef struct {
|
|
uint64_t timestamp;
|
|
kd_buf_argtype arg1;
|
|
kd_buf_argtype arg2;
|
|
kd_buf_argtype arg3;
|
|
kd_buf_argtype arg4;
|
|
kd_buf_argtype arg5; /* the thread ID */
|
|
uint32_t debugid; /* see <sys/kdebug.h> */
|
|
|
|
/*
|
|
* Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf
|
|
* structure.
|
|
*/
|
|
#if defined(__LP64__) || defined(__arm64__)
|
|
uint32_t cpuid; /* cpu index, from 0 */
|
|
kd_buf_argtype unused;
|
|
#endif
|
|
} kd_buf;
|
|
|
|
/* bits for the type field of kd_regtype */
|
|
#define KDBG_CLASSTYPE 0x10000
|
|
#define KDBG_SUBCLSTYPE 0x20000
|
|
#define KDBG_RANGETYPE 0x40000
|
|
#define KDBG_TYPENONE 0x80000
|
|
#define KDBG_CKTYPES 0xF0000
|
|
|
|
/* only trace at most 4 types of events, at the code granularity */
|
|
#define KDBG_VALCHECK 0x00200000U
|
|
|
|
/// Filter description handed to kdebug_setreg(): a type word followed by four
/// values.
typedef struct {
  unsigned int type; /* built from the KDBG_*TYPE bits defined above */
  unsigned int value1;
  unsigned int value2;
  unsigned int value3;
  unsigned int value4;
} kd_regtype;
|
|
|
|
/// Trace buffer information record.
typedef struct {
  int nkdbufs;        /* number of events that can fit in the buffers */
  int nolog;          /* set if trace is disabled */
  unsigned int flags; /* kd_ctrl_page.flags */
  int nkdthreads;     /* number of threads in thread map */
  int bufid;          /* the owning pid */
} kbufinfo_t;
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// kdebug utils
|
|
// -----------------------------------------------------------------------------
|
|
|
|
/// Clean up the trace buffers and reset ktrace/kdebug/kperf by issuing the
/// KERN_KDREMOVE request through sysctl().
/// @return the sysctl() result; 0 on success.
static int kdebug_reset(void) {
  int request[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREMOVE};
  const int status = sysctl(request, 3, NULL, NULL, NULL, 0);
  return status;
}
|
|
|
|
/// Disable tracing and reinitialize the trace buffers by issuing the
/// KERN_KDSETUP request through sysctl().
/// @return the sysctl() result; 0 on success.
static int kdebug_reinit(void) {
  int request[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETUP};
  const int status = sysctl(request, 3, NULL, NULL, NULL, 0);
  return status;
}
|
|
|
|
/// Install a debug filter by issuing the KERN_KDSETREG request.
/// @param kdr Filter description; passed to sysctl() together with
///            sizeof(kd_regtype) as its length.
/// @return the sysctl() result; 0 on success.
static int kdebug_setreg(kd_regtype *kdr) {
  int request[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETREG};
  usize filter_size = sizeof(*kdr);
  return sysctl(request, 3, kdr, &filter_size, NULL, 0);
}
|
|
|
|
/// Set the maximum number of trace entries (kd_buf) via KERN_KDSETBUF.
/// Per the original note, only allocation up to half the available memory
/// (sane_size) is allowed.
/// @param nbufs Requested number of entries; sent as the fourth element of
///              the sysctl() name vector.
/// @return the sysctl() result; 0 on success.
static int kdebug_trace_setbuf(int nbufs) {
  int request[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETBUF, nbufs};
  const int status = sysctl(request, 4, NULL, NULL, NULL, 0);
  return status;
}
|
|
|
|
/// Switch kdebug tracing on or off via KERN_KDENABLE.
/// The trace buffer must already be initialized.
/// @param enable true to enable tracing, false to disable it; sent as 1 or 0
///               in the fourth element of the sysctl() name vector.
/// @return the sysctl() result; 0 on success.
static int kdebug_trace_enable(bool enable) {
  const int flag = enable ? 1 : 0;
  int request[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDENABLE, flag};
  return sysctl(request, 4, NULL, NULL, NULL, 0);
}
|
|
|
|
/// Fetch the trace buffer information from the kernel via KERN_KDGETBUF.
/// @param info Destination structure; must not be NULL.
/// @return -1 if `info` is NULL, otherwise the sysctl() result (0 on success).
static int kdebug_get_bufinfo(kbufinfo_t *info) {
  if (info == NULL) {
    return -1;
  }
  int request[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDGETBUF};
  size_t info_size = sizeof(*info);
  return sysctl(request, 3, info, &info_size, NULL, 0);
}
|
|
|
|
/// Retrieve trace buffers from the kernel via KERN_KDREADTR.
/// @param buf   Memory to receive buffer data, array of `kd_buf`.
/// @param len   Length of `buf` in bytes.
/// @param count Optional (may be NULL). Reset to 0 on entry; on success it
///              receives the number of trace entries (kd_buf) obtained.
/// @return -1 if `buf` is NULL or `len` is 0, otherwise the sysctl() result
///         (0 on success).
static int kdebug_trace_read(void *buf, usize len, usize *count) {
  if (count) {
    *count = 0;
  }
  if (!buf || !len) {
    return -1;
  }

  // Note: the input and output units are not the same.
  // input: bytes
  // output: number of kd_buf
  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREADTR};
  int ret = sysctl(mib, 3, buf, &len, NULL, 0);
  if (ret != 0) {
    return ret;
  }
  // `count` is optional, so it must be checked here exactly as on entry.
  if (count) {
    *count = len;
  }
  return 0;
}
|
|
|
|
/// Block until there are new buffers filled or `timeout_ms` have passed
/// (KERN_KDBUFWAIT).
/// @param timeout_ms Timeout in milliseconds; must be non-zero. A zero timeout
///                   is rejected with -1 instead of being forwarded to the
///                   kernel. NOTE(review): the earlier documentation described
///                   0 as "wait forever", presumably the kernel-side meaning —
///                   confirm.
/// @param suc Optional (may be NULL). Always written when non-NULL: false on
///            entry, and true only if the request succeeded and the kernel
///            reported a non-zero value.
/// @return -1 if `timeout_ms` is 0, otherwise the sysctl() result
///         (0 on success).
static int kdebug_wait(usize timeout_ms, bool *suc) {
  // Give the caller a defined value on every path, including early failure.
  if (suc) {
    *suc = false;
  }
  if (timeout_ms == 0) {
    return -1;
  }
  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDBUFWAIT};
  usize val = timeout_ms;
  int ret = sysctl(mib, 3, NULL, &val, NULL, 0);
  // Only trust `val` when the call succeeded; on failure it presumably still
  // holds the timeout that was passed in.
  if (ret == 0 && suc) {
    *suc = (val != 0);
  }
  return ret;
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// Demo
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// Capacity of the candidate-name list of one event_alias.
#define EVENT_NAME_MAX 8

// A printable event label together with the database names it may go by.
typedef struct {
  /// Name used when printing.
  const char *alias;
  /// Candidate names from the pmc db; unused trailing slots are NULL.
  const char *names[EVENT_NAME_MAX];
} event_alias;
|
|
|
|
/// Event names from /usr/share/kpep/<name>.plist
|
|
static const event_alias profile_events[] = {
|
|
{"cycles",
|
|
{
|
|
"FIXED_CYCLES", // Apple A7-A15//CORE_ACTIVE_CYCLE
|
|
"CPU_CLK_UNHALTED.THREAD", // Intel Core 1th-10th
|
|
"CPU_CLK_UNHALTED.CORE", // Intel Yonah, Merom
|
|
}},
|
|
{"instructions",
|
|
{
|
|
"FIXED_INSTRUCTIONS", // Apple A7-A15
|
|
"INST_RETIRED.ANY" // Intel Yonah, Merom, Core 1th-10th
|
|
}},
|
|
{"branches",
|
|
{
|
|
"INST_BRANCH", // Apple A7-A15
|
|
"BR_INST_RETIRED.ALL_BRANCHES", // Intel Core 1th-10th
|
|
"INST_RETIRED.ANY", // Intel Yonah, Merom
|
|
}},
|
|
{"branch-misses",
|
|
{
|
|
"BRANCH_MISPRED_NONSPEC", // Apple A7-A15, since iOS 15, macOS 12
|
|
"BRANCH_MISPREDICT", // Apple A7-A14
|
|
"BR_MISP_RETIRED.ALL_BRANCHES", // Intel Core 2th-10th
|
|
"BR_INST_RETIRED.MISPRED", // Intel Yonah, Merom
|
|
}},
|
|
};
|
|
|
|
/// Resolve an alias to a database event.
/// Walks the alias's candidate names in order, stopping at the first NULL
/// slot, and returns the event for the first name that kpep_db_event()
/// accepts (i.e. for which it returns 0).
/// @param db    PMC database to search.
/// @param alias Alias whose candidate names are tried.
/// @return the matching event, or NULL if no candidate name was found.
static kpep_event *get_event(kpep_db *db, const event_alias *alias) {
  const char *const *cursor = alias->names;
  const char *const *const end = alias->names + EVENT_NAME_MAX;

  while (cursor != end && *cursor != NULL) {
    kpep_event *candidate = NULL;
    if (kpep_db_event(db, *cursor, &candidate) == 0) {
      return candidate;
    }
    ++cursor;
  }
  return NULL;
}
|
|
|
|
kpc_config_t regs[KPC_MAX_COUNTERS] = {0};
|
|
usize counter_map[KPC_MAX_COUNTERS] = {0};
|
|
u64 counters_0[KPC_MAX_COUNTERS] = {0};
|
|
u64 counters_1[KPC_MAX_COUNTERS] = {0};
|
|
const usize ev_count = sizeof(profile_events) / sizeof(profile_events[0]);
|
|
|
|
bool setup_performance_counters() {
|
|
static bool init = false;
|
|
static bool worked = false;
|
|
|
|
if (init) {
|
|
return worked;
|
|
}
|
|
init = true;
|
|
|
|
// load dylib
|
|
if (!lib_init()) {
|
|
printf("Error: %s\n", lib_err_msg);
|
|
return (worked = false);
|
|
}
|
|
|
|
// check permission
|
|
int force_ctrs = 0;
|
|
if (kpc_force_all_ctrs_get(&force_ctrs)) {
|
|
// printf("Permission denied, xnu/kpc requires root privileges.\n");
|
|
return (worked = false);
|
|
}
|
|
int ret;
|
|
// load pmc db
|
|
kpep_db *db = NULL;
|
|
if ((ret = kpep_db_create(NULL, &db))) {
|
|
printf("Error: cannot load pmc database: %d.\n", ret);
|
|
return (worked = false);
|
|
}
|
|
printf("loaded db: %s (%s)\n", db->name, db->marketing_name);
|
|
|
|
// create a config
|
|
kpep_config *cfg = NULL;
|
|
if ((ret = kpep_config_create(db, &cfg))) {
|
|
printf("Failed to create kpep config: %d (%s).\n", ret,
|
|
kpep_config_error_desc(ret));
|
|
return (worked = false);
|
|
}
|
|
if ((ret = kpep_config_force_counters(cfg))) {
|
|
printf("Failed to force counters: %d (%s).\n", ret,
|
|
kpep_config_error_desc(ret));
|
|
return (worked = false);
|
|
}
|
|
|
|
// get events
|
|
kpep_event *ev_arr[ev_count] = {0};
|
|
for (usize i = 0; i < ev_count; i++) {
|
|
const event_alias *alias = profile_events + i;
|
|
ev_arr[i] = get_event(db, alias);
|
|
if (!ev_arr[i]) {
|
|
printf("Cannot find event: %s.\n", alias->alias);
|
|
return (worked = false);
|
|
}
|
|
}
|
|
|
|
// add event to config
|
|
for (usize i = 0; i < ev_count; i++) {
|
|
kpep_event *ev = ev_arr[i];
|
|
if ((ret = kpep_config_add_event(cfg, &ev, 0, NULL))) {
|
|
printf("Failed to add event: %d (%s).\n", ret,
|
|
kpep_config_error_desc(ret));
|
|
return (worked = false);
|
|
}
|
|
}
|
|
|
|
// prepare buffer and config
|
|
u32 classes = 0;
|
|
usize reg_count = 0;
|
|
if ((ret = kpep_config_kpc_classes(cfg, &classes))) {
|
|
printf("Failed get kpc classes: %d (%s).\n", ret,
|
|
kpep_config_error_desc(ret));
|
|
return (worked = false);
|
|
}
|
|
if ((ret = kpep_config_kpc_count(cfg, ®_count))) {
|
|
printf("Failed get kpc count: %d (%s).\n", ret,
|
|
kpep_config_error_desc(ret));
|
|
return (worked = false);
|
|
}
|
|
if ((ret = kpep_config_kpc_map(cfg, counter_map, sizeof(counter_map)))) {
|
|
printf("Failed get kpc map: %d (%s).\n", ret, kpep_config_error_desc(ret));
|
|
return (worked = false);
|
|
}
|
|
if ((ret = kpep_config_kpc(cfg, regs, sizeof(regs)))) {
|
|
printf("Failed get kpc registers: %d (%s).\n", ret,
|
|
kpep_config_error_desc(ret));
|
|
return (worked = false);
|
|
}
|
|
|
|
// set config to kernel
|
|
if ((ret = kpc_force_all_ctrs_set(1))) {
|
|
printf("Failed force all ctrs: %d.\n", ret);
|
|
return (worked = false);
|
|
}
|
|
if ((classes & KPC_CLASS_CONFIGURABLE_MASK) && reg_count) {
|
|
if ((ret = kpc_set_config(classes, regs))) {
|
|
printf("Failed set kpc config: %d.\n", ret);
|
|
return (worked = false);
|
|
}
|
|
}
|
|
|
|
// start counting
|
|
if ((ret = kpc_set_counting(classes))) {
|
|
printf("Failed set counting: %d.\n", ret);
|
|
return (worked = false);
|
|
}
|
|
if ((ret = kpc_set_thread_counting(classes))) {
|
|
printf("Failed set thread counting: %d.\n", ret);
|
|
return (worked = false);
|
|
}
|
|
|
|
return (worked = true);
|
|
}
|
|
|
|
/// Read the calling thread's counters into `counters_0` and package the four
/// profiled values.
///
/// The values are selected through counter_map[0..3]. Assuming counter_map[i]
/// belongs to profile_events[i] (TODO confirm), those are cycles,
/// instructions, branches and branch-misses, and the result is built in the
/// order (cycles, branches, branch-misses, instructions).
///
/// If kpc_get_thread_counters() fails, a message is printed once per process
/// and a performance_counters constructed from the value 1 is returned.
inline performance_counters get_counters() {
  static bool warned = false;

  const int status = kpc_get_thread_counters(0, KPC_MAX_COUNTERS, counters_0);
  if (status != 0) {
    if (!warned) {
      printf("Failed get thread counters before: %d.\n", status);
      warned = true;
    }
    return 1;
  }

  const u64 cycles = counters_0[counter_map[0]];
  const u64 instructions = counters_0[counter_map[1]];
  const u64 branches = counters_0[counter_map[2]];
  const u64 branch_misses = counters_0[counter_map[3]];

  return performance_counters{cycles, branches, branch_misses, instructions};
}
|
|
|
|
#endif
|