test/profiler

This commit is contained in:
mutouyun 2021-06-06 19:35:39 +08:00
parent 55e75d4ed6
commit 54bc3386dd
4 changed files with 175 additions and 0 deletions

11
test/profiler/README.md Normal file
View File

@ -0,0 +1,11 @@
# A Quick Introduction to C++ Performance Tuning
(From: https://github.com/adah1972/cpp_summit_2020.git)
This repository contains the presentation file and example code for my
presentation at the C++ Summit 2020 held in Shenzhen, China on 4–5 December
2020.
The presentation content is shared under a [Creative Commons Attribution-Share
Alike 2.5 Licence](http://creativecommons.org/licenses/by-sa/2.5/). The code
is put in the public domain (i.e. do whatever you like with it), though an
acknowledgement will be appreciated (but not required).

View File

@ -0,0 +1,77 @@
#include "profiler.h"
#include <cassert>
#include <iostream>
#include <vector>
namespace {
// Accumulated statistics for one profiled function.
//
// Every member is value-initialized so that a default-constructed record is
// always in a defined, all-zero state.
struct profiling_data {
    // Index of the function; profiler's constructor sets it to the record's
    // position in name_map.
    int number{};
    // How many times the function was measured. 64-bit so that a hot
    // function cannot overflow the counter during a long run.
    uint64_t call_count{};
    // Sum of all measured durations, in the unit returned by rdtsc().
    uint64_t call_duration{};
};
// Aggregates call counts and durations per profiled function.
// One record is kept for each entry of the externally defined name_map.
class profiler {
public:
// Creates one record per name_map entry (the table ends at the first entry
// whose name is null).
profiler();
// In builds without NDEBUG, prints the statistics of every function that
// was called at least once to std::cout.
~profiler();
// Records one call of function `number` that took `duration` ticks.
void add_data(int number, uint64_t duration);
private:
// Indexed by function number.
std::vector<profiling_data> data_;
};
// Builds one statistics record per entry of name_map.
//
// name_map has no explicit length: it is terminated by an entry whose
// `name` is null, so the table is scanned up to that sentinel first.
profiler::profiler()
{
    std::size_t len = 0;
    while (name_map[len].name != nullptr) {
        ++len;
    }

    data_.resize(len);
    for (std::size_t i = 0; i < len; ++i) {
        // The table must be dense and ordered: entry i describes function i,
        // because add_data() uses the number directly as an index.
        assert(static_cast<int>(i) == name_map[i].number);
        data_[i].number = static_cast<int>(i);
    }
}
// Dumps the collected statistics to std::cout when the profiler goes away.
// Compiled to an empty body when NDEBUG is defined.
profiler::~profiler()
{
#ifndef NDEBUG
    for (const auto& entry : data_) {
        // Only functions that were actually called are reported, which also
        // guarantees a non-zero divisor for the average below.
        if (entry.call_count != 0) {
            const double average =
                static_cast<double>(entry.call_duration) / entry.call_count;
            std::cout << entry.number << " " << name_map[entry.number].name
                      << ":\n"
                      << " Call count: " << entry.call_count << '\n'
                      << " Call duration: " << entry.call_duration << '\n'
                      << " Average duration: " << average << '\n';
        }
    }
#endif
}
// Records one measured call.
//
// number:   function number; must be a valid index into the record table.
// duration: elapsed time of the call, in the unit returned by rdtsc().
//
// An out-of-range number is a programming error and trips the assert in
// debug builds. When asserts are compiled out the sample is dropped instead,
// so that a bad number can never write outside the vector.
void profiler::add_data(int number, uint64_t duration)
{
    assert(number >= 0 && number < static_cast<int>(data_.size()));
    if (number < 0 || static_cast<std::size_t>(number) >= data_.size()) {
        return;
    }
    auto& entry = data_[static_cast<std::size_t>(number)];
    ++entry.call_count;
    entry.call_duration += duration;
}
// The single profiler object of this translation unit; profiling_checker's
// destructor reports into it.
profiler profiler_instance;
} // unnamed namespace
// Stops the measurement: reads the timestamp again and reports the elapsed
// ticks since construction for this checker's function number.
profiling_checker::~profiling_checker()
{
    const uint64_t elapsed = rdtsc() - start_time_;
    profiler_instance.add_data(number_, elapsed);
}

35
test/profiler/profiler.h Normal file
View File

@ -0,0 +1,35 @@
#ifndef PROFILER_H
#define PROFILER_H
#include "rdtsc.h"
// One row of the function-number-to-name table used by the profiler.
struct name_mapper {
// Function number passed to PROFILE_CHECK / profiling_checker.
int number;
// Human-readable function name; a null pointer marks the end of the table.
const char* name;
};
// Table of profiled functions, defined elsewhere. The profiler expects entry
// i to have number == i, and the last entry to have a null name.
extern name_mapper name_map[];
// Scope-based timer: takes a timestamp on construction and reports the
// elapsed ticks for the given function number on destruction.
// NOTE(review): the constructor is not explicit and the class is copyable;
// a copy would report the same interval twice — confirm this is never done.
class profiling_checker {
public:
// number: function number the measurement is attributed to.
profiling_checker(int number);
~profiling_checker();
private:
// Function number given at construction.
int number_;
// rdtsc() value captured at construction.
uint64_t start_time_;
};
// Starts a measurement for function `number` by capturing the current
// timestamp. Members are initialized in declaration order: number_, then
// start_time_.
inline profiling_checker::profiling_checker(int number)
    : number_(number), start_time_(rdtsc())
{
}
// PROFILE_CHECK(func_number): place at the top of a scope to time it.
// Without NDEBUG it declares a local profiling_checker named _checker that
// lives until the end of the enclosing scope; with NDEBUG it is a no-op.
#ifndef NDEBUG
#define PROFILE_CHECK(func_number) profiling_checker _checker(func_number)
#else
#define PROFILE_CHECK(func_number) (void)0
#endif
#endif // PROFILER_H

52
test/profiler/rdtsc.h Normal file
View File

@ -0,0 +1,52 @@
#ifndef RDTSC_H
#define RDTSC_H
#include <stdint.h> // uint64_t
// Select the timestamp source: the hardware time-stamp counter on x86/x64,
// a monotonic std::chrono clock everywhere else.
#if defined(_M_X64) || defined(_M_IX86) || defined(__x86_64) || defined(__i386)
# ifdef _WIN32
#  include <intrin.h>     // __rdtsc
# else
#  include <x86intrin.h>  // __rdtsc
# endif
# define HAS_HW_RDTSC 1
#else
# include <chrono>        // std::chrono::steady_clock
# define HAS_HW_RDTSC 0
#endif

// Returns a timestamp suitable for measuring short intervals by subtracting
// two readings.
//
// The unit depends on the platform: time-stamp-counter ticks where
// HAS_HW_RDTSC is 1, nanoseconds otherwise. Only differences between two
// readings are meaningful.
inline uint64_t rdtsc()
{
#if HAS_HW_RDTSC
    // _mm_lfence() might be used to serialize the instruction stream,
    // and it would guarantee that RDTSC will not be reordered with
    // other instructions.  However, measurements show that the overhead
    // may be too big (easily 15 to 30 CPU cycles) for profiling
    // purposes: if reordering matters, the overhead matters too!

    // Forbid the compiler from reordering instructions
# ifdef _MSC_VER
    _ReadWriteBarrier();
# else
    __asm__ __volatile__("" : : : "memory");
# endif
    uint64_t result = __rdtsc();
    // Forbid the compiler from reordering instructions
# ifdef _MSC_VER
    _ReadWriteBarrier();
# else
    __asm__ __volatile__("" : : : "memory");
# endif
    return result;
#else
    // steady_clock never goes backwards, so `end - start` cannot wrap
    // around; high_resolution_clock may alias the adjustable system clock.
    const auto now = std::chrono::steady_clock::now();
    return static_cast<uint64_t>(
        std::chrono::duration_cast<std::chrono::nanoseconds>(
            now.time_since_epoch())
            .count());
#endif
}
#endif // RDTSC_H