From 54bc3386dd434c9c68e911b0bd039dc5c39c9e4a Mon Sep 17 00:00:00 2001 From: mutouyun Date: Sun, 6 Jun 2021 19:35:39 +0800 Subject: [PATCH] test/profiler --- test/profiler/README.md | 11 ++++++ test/profiler/profiler.cpp | 77 ++++++++++++++++++++++++++++++++++++++ test/profiler/profiler.h | 35 +++++++++++++++++ test/profiler/rdtsc.h | 52 +++++++++++++++++++++++++ 4 files changed, 175 insertions(+) create mode 100644 test/profiler/README.md create mode 100644 test/profiler/profiler.cpp create mode 100644 test/profiler/profiler.h create mode 100644 test/profiler/rdtsc.h diff --git a/test/profiler/README.md b/test/profiler/README.md new file mode 100644 index 0000000..d4a6bea --- /dev/null +++ b/test/profiler/README.md @@ -0,0 +1,11 @@ +# A Quick Introduction to C++ Performance Tuning +(From: https://github.com/adah1972/cpp_summit_2020.git) + +This repository contains the presentation file and example code for my +presentation at the C++ Summit 2020 held in Shenzhen, China on 4–5 December +2020. + +The presentation content is shared under a [Creative Commons Attribution-Share +Alike 2.5 Licence](http://creativecommons.org/licenses/by-sa/2.5/). The code +is put in the public domain (i.e. do whatever you like with it), though an +acknowledgement will be appreciated (but not required). 
diff --git a/test/profiler/profiler.cpp b/test/profiler/profiler.cpp
new file mode 100644
index 0000000..d8fd7bc
--- /dev/null
+++ b/test/profiler/profiler.cpp
@@ -0,0 +1,91 @@
+#include "profiler.h"
+#include <cassert>
+#include <iostream>
+#include <vector>
+
+namespace {
+
+// Accumulated statistics for one entry of name_map.
+struct profiling_data {
+    int number;                // index of the matching name_map entry
+    int call_count{};          // number of samples recorded
+    uint64_t call_duration{};  // sum of recorded durations, in rdtsc() units
+};
+
+// Collects per-function timing samples and, unless NDEBUG is defined,
+// prints a summary to std::cout when destroyed.
+class profiler {
+public:
+    profiler();
+    ~profiler();
+
+    // Adds one sample of `duration` to the slot of function `number`.
+    void add_data(int number, uint64_t duration);
+
+private:
+    std::vector<profiling_data> data_;  // one slot per name_map entry
+};
+
+// Sizes data_ to the number of name_map entries that precede the first
+// entry with a null name, and numbers the slots 0..len-1.
+profiler::profiler()
+{
+    size_t len = 0;
+    while (name_map[len].name != nullptr) {
+        ++len;
+    }
+    data_.resize(len);
+    int i = 0;
+    for (auto& item : data_) {
+        // Each name_map entry is expected to carry its own index.
+        assert(i == name_map[i].number);
+        item.number = i;
+        ++i;
+    }
+}
+
+// Prints the statistics of every function that was called at least once.
+// The whole report is compiled out when NDEBUG is defined.
+profiler::~profiler()
+{
+#ifndef NDEBUG
+    for (const auto& item : data_) {
+        if (item.call_count == 0) {
+            continue;
+        }
+        std::cout << item.number << " " << name_map[item.number].name
+                  << ":\n";
+        std::cout << " Call count: " << item.call_count << '\n';
+        std::cout << " Call duration: " << item.call_duration << '\n';
+        // call_count is non-zero here, so the division needs no guard.
+        std::cout << " Average duration: "
+                  << item.call_duration * 1.0 / item.call_count << '\n';
+    }
+#endif
+}
+
+// Adds one sample of `duration` to the slot of function `number`.
+// An out-of-range `number` trips the assert; when asserts are disabled
+// the sample is dropped instead of indexing outside data_.
+void profiler::add_data(int number, uint64_t duration)
+{
+    const bool in_range =
+        number >= 0 && number < static_cast<int>(data_.size());
+    assert(in_range);
+    if (!in_range) {
+        return;
+    }
+    data_[number].call_count++;
+    data_[number].call_duration += duration;
+}
+
+profiler profiler_instance;
+
+} // unnamed namespace
+
+// Records the rdtsc() difference since construction under number_.
+profiling_checker::~profiling_checker()
+{
+    auto end_time = rdtsc();
+    profiler_instance.add_data(number_, end_time - start_time_);
+}
diff --git a/test/profiler/profiler.h b/test/profiler/profiler.h
new file mode 100644
index 0000000..d04264a
--- /dev/null
+++ b/test/profiler/profiler.h
@@ -0,0 +1,44 @@
+#ifndef PROFILER_H
+#define PROFILER_H
+
+#include "rdtsc.h"
+
+// One row of the table that maps a function number to a printable name.
+struct name_mapper {
+    int number;
+    const char* name;
+};
+
+// Declared here, defined outside profiler.cpp.  profiler.cpp asserts
+// that entry i has number == i and stops counting at the first entry
+// whose name is null.
+extern name_mapper name_map[];
+
+// Scope timer: stores rdtsc() on construction; the destructor (in
+// profiler.cpp) records the difference to a second rdtsc() reading under
+// `number`.
+class profiling_checker {
+public:
+    profiling_checker(int number);
+    ~profiling_checker();
+
+private:
+    int number_;
+    uint64_t start_time_;
+};
+
+inline profiling_checker::profiling_checker(int number)
+    : number_(number)
+    , start_time_(rdtsc())
+{
+}
+
+// Declares a profiling_checker named _checker for func_number in the
+// current scope; expands to a no-op when NDEBUG is defined.
+#ifdef NDEBUG
+#define PROFILE_CHECK(func_number) (void)0
+#else
+#define PROFILE_CHECK(func_number) profiling_checker _checker(func_number)
+#endif
+
+#endif // PROFILER_H
diff --git a/test/profiler/rdtsc.h b/test/profiler/rdtsc.h
new file mode 100644
index 0000000..80e35c7
--- /dev/null
+++ b/test/profiler/rdtsc.h
@@ -0,0 +1,57 @@
+#ifndef RDTSC_H
+#define RDTSC_H
+
+#include <stdint.h>  // uint64_t
+
+#if defined(_M_X64) || defined(_M_IX86) || defined(__x86_64) || defined(__i386)
+# ifdef _WIN32
+#  include <intrin.h>     // __rdtsc
+# else
+#  include <x86intrin.h>  // __rdtsc
+# endif
+# define HAS_HW_RDTSC 1
+#else
+# include <chrono>  // std::chrono::high_resolution_clock
+# define HAS_HW_RDTSC 0
+#endif
+
+// Returns a timestamp for interval measurement: the value of __rdtsc()
+// when HAS_HW_RDTSC is 1, otherwise the high_resolution_clock time since
+// epoch.  The profiler only uses the difference between two readings.
+inline uint64_t rdtsc()
+{
+#if HAS_HW_RDTSC
+    // _mm_lfence() might be used to serialize the instruction stream,
+    // and it would guarantee that RDTSC will not be reordered with
+    // other instructions.  However, measurements show that the overhead
+    // may be too big (easily 15 to 30 CPU cycles) for profiling
+    // purposes: if reordering matters, the overhead matters too!
+
+    // Forbid the compiler from reordering instructions
+# ifdef _MSC_VER
+    _ReadWriteBarrier();
+# else
+    __asm__ __volatile__("" : : : "memory");
+# endif
+
+    uint64_t result = __rdtsc();
+
+    // Forbid the compiler from reordering instructions
+# ifdef _MSC_VER
+    _ReadWriteBarrier();
+# else
+    __asm__ __volatile__("" : : : "memory");
+# endif
+
+    return result;
+#else
+    auto now = std::chrono::high_resolution_clock::now();
+    // NOTE(review): nanosecond resolution is assumed here -- confirm.
+    return static_cast<uint64_t>(
+        std::chrono::duration_cast<std::chrono::nanoseconds>(
+            now.time_since_epoch())
+            .count());
+#endif
+}
+
+#endif // RDTSC_H