mirror of
https://github.com/mutouyun/cpp-ipc.git
synced 2025-12-06 16:56:45 +08:00
Update CMakeLists.txt and expected.h, remove unused files, and improve thread safety in block_pool.cpp
This commit is contained in:
parent
f615f200df
commit
2a1d8fa5fa
@ -11,7 +11,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
if (MSVC)
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /fsanitize=address /Zi")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Zi")
|
||||
set(CompilerFlags
|
||||
CMAKE_CXX_FLAGS
|
||||
CMAKE_CXX_FLAGS_DEBUG
|
||||
|
||||
325
benchmark/benchmark_new.cpp
Normal file
325
benchmark/benchmark_new.cpp
Normal file
@ -0,0 +1,325 @@
|
||||
|
||||
#include <array>
|
||||
#include <cstdlib>
|
||||
#include <cstddef>
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
|
||||
#include "libpmr/new.h"
|
||||
|
||||
namespace {
|
||||
|
||||
/// \brief Fixed-capacity LIFO container holding up to N elements of type T.
/// \note No bounds checking is done: push() on a full cache or pop() on an empty one
///       is the caller's responsibility to avoid (see at_end() / at_begin()).
template <typename T, std::size_t N>
class cache {
  std::array<T, N> data_{};
  std::size_t idx_{};  ///< Index of the next free slot, i.e. the number of stored elements.

public:
  /// \brief Stores `u` in the next free slot and advances the cursor.
  template <typename U>
  void push(U &&u) noexcept {
    T &slot = data_[idx_];
    slot = std::forward<U>(u);
    ++idx_;
  }

  /// \brief Steps the cursor back and returns a reference to the most recently pushed element.
  T &pop() noexcept {
    --idx_;
    return data_[idx_];
  }

  /// \brief True when no element is stored.
  bool at_begin() const noexcept {
    return 0 == idx_;
  }

  /// \brief True when all N slots are occupied.
  bool at_end() const noexcept {
    return N == idx_;
  }
};
|
||||
|
||||
template <typename P, std::size_t CacheSize = 128>
|
||||
class test_suit {
|
||||
void next(std::size_t &idx) noexcept {
|
||||
idx = (idx + 1) % 3;
|
||||
}
|
||||
|
||||
public:
|
||||
~test_suit() noexcept {
|
||||
for (auto &pts : pts_) {
|
||||
while (!pts.at_begin()) {
|
||||
P::deallocate(pts.pop());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool test_allocate() noexcept {
|
||||
auto &pts = pts_[idx_a_];
|
||||
pts.push(P::allocate());
|
||||
if (pts.at_end()) {
|
||||
next(idx_a_);
|
||||
idx_d_ = idx_a_;
|
||||
}
|
||||
return ++allocated_ < CacheSize;
|
||||
}
|
||||
|
||||
bool test_deallocate() noexcept {
|
||||
auto &pts = pts_[idx_d_];
|
||||
if (pts.at_begin()) {
|
||||
next(idx_d_);
|
||||
if (allocated_ == CacheSize) {
|
||||
allocated_ = CacheSize / 2;
|
||||
return true;
|
||||
}
|
||||
return allocated_ > 0;
|
||||
}
|
||||
P::deallocate(pts.pop());
|
||||
--allocated_;
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
cache<void *, CacheSize / 2> pts_[3];
|
||||
std::size_t idx_a_{};
|
||||
std::size_t idx_d_{};
|
||||
std::size_t allocated_{};
|
||||
};
|
||||
|
||||
/// \brief Allocation policy backed by `std::malloc` / `std::free`.
/// \tparam AllocSize Number of bytes requested by each allocate() call.
template <std::size_t AllocSize>
struct policy_malloc {
  /// \brief Requests `AllocSize` bytes from `std::malloc` and returns the result as is.
  static void *allocate() noexcept {
    void *const block = std::malloc(AllocSize);
    return block;
  }

  /// \brief Passes `p` to `std::free`.
  static void deallocate(void *p) noexcept {
    std::free(p);
  }
};
|
||||
|
||||
/// \brief Allocation policy backed by the array forms of `new` / `delete`.
/// \tparam AllocSize Number of `char` elements requested by each allocate() call.
template <std::size_t AllocSize>
struct policy_cpp_new {
  /// \brief Allocates a `char[AllocSize]` array with `new[]`.
  static void *allocate() noexcept {
    char *const bytes = new char[AllocSize];
    return bytes;
  }

  /// \brief Releases an array obtained from allocate() with `delete[]`.
  static void deallocate(void *p) noexcept {
    char *const bytes = static_cast<char *>(p);
    delete[] bytes;
  }
};
|
||||
|
||||
template <std::size_t AllocSize>
|
||||
struct policy_pmr_new {
|
||||
static void *allocate() noexcept {
|
||||
return pmr::new$<std::array<char, AllocSize>>();
|
||||
}
|
||||
|
||||
static void deallocate(void *p) noexcept {
|
||||
pmr::delete$(static_cast<std::array<char, AllocSize> *>(p));
|
||||
}
|
||||
};
|
||||
|
||||
template <template <std::size_t> class P, std::size_t AllocSize>
|
||||
void pmr_allocate(benchmark::State &state) {
|
||||
test_suit<P<AllocSize>> suit;
|
||||
for (auto _ : state) {
|
||||
if (suit.test_allocate()) continue;
|
||||
state.PauseTiming();
|
||||
while (suit.test_deallocate()) ;
|
||||
state.ResumeTiming();
|
||||
}
|
||||
}
|
||||
|
||||
template <template <std::size_t> class P, std::size_t AllocSize>
|
||||
void pmr_deallocate(benchmark::State &state) {
|
||||
test_suit<P<AllocSize>> suit;
|
||||
for (auto _ : state) {
|
||||
if (suit.test_deallocate()) continue;
|
||||
state.PauseTiming();
|
||||
while (suit.test_allocate()) ;
|
||||
state.ResumeTiming();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Benchmark registrations: for each policy (policy_malloc, policy_cpp_new, policy_pmr_new)
// the allocate and the deallocate benchmark are registered with AllocSize 8, 32, 128 and 1024,
// each one run with ThreadRange(1, 16).
BENCHMARK(pmr_allocate<policy_malloc, 8>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_allocate<policy_malloc, 32>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_allocate<policy_malloc, 128>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_allocate<policy_malloc, 1024>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_malloc, 8>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_malloc, 32>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_malloc, 128>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_malloc, 1024>)->ThreadRange(1, 16);
|
||||
|
||||
BENCHMARK(pmr_allocate<policy_cpp_new, 8>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_allocate<policy_cpp_new, 32>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_allocate<policy_cpp_new, 128>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_allocate<policy_cpp_new, 1024>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_cpp_new, 8>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_cpp_new, 32>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_cpp_new, 128>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_cpp_new, 1024>)->ThreadRange(1, 16);
|
||||
|
||||
BENCHMARK(pmr_allocate<policy_pmr_new, 8>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_allocate<policy_pmr_new, 32>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_allocate<policy_pmr_new, 128>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_allocate<policy_pmr_new, 1024>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_pmr_new, 8>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_pmr_new, 32>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_pmr_new, 128>)->ThreadRange(1, 16);
|
||||
BENCHMARK(pmr_deallocate<policy_pmr_new, 1024>)->ThreadRange(1, 16);
|
||||
|
||||
/*
|
||||
Run on (16 X 2313.68 MHz CPU s)
|
||||
CPU Caches:
|
||||
L1 Data 48 KiB (x8)
|
||||
L1 Instruction 32 KiB (x8)
|
||||
L2 Unified 1280 KiB (x8)
|
||||
L3 Unified 24576 KiB (x1)
|
||||
------------------------------------------------------------------------------------------
|
||||
Benchmark Time CPU Iterations
|
||||
------------------------------------------------------------------------------------------
|
||||
pmr_allocate<policy_malloc, 8>/threads:1 28.9 ns 29.8 ns 29866667
|
||||
pmr_allocate<policy_malloc, 8>/threads:2 28.9 ns 53.7 ns 12800000
|
||||
pmr_allocate<policy_malloc, 8>/threads:4 13.4 ns 43.9 ns 14933332
|
||||
pmr_allocate<policy_malloc, 8>/threads:8 11.3 ns 68.4 ns 8000000
|
||||
pmr_allocate<policy_malloc, 8>/threads:16 5.07 ns 56.7 ns 14336000
|
||||
|
||||
pmr_allocate<policy_malloc, 32>/threads:1 44.0 ns 42.1 ns 19298462
|
||||
pmr_allocate<policy_malloc, 32>/threads:2 27.4 ns 54.9 ns 12800000
|
||||
pmr_allocate<policy_malloc, 32>/threads:4 11.7 ns 47.1 ns 17920000
|
||||
pmr_allocate<policy_malloc, 32>/threads:8 5.96 ns 43.7 ns 21082352
|
||||
pmr_allocate<policy_malloc, 32>/threads:16 4.09 ns 56.7 ns 17920000
|
||||
|
||||
pmr_allocate<policy_malloc, 128>/threads:1 45.8 ns 39.6 ns 16592593
|
||||
pmr_allocate<policy_malloc, 128>/threads:2 36.9 ns 75.3 ns 9955556
|
||||
pmr_allocate<policy_malloc, 128>/threads:4 16.3 ns 66.7 ns 11946668
|
||||
pmr_allocate<policy_malloc, 128>/threads:8 10.7 ns 77.1 ns 13784616
|
||||
pmr_allocate<policy_malloc, 128>/threads:16 7.87 ns 94.8 ns 14336000
|
||||
|
||||
pmr_allocate<policy_malloc, 1024>/threads:1 75.5 ns 78.8 ns 8726261
|
||||
pmr_allocate<policy_malloc, 1024>/threads:2 49.6 ns 46.9 ns 20000000
|
||||
pmr_allocate<policy_malloc, 1024>/threads:4 18.4 ns 40.8 ns 29866668
|
||||
pmr_allocate<policy_malloc, 1024>/threads:8 6.56 ns 25.6 ns 29866664
|
||||
pmr_allocate<policy_malloc, 1024>/threads:16 6.25 ns 56.6 ns 16000000
|
||||
------------------------------------------------------------------------------------------
|
||||
pmr_deallocate<policy_malloc, 8>/threads:1 18.6 ns 19.9 ns 47786667
|
||||
pmr_deallocate<policy_malloc, 8>/threads:2 8.52 ns 16.2 ns 47157894
|
||||
pmr_deallocate<policy_malloc, 8>/threads:4 4.75 ns 18.8 ns 40000000
|
||||
pmr_deallocate<policy_malloc, 8>/threads:8 3.18 ns 24.7 ns 51200000
|
||||
pmr_deallocate<policy_malloc, 8>/threads:16 2.94 ns 38.1 ns 16000000
|
||||
|
||||
pmr_deallocate<policy_malloc, 32>/threads:1 18.0 ns 16.7 ns 45875200
|
||||
pmr_deallocate<policy_malloc, 32>/threads:2 8.76 ns 19.9 ns 47157894
|
||||
pmr_deallocate<policy_malloc, 32>/threads:4 4.50 ns 14.6 ns 44800000
|
||||
pmr_deallocate<policy_malloc, 32>/threads:8 2.70 ns 16.7 ns 59733336
|
||||
pmr_deallocate<policy_malloc, 32>/threads:16 2.30 ns 36.6 ns 23893328
|
||||
|
||||
pmr_deallocate<policy_malloc, 128>/threads:1 17.9 ns 16.3 ns 34461538
|
||||
pmr_deallocate<policy_malloc, 128>/threads:2 8.29 ns 21.0 ns 66901334
|
||||
pmr_deallocate<policy_malloc, 128>/threads:4 4.40 ns 14.5 ns 40000000
|
||||
pmr_deallocate<policy_malloc, 128>/threads:8 2.65 ns 20.1 ns 80000000
|
||||
pmr_deallocate<policy_malloc, 128>/threads:16 2.95 ns 40.5 ns 24717248
|
||||
|
||||
pmr_deallocate<policy_malloc, 1024>/threads:1 19.0 ns 20.4 ns 34461538
|
||||
pmr_deallocate<policy_malloc, 1024>/threads:2 8.62 ns 19.7 ns 38956522
|
||||
pmr_deallocate<policy_malloc, 1024>/threads:4 4.70 ns 11.7 ns 64000000
|
||||
pmr_deallocate<policy_malloc, 1024>/threads:8 2.80 ns 16.0 ns 80000000
|
||||
pmr_deallocate<policy_malloc, 1024>/threads:16 2.45 ns 26.1 ns 27569232
|
||||
------------------------------------------------------------------------------------------
|
||||
Benchmark Time CPU Iterations
|
||||
------------------------------------------------------------------------------------------
|
||||
pmr_allocate<policy_cpp_new, 8>/threads:1 69.5 ns 78.5 ns 8960000
|
||||
pmr_allocate<policy_cpp_new, 8>/threads:2 24.0 ns 40.1 ns 17920000
|
||||
pmr_allocate<policy_cpp_new, 8>/threads:4 10.4 ns 37.1 ns 29866668
|
||||
pmr_allocate<policy_cpp_new, 8>/threads:8 5.39 ns 42.2 ns 21082352
|
||||
pmr_allocate<policy_cpp_new, 8>/threads:16 3.40 ns 45.9 ns 16000000
|
||||
|
||||
pmr_allocate<policy_cpp_new, 32>/threads:1 29.4 ns 29.5 ns 34461538
|
||||
pmr_allocate<policy_cpp_new, 32>/threads:2 24.8 ns 43.9 ns 14933334
|
||||
pmr_allocate<policy_cpp_new, 32>/threads:4 11.5 ns 45.1 ns 16290908
|
||||
pmr_allocate<policy_cpp_new, 32>/threads:8 5.26 ns 34.8 ns 25600000
|
||||
pmr_allocate<policy_cpp_new, 32>/threads:16 5.82 ns 90.7 ns 5513840
|
||||
|
||||
pmr_allocate<policy_cpp_new, 128>/threads:1 74.0 ns 66.3 ns 8960000
|
||||
pmr_allocate<policy_cpp_new, 128>/threads:2 37.7 ns 80.6 ns 14933334
|
||||
pmr_allocate<policy_cpp_new, 128>/threads:4 15.1 ns 52.1 ns 13784616
|
||||
pmr_allocate<policy_cpp_new, 128>/threads:8 9.84 ns 63.2 ns 12358624
|
||||
pmr_allocate<policy_cpp_new, 128>/threads:16 8.05 ns 110 ns 11946672
|
||||
|
||||
pmr_allocate<policy_cpp_new, 1024>/threads:1 79.5 ns 82.8 ns 10000000
|
||||
pmr_allocate<policy_cpp_new, 1024>/threads:2 27.2 ns 59.6 ns 14933334
|
||||
pmr_allocate<policy_cpp_new, 1024>/threads:4 12.3 ns 37.5 ns 40000000
|
||||
pmr_allocate<policy_cpp_new, 1024>/threads:8 10.3 ns 29.8 ns 39822224
|
||||
pmr_allocate<policy_cpp_new, 1024>/threads:16 6.17 ns 39.1 ns 16000000
|
||||
------------------------------------------------------------------------------------------
|
||||
pmr_deallocate<policy_cpp_new, 8>/threads:1 17.9 ns 18.9 ns 50301754
|
||||
pmr_deallocate<policy_cpp_new, 8>/threads:2 8.73 ns 15.4 ns 51794580
|
||||
pmr_deallocate<policy_cpp_new, 8>/threads:4 4.26 ns 15.2 ns 40000000
|
||||
pmr_deallocate<policy_cpp_new, 8>/threads:8 2.46 ns 17.9 ns 71680000
|
||||
pmr_deallocate<policy_cpp_new, 8>/threads:16 2.20 ns 34.0 ns 27569232
|
||||
|
||||
pmr_deallocate<policy_cpp_new, 32>/threads:1 16.1 ns 16.5 ns 56000000
|
||||
pmr_deallocate<policy_cpp_new, 32>/threads:2 8.84 ns 16.1 ns 68923076
|
||||
pmr_deallocate<policy_cpp_new, 32>/threads:4 4.48 ns 19.5 ns 44800000
|
||||
pmr_deallocate<policy_cpp_new, 32>/threads:8 2.65 ns 15.3 ns 35840000
|
||||
pmr_deallocate<policy_cpp_new, 32>/threads:16 2.20 ns 35.3 ns 23893328
|
||||
|
||||
pmr_deallocate<policy_cpp_new, 128>/threads:1 18.3 ns 21.1 ns 40727273
|
||||
pmr_deallocate<policy_cpp_new, 128>/threads:2 8.83 ns 15.6 ns 38956522
|
||||
pmr_deallocate<policy_cpp_new, 128>/threads:4 4.51 ns 17.6 ns 40000000
|
||||
pmr_deallocate<policy_cpp_new, 128>/threads:8 2.93 ns 18.4 ns 39841984
|
||||
pmr_deallocate<policy_cpp_new, 128>/threads:16 2.85 ns 36.1 ns 16000000
|
||||
|
||||
pmr_deallocate<policy_cpp_new, 1024>/threads:1 19.6 ns 16.0 ns 49777778
|
||||
pmr_deallocate<policy_cpp_new, 1024>/threads:2 8.90 ns 17.9 ns 43631304
|
||||
pmr_deallocate<policy_cpp_new, 1024>/threads:4 4.93 ns 13.7 ns 40000000
|
||||
pmr_deallocate<policy_cpp_new, 1024>/threads:8 2.72 ns 19.9 ns 80000000
|
||||
pmr_deallocate<policy_cpp_new, 1024>/threads:16 2.23 ns 29.0 ns 39822224
|
||||
------------------------------------------------------------------------------------------
|
||||
Benchmark Time CPU Iterations
|
||||
------------------------------------------------------------------------------------------
|
||||
pmr_allocate<policy_pmr_new, 8>/threads:1 17.6 ns 18.8 ns 49777778
|
||||
pmr_allocate<policy_pmr_new, 8>/threads:2 9.16 ns 14.5 ns 34461538
|
||||
pmr_allocate<policy_pmr_new, 8>/threads:4 4.82 ns 18.8 ns 40000000
|
||||
pmr_allocate<policy_pmr_new, 8>/threads:8 2.73 ns 17.4 ns 71680000
|
||||
pmr_allocate<policy_pmr_new, 8>/threads:16 2.10 ns 34.1 ns 24717248
|
||||
|
||||
pmr_allocate<policy_pmr_new, 32>/threads:1 17.5 ns 14.6 ns 44800000
|
||||
pmr_allocate<policy_pmr_new, 32>/threads:2 8.88 ns 16.0 ns 38956522
|
||||
pmr_allocate<policy_pmr_new, 32>/threads:4 4.71 ns 17.8 ns 44800000
|
||||
pmr_allocate<policy_pmr_new, 32>/threads:8 2.57 ns 16.7 ns 44800000
|
||||
pmr_allocate<policy_pmr_new, 32>/threads:16 2.08 ns 41.0 ns 16000000
|
||||
|
||||
pmr_allocate<policy_pmr_new, 128>/threads:1 18.5 ns 14.4 ns 44600889
|
||||
pmr_allocate<policy_pmr_new, 128>/threads:2 10.7 ns 22.5 ns 38956522
|
||||
pmr_allocate<policy_pmr_new, 128>/threads:4 5.09 ns 21.3 ns 44800000
|
||||
pmr_allocate<policy_pmr_new, 128>/threads:8 2.78 ns 19.6 ns 59733336
|
||||
pmr_allocate<policy_pmr_new, 128>/threads:16 2.33 ns 28.8 ns 26548144
|
||||
|
||||
pmr_allocate<policy_pmr_new, 1024>/threads:1 24.8 ns 27.9 ns 22400000
|
||||
pmr_allocate<policy_pmr_new, 1024>/threads:2 12.8 ns 24.0 ns 35840000
|
||||
pmr_allocate<policy_pmr_new, 1024>/threads:4 6.67 ns 26.5 ns 44800000
|
||||
pmr_allocate<policy_pmr_new, 1024>/threads:8 4.28 ns 28.3 ns 29866664
|
||||
pmr_allocate<policy_pmr_new, 1024>/threads:16 3.21 ns 41.0 ns 16000000
|
||||
------------------------------------------------------------------------------------------
|
||||
pmr_deallocate<policy_pmr_new, 8>/threads:1 17.1 ns 16.3 ns 47786667
|
||||
pmr_deallocate<policy_pmr_new, 8>/threads:2 8.89 ns 18.1 ns 34461538
|
||||
pmr_deallocate<policy_pmr_new, 8>/threads:4 4.50 ns 16.1 ns 35840000
|
||||
pmr_deallocate<policy_pmr_new, 8>/threads:8 2.77 ns 17.3 ns 59733336
|
||||
pmr_deallocate<policy_pmr_new, 8>/threads:16 1.98 ns 34.2 ns 16000000
|
||||
|
||||
pmr_deallocate<policy_pmr_new, 32>/threads:1 17.1 ns 14.8 ns 49777778
|
||||
pmr_deallocate<policy_pmr_new, 32>/threads:2 8.70 ns 18.7 ns 56000000
|
||||
pmr_deallocate<policy_pmr_new, 32>/threads:4 4.58 ns 14.3 ns 44800000
|
||||
pmr_deallocate<policy_pmr_new, 32>/threads:8 2.72 ns 17.3 ns 89600000
|
||||
pmr_deallocate<policy_pmr_new, 32>/threads:16 2.04 ns 30.5 ns 25600000
|
||||
|
||||
pmr_deallocate<policy_pmr_new, 128>/threads:1 17.2 ns 15.3 ns 44800000
|
||||
pmr_deallocate<policy_pmr_new, 128>/threads:2 8.67 ns 15.7 ns 52705882
|
||||
pmr_deallocate<policy_pmr_new, 128>/threads:4 4.67 ns 18.7 ns 35840000
|
||||
pmr_deallocate<policy_pmr_new, 128>/threads:8 2.76 ns 18.0 ns 39822224
|
||||
pmr_deallocate<policy_pmr_new, 128>/threads:16 2.04 ns 28.3 ns 27569232
|
||||
|
||||
pmr_deallocate<policy_pmr_new, 1024>/threads:1 17.2 ns 21.5 ns 32000000
|
||||
pmr_deallocate<policy_pmr_new, 1024>/threads:2 8.74 ns 16.0 ns 38956522
|
||||
pmr_deallocate<policy_pmr_new, 1024>/threads:4 4.84 ns 16.7 ns 44800000
|
||||
pmr_deallocate<policy_pmr_new, 1024>/threads:8 2.73 ns 16.0 ns 44800000
|
||||
pmr_deallocate<policy_pmr_new, 1024>/threads:16 2.04 ns 37.1 ns 16000000
|
||||
*/
|
||||
@ -8,8 +8,6 @@
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "libimp/aligned.h"
|
||||
|
||||
#define LIBPMR pmr
|
||||
#define LIBPMR_NAMESPACE_BEG_ namespace LIBPMR {
|
||||
#define LIBPMR_NAMESPACE_END_ }
|
||||
@ -20,7 +18,6 @@ LIBPMR_NAMESPACE_BEG_
|
||||
|
||||
enum : std::size_t {
|
||||
central_cache_default_size = 1024 * 1024, ///< 1MB
|
||||
regular_head_size = ::LIBIMP::round_up(sizeof(std::size_t), alignof(std::max_align_t)),
|
||||
};
|
||||
|
||||
LIBPMR_NAMESPACE_END_
|
||||
|
||||
@ -7,7 +7,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <unordered_map>
|
||||
#include <algorithm>
|
||||
|
||||
#include "libimp/aligned.h"
|
||||
@ -22,6 +21,18 @@
|
||||
|
||||
LIBPMR_NAMESPACE_BEG_
|
||||
|
||||
/// \brief Defines the memory block collector interface.
|
||||
class LIBIMP_EXPORT block_collector {
|
||||
public:
|
||||
virtual ~block_collector() noexcept = default;
|
||||
virtual void deallocate(void *p) noexcept = 0;
|
||||
};
|
||||
|
||||
using get_block_collector_t = block_collector *(*)() noexcept;
|
||||
|
||||
static constexpr std::size_t regular_head_size
|
||||
= ::LIBIMP::round_up(sizeof(get_block_collector_t), alignof(std::max_align_t));
|
||||
|
||||
/// \brief Select the incremental level based on the size.
|
||||
constexpr inline std::size_t regular_level(std::size_t s) noexcept {
|
||||
return (s <= 128 ) ? 0 :
|
||||
@ -32,52 +43,25 @@ constexpr inline std::size_t regular_level(std::size_t s) noexcept {
|
||||
|
||||
/// \brief Calculates the appropriate memory block size based on the increment level and size.
|
||||
constexpr inline std::size_t regular_sizeof_impl(std::size_t l, std::size_t s) noexcept {
|
||||
return (l == 0) ? std::max<std::size_t>(::LIBIMP::round_up<std::size_t>(s, 8), regular_head_size) :
|
||||
return (l == 0) ? ::LIBIMP::round_up<std::size_t>(s, regular_head_size) :
|
||||
(l == 1) ? ::LIBIMP::round_up<std::size_t>(s, 128 ) :
|
||||
(l == 2) ? ::LIBIMP::round_up<std::size_t>(s, 1024) :
|
||||
(l == 3) ? ::LIBIMP::round_up<std::size_t>(s, 8192) : (std::numeric_limits<std::size_t>::max)();
|
||||
}
|
||||
|
||||
/// \brief Calculates the appropriate memory block size based on the size.
|
||||
constexpr inline std::size_t regular_sizeof(std::size_t s) noexcept {
|
||||
constexpr inline std::size_t regular_sizeof_impl(std::size_t s) noexcept {
|
||||
return regular_sizeof_impl(regular_level(s), s);
|
||||
}
|
||||
|
||||
/// \brief Calculates the appropriate memory block size based on the specific type.
|
||||
template <typename T>
|
||||
constexpr inline std::size_t regular_sizeof() noexcept {
|
||||
return regular_sizeof(regular_head_size + sizeof(T));
|
||||
return regular_sizeof_impl(regular_head_size + sizeof(T));
|
||||
}
|
||||
|
||||
/// \brief Defines the memory block collector interface.
|
||||
class LIBIMP_EXPORT block_collector {
|
||||
public:
|
||||
virtual ~block_collector() noexcept = default;
|
||||
virtual void deallocate(void *p) noexcept = 0;
|
||||
};
|
||||
|
||||
/// \brief Gets all block pools of the thread cache.
|
||||
LIBIMP_EXPORT auto get_thread_block_pool_map() noexcept
|
||||
-> std::unordered_map<std::size_t, block_collector *> &;
|
||||
|
||||
/// \brief Defines block pool memory resource based on block pool.
|
||||
template <std::size_t BlockSize, std::size_t BlockPoolExpansion>
|
||||
class block_pool_resource;
|
||||
|
||||
/// \brief Memory block collector of unknown size.
|
||||
/// \note This memory resource is only used to temporarily collect memory blocks
|
||||
/// that cannot find a suitable block pool memory resource.
|
||||
template <>
|
||||
class block_pool_resource<0, 0> : public block_pool<0, 0>
|
||||
, public block_collector {
|
||||
public:
|
||||
void deallocate(void *p) noexcept override {
|
||||
block_pool<0, 0>::deallocate(p);
|
||||
}
|
||||
};
|
||||
|
||||
/// \brief A block pool memory resource for a block of memory of a specific size.
|
||||
template <std::size_t BlockSize, std::size_t BlockPoolExpansion>
|
||||
class block_pool_resource : public block_pool<BlockSize, BlockPoolExpansion>
|
||||
, public block_collector {
|
||||
|
||||
@ -88,99 +72,59 @@ class block_pool_resource : public block_pool<BlockSize, BlockPoolExpansion>
|
||||
}
|
||||
|
||||
public:
|
||||
static block_pool_resource *get() noexcept;
|
||||
static block_collector *get() noexcept {
|
||||
thread_local block_pool_resource instance;
|
||||
return &instance;
|
||||
}
|
||||
|
||||
using base_t::base_t;
|
||||
|
||||
void *allocate(std::size_t /*bytes*/, std::size_t /*alignment*/ = alignof(std::max_align_t)) noexcept {
|
||||
void *p = base_t::allocate();
|
||||
p = ::LIBIMP::construct<std::size_t>(p, BlockSize);
|
||||
return reinterpret_cast<::LIBIMP::byte *>(p) + regular_head_size;
|
||||
*static_cast<get_block_collector_t *>(p) = get;
|
||||
return static_cast<::LIBIMP::byte *>(p) + regular_head_size;
|
||||
}
|
||||
|
||||
void deallocate(void *p, std::size_t /*bytes*/, std::size_t /*alignment*/ = alignof(std::max_align_t)) noexcept {
|
||||
p = reinterpret_cast<::LIBIMP::byte *>(p) - regular_head_size;
|
||||
auto r_size = *static_cast<std::size_t *>(p);
|
||||
if (r_size <= BlockSize) {
|
||||
p = static_cast<::LIBIMP::byte *>(p) - regular_head_size;
|
||||
auto g = *static_cast<get_block_collector_t *>(p);
|
||||
if (g == get) {
|
||||
base_t::deallocate(p);
|
||||
return;
|
||||
}
|
||||
// When the actual size exceeds the current memory block size,
|
||||
// try to find a suitable pool among all memory block pools for this thread.
|
||||
auto &map = get_thread_block_pool_map();
|
||||
auto it = map.find(r_size);
|
||||
if ((it == map.end()) || (it->second == nullptr)) {
|
||||
block_pool_resource<0, 0> *bp = nullptr;
|
||||
LIBIMP_TRY {
|
||||
// If the corresponding memory resource cannot be found,
|
||||
// create a temporary general-purpose block pool to deallocate memory.
|
||||
it = map.emplace(r_size, bp = new block_pool_resource<0, 0>).first;
|
||||
} LIBIMP_CATCH(...) {
|
||||
// If the memory resource cannot be created,
|
||||
// store the pointer directly to avoid leakage.
|
||||
delete bp;
|
||||
base_t::deallocate(p);
|
||||
return;
|
||||
}
|
||||
}
|
||||
it->second->deallocate(p);
|
||||
g()->deallocate(p);
|
||||
}
|
||||
};
|
||||
|
||||
template <std::size_t BlockSize, std::size_t BlockPoolExpansion>
|
||||
auto block_pool_resource<BlockSize, BlockPoolExpansion>::get() noexcept
|
||||
-> block_pool_resource<BlockSize, BlockPoolExpansion> * {
|
||||
thread_local block_pool_resource *pi = nullptr;
|
||||
if (pi != nullptr) {
|
||||
return pi;
|
||||
}
|
||||
// Create a new block pool resource for this thread.
|
||||
auto &map = get_thread_block_pool_map();
|
||||
auto it = map.find(BlockSize);
|
||||
if ((it != map.end()) && (it->second != nullptr)) {
|
||||
// If there are existing block pool resources in the thread cache,
|
||||
// a new block pool resource is constructed based on it and the cache is updated.
|
||||
auto *bp = static_cast <block_pool<0, 0> *>(
|
||||
dynamic_cast<block_pool_resource<0, 0> *>(it->second));
|
||||
if (bp == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
thread_local block_pool_resource instance(std::move(*bp));
|
||||
delete static_cast<block_pool_resource<0, 0> *>(bp);
|
||||
it->second = pi = &instance;
|
||||
return pi;
|
||||
} else {
|
||||
// If there are no existing block pool resources in the thread cache,
|
||||
// the thread local storage instance is constructed and the pointer is cached.
|
||||
thread_local block_pool_resource instance;
|
||||
LIBIMP_TRY {
|
||||
map.emplace(BlockSize, pi = &instance);
|
||||
return pi;
|
||||
} LIBIMP_CATCH(...) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Match the appropriate memory block resources
|
||||
/// according to the size of the specification.
|
||||
template <std::size_t N, std::size_t L = regular_level(N)>
|
||||
class regular_resource : public new_delete_resource {};
|
||||
|
||||
/// \brief Different increment levels match different chunk sizes.
|
||||
/// 512 means that 512 consecutive memory blocks are allocated at a time, and the block size is N.
|
||||
template <std::size_t N> class regular_resource<N, 0> : public block_pool_resource<N, 512> {};
|
||||
template <std::size_t N> class regular_resource<N, 1> : public block_pool_resource<N, 256> {};
|
||||
template <std::size_t N> class regular_resource<N, 2> : public block_pool_resource<N, 128> {};
|
||||
template <std::size_t N> class regular_resource<N, 3> : public block_pool_resource<N, 64 > {};
|
||||
/// \brief Block pool expansion count for each increment level.
/// \details Levels 0, 1, 2 and 3 map to 512, 256, 128 and 64 respectively;
///          any other level yields 0.
/// \note Written as one primary template instead of explicit specializations:
///       an explicit specialization must not carry a storage-class specifier
///       such as `static`, and conforming compilers (e.g. GCC) reject it.
template <std::size_t L>
constexpr static std::size_t block_pool_expansion =
    (L == 0) ? 512 :
    (L == 1) ? 256 :
    (L == 2) ? 128 :
    (L == 3) ? 64  : 0;
|
||||
|
||||
/// \brief Match the appropriate memory block resources according to the size of the specification.
|
||||
template <std::size_t N, std::size_t L = regular_level(N)>
|
||||
struct regular_resource {
|
||||
static auto *get() noexcept {
|
||||
using block_poll_resource_t = block_pool_resource<N, block_pool_expansion<L>>;
|
||||
return dynamic_cast<block_poll_resource_t *>(block_poll_resource_t::get());
|
||||
}
|
||||
};
|
||||
|
||||
template <std::size_t N>
|
||||
struct regular_resource<N, 4> : new_delete_resource {};
|
||||
|
||||
/// \brief Creates an object based on the specified type and parameters with block pool resource.
|
||||
/// \note This function is thread-safe.
|
||||
template <typename T, typename... A>
|
||||
T *new$(A &&... args) noexcept {
|
||||
auto *mem_res = regular_resource<regular_sizeof<T>()>::get();
|
||||
if (mem_res == nullptr) return nullptr;
|
||||
return ::LIBIMP::construct<T>(mem_res->allocate(sizeof(T), alignof(T)), std::forward<A>(args)...);
|
||||
auto *res = regular_resource<regular_sizeof<T>()>::get();
|
||||
if (res == nullptr) return nullptr;
|
||||
return ::LIBIMP::construct<T>(res->allocate(sizeof(T), alignof(T)), std::forward<A>(args)...);
|
||||
}
|
||||
|
||||
/// \brief Destroys object previously allocated by the `new$` and releases obtained memory area.
|
||||
@ -190,13 +134,13 @@ template <typename T>
|
||||
void delete$(T *p) noexcept {
|
||||
if (p == nullptr) return;
|
||||
::LIBIMP::destroy(p);
|
||||
auto *mem_res = regular_resource<regular_sizeof<T>()>::get();
|
||||
if (mem_res == nullptr) return;
|
||||
auto *res = regular_resource<regular_sizeof<T>()>::get();
|
||||
if (res == nullptr) return;
|
||||
#if defined(LIBIMP_CC_MSVC_2015)
|
||||
// `alignof` of vs2015 requires that type must be able to be instantiated.
|
||||
mem_res->deallocate(p, sizeof(T));
|
||||
res->deallocate(p, sizeof(T));
|
||||
#else
|
||||
mem_res->deallocate(p, sizeof(T), alignof(T));
|
||||
res->deallocate(p, sizeof(T), alignof(T));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -1,13 +1,41 @@
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include "libimp/detect_plat.h"
|
||||
|
||||
#include "libpmr/block_pool.h"
|
||||
#include "libpmr/monotonic_buffer_resource.h"
|
||||
|
||||
LIBPMR_NAMESPACE_BEG_
|
||||
|
||||
class thread_safe_resource : public monotonic_buffer_resource {
|
||||
public:
|
||||
thread_safe_resource(::LIBIMP::span<::LIBIMP::byte> buffer) noexcept
|
||||
: monotonic_buffer_resource(buffer) {}
|
||||
|
||||
~thread_safe_resource() noexcept {
|
||||
LIBIMP_UNUSED std::lock_guard<std::mutex> lock(mutex_);
|
||||
monotonic_buffer_resource::release();
|
||||
}
|
||||
|
||||
void *allocate(std::size_t bytes, std::size_t alignment) noexcept {
|
||||
LIBIMP_UNUSED std::lock_guard<std::mutex> lock(mutex_);
|
||||
return monotonic_buffer_resource::allocate(bytes, alignment);
|
||||
}
|
||||
|
||||
void deallocate(void *p, std::size_t bytes, std::size_t alignment) noexcept {
|
||||
LIBIMP_UNUSED std::lock_guard<std::mutex> lock(mutex_);
|
||||
monotonic_buffer_resource::deallocate(p, bytes, alignment);
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
allocator &central_cache_allocator() noexcept {
|
||||
static std::array<::LIBIMP::byte, central_cache_default_size> buffer;
|
||||
static monotonic_buffer_resource mr(buffer);
|
||||
static allocator a(&mr);
|
||||
static std::array<::LIBIMP::byte, central_cache_default_size> buf;
|
||||
static thread_safe_resource res(buf);
|
||||
static allocator a(&res);
|
||||
return a;
|
||||
}
|
||||
|
||||
|
||||
@ -1,12 +0,0 @@
|
||||
|
||||
#include "libpmr/new.h"
|
||||
|
||||
LIBPMR_NAMESPACE_BEG_
|
||||
|
||||
auto get_thread_block_pool_map() noexcept
|
||||
-> std::unordered_map<std::size_t, block_collector *> & {
|
||||
thread_local std::unordered_map<std::size_t, block_collector *> instances;
|
||||
return instances;
|
||||
}
|
||||
|
||||
LIBPMR_NAMESPACE_END_
|
||||
@ -3,6 +3,7 @@
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <cstddef>
|
||||
#include <thread>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
@ -114,3 +115,32 @@ TEST(pmr_new, delete$null) {
|
||||
pmr::delete$(p);
|
||||
SUCCEED();
|
||||
}
|
||||
|
||||
/// Exercises `pmr::new$` / `pmr::delete$` from 16 threads at the same time.
/// Each thread first allocates and immediately frees 10000 ints, then keeps 10000
/// `std::array<char, 10>` blocks alive at once, checks that each block still holds the
/// byte pattern written into it, and frees them.
TEST(pmr_new, multi_thread) {
  std::array<std::thread, 16> threads;
  for (auto &t : threads) {
    t = std::thread([] {
      using block_t = std::array<char, 10>;
      for (int i = 0; i < 10000; ++i) {
        auto p = pmr::new$<int>();
        // `new$` reports failure by returning nullptr; do not write through it.
        ASSERT_TRUE(p != nullptr);
        *p = i;
        pmr::delete$(p);
      }
      std::array<void *, 10000> pts{};
      for (int i = 0; i < 10000; ++i) {
        auto p = pmr::new$<block_t>();
        // On failure the thread stops here; blocks already stored in `pts` are not
        // released, which is acceptable because the test has failed at that point.
        ASSERT_TRUE(p != nullptr);
        pts[i] = p;
        std::memset(p, i, sizeof(block_t));
      }
      for (int i = 0; i < 10000; ++i) {
        block_t tmp;
        std::memset(&tmp, i, sizeof(block_t));
        ASSERT_EQ(std::memcmp(pts[i], &tmp, sizeof(block_t)), 0);
        pmr::delete$(static_cast<block_t *>(pts[i]));
      }
    });
  }
  for (auto &t : threads) {
    t.join();
  }
  SUCCEED();
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user