Update CMakeLists.txt and expected.h, remove unused files, and improve thread safety in block_pool.cpp

This commit is contained in:
mutouyun 2024-03-09 19:14:41 +08:00
parent f615f200df
commit 2a1d8fa5fa
8 changed files with 439 additions and 127 deletions

View File

@ -11,7 +11,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD 17)
if (MSVC) if (MSVC)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /fsanitize=address /Zi") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Zi")
set(CompilerFlags set(CompilerFlags
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG

325
benchmark/benchmark_new.cpp Normal file
View File

@ -0,0 +1,325 @@
#include <array>
#include <cstdlib>
#include <cstddef>
#include "benchmark/benchmark.h"
#include "libpmr/new.h"
namespace {
/// \brief A fixed-capacity, array-backed LIFO store.
/// \tparam T Element type; must be default-constructible and assignable.
/// \tparam N Maximum number of elements that can be held at once.
/// \note No bounds checking is performed: the caller must not `push` when
///       `at_end()` is true, nor `pop` when `at_begin()` is true.
template <typename T, std::size_t N>
class cache {
public:
  /// \brief Stores `u` in the next free slot and advances the top position.
  template <typename U>
  void push(U &&u) noexcept {
    T &slot = slots_[top_];
    slot = std::forward<U>(u);
    ++top_;
  }

  /// \brief Steps the top position back by one and returns a reference
  ///        to the element that was stored last.
  T &pop() noexcept {
    top_ -= 1;
    return slots_[top_];
  }

  /// \brief Tells whether no element is currently stored.
  bool at_begin() const noexcept {
    return top_ == 0;
  }

  /// \brief Tells whether all `N` slots are occupied.
  bool at_end() const noexcept {
    return top_ == N;
  }

private:
  std::array<T, N> slots_{};  ///< Backing storage, value-initialized.
  std::size_t top_{};         ///< Index of the next free slot.
};
/// \brief Benchmark helper that feeds allocations from policy `P` into three
///        rotating pointer caches and releases them again.
/// \tparam P Policy type providing static `allocate()` and `deallocate(void *)`.
/// \tparam CacheSize Number of counted allocations at which `test_allocate`
///         starts returning `false`; each cache holds `CacheSize / 2` pointers.
template <typename P, std::size_t CacheSize = 128>
class test_suit {
// Advances `idx` to the next of the three caches, wrapping back to 0 after 2.
void next(std::size_t &idx) noexcept {
idx = (idx + 1) % 3;
}
public:
/// \brief Hands every pointer still held in any cache back to the policy.
~test_suit() noexcept {
for (auto &pts : pts_) {
while (!pts.at_begin()) {
P::deallocate(pts.pop());
}
}
}
/// \brief Allocates one block via `P` and stores it in the current allocation cache.
/// \return `true` while the allocation counter is still below `CacheSize`.
bool test_allocate() noexcept {
auto &pts = pts_[idx_a_];
pts.push(P::allocate());
if (pts.at_end()) {
// The current cache is full: continue in the next cache and
// point the deallocation index at that same cache.
next(idx_a_);
idx_d_ = idx_a_;
}
return ++allocated_ < CacheSize;
}
/// \brief Releases one block from the current deallocation cache, or switches
///        to the next cache when the current one is empty.
/// \return `true` if a block was released or more remain to be released;
///         `false` when a cache switch finds the counter at zero.
bool test_deallocate() noexcept {
auto &pts = pts_[idx_d_];
if (pts.at_begin()) {
// Nothing to release here: move on to the next cache without freeing.
next(idx_d_);
if (allocated_ == CacheSize) {
// NOTE(review): resetting the counter to half appears intended to release
// only one cache's worth of blocks in this phase — confirm with the author.
allocated_ = CacheSize / 2;
return true;
}
return allocated_ > 0;
}
P::deallocate(pts.pop());
--allocated_;
return true;
}
private:
cache<void *, CacheSize / 2> pts_[3]; ///< The three rotating pointer caches.
std::size_t idx_a_{};                 ///< Index of the cache currently receiving allocations.
std::size_t idx_d_{};                 ///< Index of the cache currently being drained.
std::size_t allocated_{};             ///< Counter driving the return values of both test functions.
};
/// \brief Allocation policy that obtains blocks from the C allocator.
/// \tparam AllocSize Size in bytes of every block requested.
template <std::size_t AllocSize>
struct policy_malloc {
  /// \brief Requests `AllocSize` bytes via `std::malloc`.
  /// \return The pointer returned by `std::malloc` (null on failure).
  static void *allocate() noexcept {
    void *const block = std::malloc(AllocSize);
    return block;
  }

  /// \brief Returns a block obtained from `allocate` via `std::free`.
  static void deallocate(void *p) noexcept {
    std::free(p);
  }
};
/// \brief Allocation policy that obtains blocks with `new[]` / `delete[]`.
/// \tparam AllocSize Number of `char` elements in every block requested.
template <std::size_t AllocSize>
struct policy_cpp_new {
  /// \brief Allocates a `char[AllocSize]` array with the array form of `new`.
  /// \note The function is `noexcept`, so an exception escaping `new` terminates the program.
  static void *allocate() noexcept {
    char *const block = new char[AllocSize];
    return block;
  }

  /// \brief Releases a block obtained from `allocate` with `delete[]`.
  static void deallocate(void *p) noexcept {
    auto *const block = static_cast<char *>(p);
    delete[] block;
  }
};
/// \brief Allocation policy that obtains blocks through `pmr::new$` / `pmr::delete$`.
/// \tparam AllocSize Number of `char` elements in the `std::array` allocated per block.
template <std::size_t AllocSize>
struct policy_pmr_new {
  /// \brief Creates a `std::array<char, AllocSize>` with `pmr::new$`.
  /// \return The pointer returned by `pmr::new$`, converted to `void *`.
  static void *allocate() noexcept {
    using block_t = std::array<char, AllocSize>;
    block_t *const block = pmr::new$<block_t>();
    return block;
  }

  /// \brief Hands a block obtained from `allocate` back to `pmr::delete$`.
  static void deallocate(void *p) noexcept {
    using block_t = std::array<char, AllocSize>;
    auto *const block = static_cast<block_t *>(p);
    pmr::delete$(block);
  }
};
/// \brief Benchmark body that times single allocations made through policy `P`.
/// \tparam P Policy template parameterized by the block size.
/// \tparam AllocSize Block size forwarded to the policy.
/// \param state Benchmark state supplied by the benchmark framework.
template <template <std::size_t> class P, std::size_t AllocSize>
void pmr_allocate(benchmark::State &state) {
  using suit_t = test_suit<P<AllocSize>>;
  suit_t suit;
  for (auto _ : state) {
    const bool has_room = suit.test_allocate();
    if (!has_room) {
      // The suite reported it is full: release blocks with the timer
      // paused so that only allocations are measured.
      state.PauseTiming();
      while (suit.test_deallocate()) {
      }
      state.ResumeTiming();
    }
  }
}
/// \brief Benchmark body that times single deallocations made through policy `P`.
/// \tparam P Policy template parameterized by the block size.
/// \tparam AllocSize Block size forwarded to the policy.
/// \param state Benchmark state supplied by the benchmark framework.
template <template <std::size_t> class P, std::size_t AllocSize>
void pmr_deallocate(benchmark::State &state) {
  using suit_t = test_suit<P<AllocSize>>;
  suit_t suit;
  for (auto _ : state) {
    const bool has_more = suit.test_deallocate();
    if (!has_more) {
      // Nothing left to release: refill the suite with the timer
      // paused so that only deallocations are measured.
      state.PauseTiming();
      while (suit.test_allocate()) {
      }
      state.ResumeTiming();
    }
  }
}
} // namespace
// Benchmark registrations: every combination of operation (allocate / deallocate),
// allocation policy (malloc, C++ new, pmr::new$) and block size (8, 32, 128, 1024).
// Each one is run with 1, 2, 4, 8 and 16 threads (see the recorded results below).
BENCHMARK(pmr_allocate<policy_malloc, 8>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_malloc, 32>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_malloc, 128>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_malloc, 1024>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_malloc, 8>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_malloc, 32>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_malloc, 128>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_malloc, 1024>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_cpp_new, 8>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_cpp_new, 32>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_cpp_new, 128>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_cpp_new, 1024>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_cpp_new, 8>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_cpp_new, 32>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_cpp_new, 128>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_cpp_new, 1024>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_pmr_new, 8>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_pmr_new, 32>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_pmr_new, 128>)->ThreadRange(1, 16);
BENCHMARK(pmr_allocate<policy_pmr_new, 1024>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_pmr_new, 8>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_pmr_new, 32>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_pmr_new, 128>)->ThreadRange(1, 16);
BENCHMARK(pmr_deallocate<policy_pmr_new, 1024>)->ThreadRange(1, 16);
/*
Run on (16 X 2313.68 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 1280 KiB (x8)
L3 Unified 24576 KiB (x1)
------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------------------------------
pmr_allocate<policy_malloc, 8>/threads:1 28.9 ns 29.8 ns 29866667
pmr_allocate<policy_malloc, 8>/threads:2 28.9 ns 53.7 ns 12800000
pmr_allocate<policy_malloc, 8>/threads:4 13.4 ns 43.9 ns 14933332
pmr_allocate<policy_malloc, 8>/threads:8 11.3 ns 68.4 ns 8000000
pmr_allocate<policy_malloc, 8>/threads:16 5.07 ns 56.7 ns 14336000
pmr_allocate<policy_malloc, 32>/threads:1 44.0 ns 42.1 ns 19298462
pmr_allocate<policy_malloc, 32>/threads:2 27.4 ns 54.9 ns 12800000
pmr_allocate<policy_malloc, 32>/threads:4 11.7 ns 47.1 ns 17920000
pmr_allocate<policy_malloc, 32>/threads:8 5.96 ns 43.7 ns 21082352
pmr_allocate<policy_malloc, 32>/threads:16 4.09 ns 56.7 ns 17920000
pmr_allocate<policy_malloc, 128>/threads:1 45.8 ns 39.6 ns 16592593
pmr_allocate<policy_malloc, 128>/threads:2 36.9 ns 75.3 ns 9955556
pmr_allocate<policy_malloc, 128>/threads:4 16.3 ns 66.7 ns 11946668
pmr_allocate<policy_malloc, 128>/threads:8 10.7 ns 77.1 ns 13784616
pmr_allocate<policy_malloc, 128>/threads:16 7.87 ns 94.8 ns 14336000
pmr_allocate<policy_malloc, 1024>/threads:1 75.5 ns 78.8 ns 8726261
pmr_allocate<policy_malloc, 1024>/threads:2 49.6 ns 46.9 ns 20000000
pmr_allocate<policy_malloc, 1024>/threads:4 18.4 ns 40.8 ns 29866668
pmr_allocate<policy_malloc, 1024>/threads:8 6.56 ns 25.6 ns 29866664
pmr_allocate<policy_malloc, 1024>/threads:16 6.25 ns 56.6 ns 16000000
------------------------------------------------------------------------------------------
pmr_deallocate<policy_malloc, 8>/threads:1 18.6 ns 19.9 ns 47786667
pmr_deallocate<policy_malloc, 8>/threads:2 8.52 ns 16.2 ns 47157894
pmr_deallocate<policy_malloc, 8>/threads:4 4.75 ns 18.8 ns 40000000
pmr_deallocate<policy_malloc, 8>/threads:8 3.18 ns 24.7 ns 51200000
pmr_deallocate<policy_malloc, 8>/threads:16 2.94 ns 38.1 ns 16000000
pmr_deallocate<policy_malloc, 32>/threads:1 18.0 ns 16.7 ns 45875200
pmr_deallocate<policy_malloc, 32>/threads:2 8.76 ns 19.9 ns 47157894
pmr_deallocate<policy_malloc, 32>/threads:4 4.50 ns 14.6 ns 44800000
pmr_deallocate<policy_malloc, 32>/threads:8 2.70 ns 16.7 ns 59733336
pmr_deallocate<policy_malloc, 32>/threads:16 2.30 ns 36.6 ns 23893328
pmr_deallocate<policy_malloc, 128>/threads:1 17.9 ns 16.3 ns 34461538
pmr_deallocate<policy_malloc, 128>/threads:2 8.29 ns 21.0 ns 66901334
pmr_deallocate<policy_malloc, 128>/threads:4 4.40 ns 14.5 ns 40000000
pmr_deallocate<policy_malloc, 128>/threads:8 2.65 ns 20.1 ns 80000000
pmr_deallocate<policy_malloc, 128>/threads:16 2.95 ns 40.5 ns 24717248
pmr_deallocate<policy_malloc, 1024>/threads:1 19.0 ns 20.4 ns 34461538
pmr_deallocate<policy_malloc, 1024>/threads:2 8.62 ns 19.7 ns 38956522
pmr_deallocate<policy_malloc, 1024>/threads:4 4.70 ns 11.7 ns 64000000
pmr_deallocate<policy_malloc, 1024>/threads:8 2.80 ns 16.0 ns 80000000
pmr_deallocate<policy_malloc, 1024>/threads:16 2.45 ns 26.1 ns 27569232
------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------------------------------
pmr_allocate<policy_cpp_new, 8>/threads:1 69.5 ns 78.5 ns 8960000
pmr_allocate<policy_cpp_new, 8>/threads:2 24.0 ns 40.1 ns 17920000
pmr_allocate<policy_cpp_new, 8>/threads:4 10.4 ns 37.1 ns 29866668
pmr_allocate<policy_cpp_new, 8>/threads:8 5.39 ns 42.2 ns 21082352
pmr_allocate<policy_cpp_new, 8>/threads:16 3.40 ns 45.9 ns 16000000
pmr_allocate<policy_cpp_new, 32>/threads:1 29.4 ns 29.5 ns 34461538
pmr_allocate<policy_cpp_new, 32>/threads:2 24.8 ns 43.9 ns 14933334
pmr_allocate<policy_cpp_new, 32>/threads:4 11.5 ns 45.1 ns 16290908
pmr_allocate<policy_cpp_new, 32>/threads:8 5.26 ns 34.8 ns 25600000
pmr_allocate<policy_cpp_new, 32>/threads:16 5.82 ns 90.7 ns 5513840
pmr_allocate<policy_cpp_new, 128>/threads:1 74.0 ns 66.3 ns 8960000
pmr_allocate<policy_cpp_new, 128>/threads:2 37.7 ns 80.6 ns 14933334
pmr_allocate<policy_cpp_new, 128>/threads:4 15.1 ns 52.1 ns 13784616
pmr_allocate<policy_cpp_new, 128>/threads:8 9.84 ns 63.2 ns 12358624
pmr_allocate<policy_cpp_new, 128>/threads:16 8.05 ns 110 ns 11946672
pmr_allocate<policy_cpp_new, 1024>/threads:1 79.5 ns 82.8 ns 10000000
pmr_allocate<policy_cpp_new, 1024>/threads:2 27.2 ns 59.6 ns 14933334
pmr_allocate<policy_cpp_new, 1024>/threads:4 12.3 ns 37.5 ns 40000000
pmr_allocate<policy_cpp_new, 1024>/threads:8 10.3 ns 29.8 ns 39822224
pmr_allocate<policy_cpp_new, 1024>/threads:16 6.17 ns 39.1 ns 16000000
------------------------------------------------------------------------------------------
pmr_deallocate<policy_cpp_new, 8>/threads:1 17.9 ns 18.9 ns 50301754
pmr_deallocate<policy_cpp_new, 8>/threads:2 8.73 ns 15.4 ns 51794580
pmr_deallocate<policy_cpp_new, 8>/threads:4 4.26 ns 15.2 ns 40000000
pmr_deallocate<policy_cpp_new, 8>/threads:8 2.46 ns 17.9 ns 71680000
pmr_deallocate<policy_cpp_new, 8>/threads:16 2.20 ns 34.0 ns 27569232
pmr_deallocate<policy_cpp_new, 32>/threads:1 16.1 ns 16.5 ns 56000000
pmr_deallocate<policy_cpp_new, 32>/threads:2 8.84 ns 16.1 ns 68923076
pmr_deallocate<policy_cpp_new, 32>/threads:4 4.48 ns 19.5 ns 44800000
pmr_deallocate<policy_cpp_new, 32>/threads:8 2.65 ns 15.3 ns 35840000
pmr_deallocate<policy_cpp_new, 32>/threads:16 2.20 ns 35.3 ns 23893328
pmr_deallocate<policy_cpp_new, 128>/threads:1 18.3 ns 21.1 ns 40727273
pmr_deallocate<policy_cpp_new, 128>/threads:2 8.83 ns 15.6 ns 38956522
pmr_deallocate<policy_cpp_new, 128>/threads:4 4.51 ns 17.6 ns 40000000
pmr_deallocate<policy_cpp_new, 128>/threads:8 2.93 ns 18.4 ns 39841984
pmr_deallocate<policy_cpp_new, 128>/threads:16 2.85 ns 36.1 ns 16000000
pmr_deallocate<policy_cpp_new, 1024>/threads:1 19.6 ns 16.0 ns 49777778
pmr_deallocate<policy_cpp_new, 1024>/threads:2 8.90 ns 17.9 ns 43631304
pmr_deallocate<policy_cpp_new, 1024>/threads:4 4.93 ns 13.7 ns 40000000
pmr_deallocate<policy_cpp_new, 1024>/threads:8 2.72 ns 19.9 ns 80000000
pmr_deallocate<policy_cpp_new, 1024>/threads:16 2.23 ns 29.0 ns 39822224
------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------------------------------
pmr_allocate<policy_pmr_new, 8>/threads:1 17.6 ns 18.8 ns 49777778
pmr_allocate<policy_pmr_new, 8>/threads:2 9.16 ns 14.5 ns 34461538
pmr_allocate<policy_pmr_new, 8>/threads:4 4.82 ns 18.8 ns 40000000
pmr_allocate<policy_pmr_new, 8>/threads:8 2.73 ns 17.4 ns 71680000
pmr_allocate<policy_pmr_new, 8>/threads:16 2.10 ns 34.1 ns 24717248
pmr_allocate<policy_pmr_new, 32>/threads:1 17.5 ns 14.6 ns 44800000
pmr_allocate<policy_pmr_new, 32>/threads:2 8.88 ns 16.0 ns 38956522
pmr_allocate<policy_pmr_new, 32>/threads:4 4.71 ns 17.8 ns 44800000
pmr_allocate<policy_pmr_new, 32>/threads:8 2.57 ns 16.7 ns 44800000
pmr_allocate<policy_pmr_new, 32>/threads:16 2.08 ns 41.0 ns 16000000
pmr_allocate<policy_pmr_new, 128>/threads:1 18.5 ns 14.4 ns 44600889
pmr_allocate<policy_pmr_new, 128>/threads:2 10.7 ns 22.5 ns 38956522
pmr_allocate<policy_pmr_new, 128>/threads:4 5.09 ns 21.3 ns 44800000
pmr_allocate<policy_pmr_new, 128>/threads:8 2.78 ns 19.6 ns 59733336
pmr_allocate<policy_pmr_new, 128>/threads:16 2.33 ns 28.8 ns 26548144
pmr_allocate<policy_pmr_new, 1024>/threads:1 24.8 ns 27.9 ns 22400000
pmr_allocate<policy_pmr_new, 1024>/threads:2 12.8 ns 24.0 ns 35840000
pmr_allocate<policy_pmr_new, 1024>/threads:4 6.67 ns 26.5 ns 44800000
pmr_allocate<policy_pmr_new, 1024>/threads:8 4.28 ns 28.3 ns 29866664
pmr_allocate<policy_pmr_new, 1024>/threads:16 3.21 ns 41.0 ns 16000000
------------------------------------------------------------------------------------------
pmr_deallocate<policy_pmr_new, 8>/threads:1 17.1 ns 16.3 ns 47786667
pmr_deallocate<policy_pmr_new, 8>/threads:2 8.89 ns 18.1 ns 34461538
pmr_deallocate<policy_pmr_new, 8>/threads:4 4.50 ns 16.1 ns 35840000
pmr_deallocate<policy_pmr_new, 8>/threads:8 2.77 ns 17.3 ns 59733336
pmr_deallocate<policy_pmr_new, 8>/threads:16 1.98 ns 34.2 ns 16000000
pmr_deallocate<policy_pmr_new, 32>/threads:1 17.1 ns 14.8 ns 49777778
pmr_deallocate<policy_pmr_new, 32>/threads:2 8.70 ns 18.7 ns 56000000
pmr_deallocate<policy_pmr_new, 32>/threads:4 4.58 ns 14.3 ns 44800000
pmr_deallocate<policy_pmr_new, 32>/threads:8 2.72 ns 17.3 ns 89600000
pmr_deallocate<policy_pmr_new, 32>/threads:16 2.04 ns 30.5 ns 25600000
pmr_deallocate<policy_pmr_new, 128>/threads:1 17.2 ns 15.3 ns 44800000
pmr_deallocate<policy_pmr_new, 128>/threads:2 8.67 ns 15.7 ns 52705882
pmr_deallocate<policy_pmr_new, 128>/threads:4 4.67 ns 18.7 ns 35840000
pmr_deallocate<policy_pmr_new, 128>/threads:8 2.76 ns 18.0 ns 39822224
pmr_deallocate<policy_pmr_new, 128>/threads:16 2.04 ns 28.3 ns 27569232
pmr_deallocate<policy_pmr_new, 1024>/threads:1 17.2 ns 21.5 ns 32000000
pmr_deallocate<policy_pmr_new, 1024>/threads:2 8.74 ns 16.0 ns 38956522
pmr_deallocate<policy_pmr_new, 1024>/threads:4 4.84 ns 16.7 ns 44800000
pmr_deallocate<policy_pmr_new, 1024>/threads:8 2.73 ns 16.0 ns 44800000
pmr_deallocate<policy_pmr_new, 1024>/threads:16 2.04 ns 37.1 ns 16000000
*/

View File

@ -322,7 +322,7 @@ R or_else(E &&exp, F &&f) {
*/ */
template <typename T, typename E> template <typename T, typename E>
class expected : public detail_expected::storage<typename std::remove_cv<T>::type, E> { class expected : public detail_expected::storage<typename std::remove_cv<T>::type, E> {
public: public:
using value_type = typename std::remove_cv<T>::type; using value_type = typename std::remove_cv<T>::type;
using error_type = E; using error_type = E;

View File

@ -8,8 +8,6 @@
#include <cstddef> #include <cstddef>
#include "libimp/aligned.h"
#define LIBPMR pmr #define LIBPMR pmr
#define LIBPMR_NAMESPACE_BEG_ namespace LIBPMR { #define LIBPMR_NAMESPACE_BEG_ namespace LIBPMR {
#define LIBPMR_NAMESPACE_END_ } #define LIBPMR_NAMESPACE_END_ }
@ -20,7 +18,6 @@ LIBPMR_NAMESPACE_BEG_
enum : std::size_t { enum : std::size_t {
central_cache_default_size = 1024 * 1024, ///< 1MB central_cache_default_size = 1024 * 1024, ///< 1MB
regular_head_size = ::LIBIMP::round_up(sizeof(std::size_t), alignof(std::max_align_t)),
}; };
LIBPMR_NAMESPACE_END_ LIBPMR_NAMESPACE_END_

View File

@ -7,7 +7,6 @@
#pragma once #pragma once
#include <cstddef> #include <cstddef>
#include <unordered_map>
#include <algorithm> #include <algorithm>
#include "libimp/aligned.h" #include "libimp/aligned.h"
@ -22,6 +21,18 @@
LIBPMR_NAMESPACE_BEG_ LIBPMR_NAMESPACE_BEG_
/// \brief Defines the memory block collector interface.
/// \details Abstract base through which a memory block can be handed back
///          to a collector without knowing the collector's concrete type.
class LIBIMP_EXPORT block_collector {
public:
/// \brief Virtual destructor so implementations can be destroyed through this interface.
virtual ~block_collector() noexcept = default;
/// \brief Takes back the memory block pointed to by `p`.
/// \param p Pointer to the block being returned; implementations define what they accept.
virtual void deallocate(void *p) noexcept = 0;
};
using get_block_collector_t = block_collector *(*)() noexcept;
static constexpr std::size_t regular_head_size
= ::LIBIMP::round_up(sizeof(get_block_collector_t), alignof(std::max_align_t));
/// \brief Select the incremental level based on the size. /// \brief Select the incremental level based on the size.
constexpr inline std::size_t regular_level(std::size_t s) noexcept { constexpr inline std::size_t regular_level(std::size_t s) noexcept {
return (s <= 128 ) ? 0 : return (s <= 128 ) ? 0 :
@ -32,52 +43,25 @@ constexpr inline std::size_t regular_level(std::size_t s) noexcept {
/// \brief Calculates the appropriate memory block size based on the increment level and size. /// \brief Calculates the appropriate memory block size based on the increment level and size.
constexpr inline std::size_t regular_sizeof_impl(std::size_t l, std::size_t s) noexcept { constexpr inline std::size_t regular_sizeof_impl(std::size_t l, std::size_t s) noexcept {
return (l == 0) ? std::max<std::size_t>(::LIBIMP::round_up<std::size_t>(s, 8), regular_head_size) : return (l == 0) ? ::LIBIMP::round_up<std::size_t>(s, regular_head_size) :
(l == 1) ? ::LIBIMP::round_up<std::size_t>(s, 128 ) : (l == 1) ? ::LIBIMP::round_up<std::size_t>(s, 128 ) :
(l == 2) ? ::LIBIMP::round_up<std::size_t>(s, 1024) : (l == 2) ? ::LIBIMP::round_up<std::size_t>(s, 1024) :
(l == 3) ? ::LIBIMP::round_up<std::size_t>(s, 8192) : (std::numeric_limits<std::size_t>::max)(); (l == 3) ? ::LIBIMP::round_up<std::size_t>(s, 8192) : (std::numeric_limits<std::size_t>::max)();
} }
/// \brief Calculates the appropriate memory block size based on the size. /// \brief Calculates the appropriate memory block size based on the size.
constexpr inline std::size_t regular_sizeof(std::size_t s) noexcept { constexpr inline std::size_t regular_sizeof_impl(std::size_t s) noexcept {
return regular_sizeof_impl(regular_level(s), s); return regular_sizeof_impl(regular_level(s), s);
} }
/// \brief Calculates the appropriate memory block size based on the specific type. /// \brief Calculates the appropriate memory block size based on the specific type.
template <typename T> template <typename T>
constexpr inline std::size_t regular_sizeof() noexcept { constexpr inline std::size_t regular_sizeof() noexcept {
return regular_sizeof(regular_head_size + sizeof(T)); return regular_sizeof_impl(regular_head_size + sizeof(T));
} }
/// \brief Defines the memory block collector interface.
class LIBIMP_EXPORT block_collector {
public:
virtual ~block_collector() noexcept = default;
virtual void deallocate(void *p) noexcept = 0;
};
/// \brief Gets all block pools of the thread cache.
LIBIMP_EXPORT auto get_thread_block_pool_map() noexcept
-> std::unordered_map<std::size_t, block_collector *> &;
/// \brief Defines block pool memory resource based on block pool. /// \brief Defines block pool memory resource based on block pool.
template <std::size_t BlockSize, std::size_t BlockPoolExpansion> template <std::size_t BlockSize, std::size_t BlockPoolExpansion>
class block_pool_resource;
/// \brief Memory block collector of unknown size.
/// \note This memory resource is only used to temporarily collect memory blocks
/// that cannot find a suitable block pool memory resource.
template <>
class block_pool_resource<0, 0> : public block_pool<0, 0>
, public block_collector {
public:
void deallocate(void *p) noexcept override {
block_pool<0, 0>::deallocate(p);
}
};
/// \brief A block pool memory resource for a block of memory of a specific size.
template <std::size_t BlockSize, std::size_t BlockPoolExpansion>
class block_pool_resource : public block_pool<BlockSize, BlockPoolExpansion> class block_pool_resource : public block_pool<BlockSize, BlockPoolExpansion>
, public block_collector { , public block_collector {
@ -88,99 +72,59 @@ class block_pool_resource : public block_pool<BlockSize, BlockPoolExpansion>
} }
public: public:
static block_pool_resource *get() noexcept; static block_collector *get() noexcept {
thread_local block_pool_resource instance;
return &instance;
}
using base_t::base_t; using base_t::base_t;
void *allocate(std::size_t /*bytes*/, std::size_t /*alignment*/ = alignof(std::max_align_t)) noexcept { void *allocate(std::size_t /*bytes*/, std::size_t /*alignment*/ = alignof(std::max_align_t)) noexcept {
void *p = base_t::allocate(); void *p = base_t::allocate();
p = ::LIBIMP::construct<std::size_t>(p, BlockSize); *static_cast<get_block_collector_t *>(p) = get;
return reinterpret_cast<::LIBIMP::byte *>(p) + regular_head_size; return static_cast<::LIBIMP::byte *>(p) + regular_head_size;
} }
void deallocate(void *p, std::size_t /*bytes*/, std::size_t /*alignment*/ = alignof(std::max_align_t)) noexcept { void deallocate(void *p, std::size_t /*bytes*/, std::size_t /*alignment*/ = alignof(std::max_align_t)) noexcept {
p = reinterpret_cast<::LIBIMP::byte *>(p) - regular_head_size; p = static_cast<::LIBIMP::byte *>(p) - regular_head_size;
auto r_size = *static_cast<std::size_t *>(p); auto g = *static_cast<get_block_collector_t *>(p);
if (r_size <= BlockSize) { if (g == get) {
base_t::deallocate(p); base_t::deallocate(p);
return; return;
} }
// When the actual size exceeds the current memory block size, g()->deallocate(p);
// try to find a suitable pool among all memory block pools for this thread.
auto &map = get_thread_block_pool_map();
auto it = map.find(r_size);
if ((it == map.end()) || (it->second == nullptr)) {
block_pool_resource<0, 0> *bp = nullptr;
LIBIMP_TRY {
// If the corresponding memory resource cannot be found,
// create a temporary general-purpose block pool to deallocate memory.
it = map.emplace(r_size, bp = new block_pool_resource<0, 0>).first;
} LIBIMP_CATCH(...) {
// If the memory resource cannot be created,
// store the pointer directly to avoid leakage.
delete bp;
base_t::deallocate(p);
return;
}
}
it->second->deallocate(p);
} }
}; };
template <std::size_t BlockSize, std::size_t BlockPoolExpansion>
auto block_pool_resource<BlockSize, BlockPoolExpansion>::get() noexcept
-> block_pool_resource<BlockSize, BlockPoolExpansion> * {
thread_local block_pool_resource *pi = nullptr;
if (pi != nullptr) {
return pi;
}
// Create a new block pool resource for this thread.
auto &map = get_thread_block_pool_map();
auto it = map.find(BlockSize);
if ((it != map.end()) && (it->second != nullptr)) {
// If there are existing block pool resources in the thread cache,
// a new block pool resource is constructed based on it and the cache is updated.
auto *bp = static_cast <block_pool<0, 0> *>(
dynamic_cast<block_pool_resource<0, 0> *>(it->second));
if (bp == nullptr) {
return nullptr;
}
thread_local block_pool_resource instance(std::move(*bp));
delete static_cast<block_pool_resource<0, 0> *>(bp);
it->second = pi = &instance;
return pi;
} else {
// If there are no existing block pool resources in the thread cache,
// the thread local storage instance is constructed and the pointer is cached.
thread_local block_pool_resource instance;
LIBIMP_TRY {
map.emplace(BlockSize, pi = &instance);
return pi;
} LIBIMP_CATCH(...) {
return nullptr;
}
}
}
/// \brief Match the appropriate memory block resources
/// according to the size of the specification.
template <std::size_t N, std::size_t L = regular_level(N)>
class regular_resource : public new_delete_resource {};
/// \brief Different increment levels match different chunk sizes. /// \brief Different increment levels match different chunk sizes.
/// 512 means that 512 consecutive memory blocks are allocated at a time, and the block size is N. /// 512 means that 512 consecutive memory blocks are allocated at a time, and the block size is N.
template <std::size_t N> class regular_resource<N, 0> : public block_pool_resource<N, 512> {}; template <std::size_t L>
template <std::size_t N> class regular_resource<N, 1> : public block_pool_resource<N, 256> {}; constexpr static std::size_t block_pool_expansion = 0;
template <std::size_t N> class regular_resource<N, 2> : public block_pool_resource<N, 128> {};
template <std::size_t N> class regular_resource<N, 3> : public block_pool_resource<N, 64 > {}; template <> constexpr static std::size_t block_pool_expansion<0> = 512;
template <> constexpr static std::size_t block_pool_expansion<1> = 256;
template <> constexpr static std::size_t block_pool_expansion<2> = 128;
template <> constexpr static std::size_t block_pool_expansion<3> = 64;
/// \brief Match the appropriate memory block resources according to the size of the specification.
template <std::size_t N, std::size_t L = regular_level(N)>
struct regular_resource {
static auto *get() noexcept {
using block_poll_resource_t = block_pool_resource<N, block_pool_expansion<L>>;
return dynamic_cast<block_poll_resource_t *>(block_poll_resource_t::get());
}
};
/// \brief Specialization for increment level 4: inherits from `new_delete_resource`
///        instead of selecting a block pool resource.
/// NOTE(review): presumably level 4 covers sizes beyond the largest pooled block,
/// and `new_delete_resource` supplies the `get()` used by `new$` / `delete$` — confirm.
template <std::size_t N>
struct regular_resource<N, 4> : new_delete_resource {};
/// \brief Creates an object based on the specified type and parameters with block pool resource. /// \brief Creates an object based on the specified type and parameters with block pool resource.
/// \note This function is thread-safe. /// \note This function is thread-safe.
template <typename T, typename... A> template <typename T, typename... A>
T *new$(A &&... args) noexcept { T *new$(A &&... args) noexcept {
auto *mem_res = regular_resource<regular_sizeof<T>()>::get(); auto *res = regular_resource<regular_sizeof<T>()>::get();
if (mem_res == nullptr) return nullptr; if (res == nullptr) return nullptr;
return ::LIBIMP::construct<T>(mem_res->allocate(sizeof(T), alignof(T)), std::forward<A>(args)...); return ::LIBIMP::construct<T>(res->allocate(sizeof(T), alignof(T)), std::forward<A>(args)...);
} }
/// \brief Destroys object previously allocated by the `new$` and releases obtained memory area. /// \brief Destroys object previously allocated by the `new$` and releases obtained memory area.
@ -190,13 +134,13 @@ template <typename T>
void delete$(T *p) noexcept { void delete$(T *p) noexcept {
if (p == nullptr) return; if (p == nullptr) return;
::LIBIMP::destroy(p); ::LIBIMP::destroy(p);
auto *mem_res = regular_resource<regular_sizeof<T>()>::get(); auto *res = regular_resource<regular_sizeof<T>()>::get();
if (mem_res == nullptr) return; if (res == nullptr) return;
#if defined(LIBIMP_CC_MSVC_2015) #if defined(LIBIMP_CC_MSVC_2015)
// `alignof` of vs2015 requires that type must be able to be instantiated. // `alignof` of vs2015 requires that type must be able to be instantiated.
mem_res->deallocate(p, sizeof(T)); res->deallocate(p, sizeof(T));
#else #else
mem_res->deallocate(p, sizeof(T), alignof(T)); res->deallocate(p, sizeof(T), alignof(T));
#endif #endif
} }

View File

@ -1,13 +1,41 @@
#include <mutex>
#include "libimp/detect_plat.h"
#include "libpmr/block_pool.h" #include "libpmr/block_pool.h"
#include "libpmr/monotonic_buffer_resource.h" #include "libpmr/monotonic_buffer_resource.h"
LIBPMR_NAMESPACE_BEG_ LIBPMR_NAMESPACE_BEG_
/// \brief A `monotonic_buffer_resource` whose `allocate`, `deallocate` and
///        final `release` are each performed while holding an internal mutex.
class thread_safe_resource : public monotonic_buffer_resource {
public:
  /// \brief Constructs the resource over the given initial buffer.
  /// \param buffer Buffer forwarded to the `monotonic_buffer_resource` base.
  /// \note `explicit` so that a span is never silently converted into a resource.
  explicit thread_safe_resource(::LIBIMP::span<::LIBIMP::byte> buffer) noexcept
    : monotonic_buffer_resource(buffer) {}

  /// \brief Releases the underlying resource while holding the mutex.
  ~thread_safe_resource() noexcept {
    LIBIMP_UNUSED std::lock_guard<std::mutex> lock(mutex_);
    monotonic_buffer_resource::release();
  }

  /// \brief Forwards to `monotonic_buffer_resource::allocate` under the mutex.
  /// \param bytes Number of bytes requested.
  /// \param alignment Alignment requested for the returned memory.
  /// \return Whatever the base resource's `allocate` returns.
  void *allocate(std::size_t bytes, std::size_t alignment) noexcept {
    LIBIMP_UNUSED std::lock_guard<std::mutex> lock(mutex_);
    return monotonic_buffer_resource::allocate(bytes, alignment);
  }

  /// \brief Forwards to `monotonic_buffer_resource::deallocate` under the mutex.
  /// \param p Pointer previously obtained from `allocate`.
  /// \param bytes Size passed to the matching `allocate` call.
  /// \param alignment Alignment passed to the matching `allocate` call.
  void deallocate(void *p, std::size_t bytes, std::size_t alignment) noexcept {
    LIBIMP_UNUSED std::lock_guard<std::mutex> lock(mutex_);
    monotonic_buffer_resource::deallocate(p, bytes, alignment);
  }

private:
  std::mutex mutex_;  ///< Serializes every access to the base resource.
};
allocator &central_cache_allocator() noexcept { allocator &central_cache_allocator() noexcept {
static std::array<::LIBIMP::byte, central_cache_default_size> buffer; static std::array<::LIBIMP::byte, central_cache_default_size> buf;
static monotonic_buffer_resource mr(buffer); static thread_safe_resource res(buf);
static allocator a(&mr); static allocator a(&res);
return a; return a;
} }

View File

@ -1,12 +0,0 @@
#include "libpmr/new.h"
LIBPMR_NAMESPACE_BEG_
auto get_thread_block_pool_map() noexcept
-> std::unordered_map<std::size_t, block_collector *> & {
thread_local std::unordered_map<std::size_t, block_collector *> instances;
return instances;
}
LIBPMR_NAMESPACE_END_

View File

@ -3,6 +3,7 @@
#include <array> #include <array>
#include <cstring> #include <cstring>
#include <cstddef> #include <cstddef>
#include <thread>
#include "gtest/gtest.h" #include "gtest/gtest.h"
@ -114,3 +115,32 @@ TEST(pmr_new, delete$null) {
pmr::delete$(p); pmr::delete$(p);
SUCCEED(); SUCCEED();
} }
/// Exercises `pmr::new$` / `pmr::delete$` concurrently from 16 threads:
/// each thread first allocates and frees single `int`s, then keeps 10000
/// small blocks alive at once, verifies their contents and frees them.
TEST(pmr_new, multi_thread) {
  std::array<std::thread, 16> threads;
  for (auto &t : threads) {
    t = std::thread([] {
      using block_t = std::array<char, 10>;
      constexpr int count = 10000;

      for (int i = 0; i < count; ++i) {
        auto p = pmr::new$<int>();
        // `new$` returns nullptr when no resource is available; nothing is
        // outstanding at this point, so bailing out of the thread is safe.
        ASSERT_TRUE(p != nullptr);
        *p = i;
        pmr::delete$(p);
      }

      std::array<block_t *, count> pts{};
      for (int i = 0; i < count; ++i) {
        auto p = pmr::new$<block_t>();
        EXPECT_TRUE(p != nullptr);
        pts[i] = p;
        if (p != nullptr) {
          std::memset(p, i, sizeof(block_t));
        }
      }
      for (int i = 0; i < count; ++i) {
        if (pts[i] == nullptr) continue;
        block_t tmp;
        std::memset(&tmp, i, sizeof(block_t));
        // Non-fatal check: a fatal assertion would only return from this
        // lambda and skip `delete$` for every remaining block.
        EXPECT_EQ(std::memcmp(pts[i], &tmp, sizeof(block_t)), 0);
        pmr::delete$(pts[i]);
      }
    });
  }
  for (auto &t : threads) {
    t.join();
  }
  SUCCEED();
}