Update CMakeLists.txt and expected.h, remove unused files, and improve thread safety in block_pool.cpp

2026-02-08 18:56:41 +08:00 · 2024-03-09 19:14:41 +08:00 · 2024-03-09 19:14:41 +08:00 · 2a1d8fa5fa
commit 2a1d8fa5fa
parent f615f200df
8 changed files with 439 additions and 127 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -11,7 +11,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 set(CMAKE_CXX_STANDARD 17)
 if (MSVC)
-  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /fsanitize=address /Zi")
+  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Zi")
  set(CompilerFlags
      CMAKE_CXX_FLAGS
      CMAKE_CXX_FLAGS_DEBUG
--- a/benchmark/benchmark_new.cpp
+++ b/benchmark/benchmark_new.cpp
@ -0,0 +1,325 @@
 #include <array>
 #include <cstdlib>
 #include <cstddef>
 #include "benchmark/benchmark.h"
 #include "libpmr/new.h"
 namespace {
 /// \brief Fixed-capacity LIFO store of `N` elements of type `T`.
 /// \note No bounds checking is performed: `push` while `at_end()` or `pop`
 ///       while `at_begin()` indexes outside the underlying array.
 template <typename T, std::size_t N>
 class cache {
 public:
  /// \brief Assigns `u` to the next free slot, then advances the cursor.
  template <typename U>
  void push(U &&u) noexcept {
    T &slot = data_[idx_];
    slot = std::forward<U>(u);
    ++idx_;
  }
  /// \brief Moves the cursor back by one and returns a reference to that slot.
  T &pop() noexcept {
    --idx_;
    return data_[idx_];
  }
  /// \brief True when the cursor is at slot 0, i.e. nothing is stored.
  bool at_begin() const noexcept {
    return 0 == idx_;
  }
  /// \brief True when the cursor equals `N`, i.e. every slot is occupied.
  bool at_end() const noexcept {
    return N == idx_;
  }
 private:
  std::array<T, N> data_{}; ///< Value-initialized backing storage.
  std::size_t idx_{};       ///< Index of the next free slot.
 };
 template <typename P, std::size_t CacheSize = 128>
 class test_suit {
  void next(std::size_t &idx) noexcept {
    idx = (idx + 1) % 3;
  }
 public:
  ~test_suit() noexcept {
    for (auto &pts : pts_) {
      while (!pts.at_begin()) {
        P::deallocate(pts.pop());
      }
    }
  }
  bool test_allocate() noexcept {
    auto &pts = pts_[idx_a_];
    pts.push(P::allocate());
    if (pts.at_end()) {
      next(idx_a_);
      idx_d_ = idx_a_;
    }
    return ++allocated_ < CacheSize;
  }
  bool test_deallocate() noexcept {
    auto &pts = pts_[idx_d_];
    if (pts.at_begin()) {
      next(idx_d_);
      if (allocated_ == CacheSize) {
        allocated_ = CacheSize / 2;
        return true;
      }
      return allocated_ > 0;
    }
    P::deallocate(pts.pop());
    --allocated_;
    return true;
  }
 private:
  cache<void *, CacheSize / 2> pts_[3];
  std::size_t idx_a_{};
  std::size_t idx_d_{};
  std::size_t allocated_{};
 };
 /// \brief Allocation policy backed by `std::malloc` / `std::free`.
 /// \tparam AllocSize Number of bytes requested per allocation.
 template <std::size_t AllocSize>
 struct policy_malloc {
  /// \brief Requests `AllocSize` bytes from `std::malloc` and returns the result as is.
  static void *allocate() noexcept {
    void *const block = std::malloc(AllocSize);
    return block;
  }
  /// \brief Passes `p` to `std::free`.
  static void deallocate(void *p) noexcept {
    std::free(p);
  }
 };
 /// \brief Allocation policy backed by the array forms of `new` / `delete`.
 /// \tparam AllocSize Number of `char` elements requested per allocation.
 template <std::size_t AllocSize>
 struct policy_cpp_new {
  /// \brief Allocates a `char[AllocSize]` array with `new[]`.
  static void *allocate() noexcept {
    char *const block = new char[AllocSize];
    return block;
  }
  /// \brief Releases an array obtained from `allocate()` with `delete[]`.
  static void deallocate(void *p) noexcept {
    auto *const block = static_cast<char *>(p);
    delete[] block;
  }
 };
 /// \brief Allocation policy backed by `pmr::new$` / `pmr::delete$`, using
 ///        `std::array<char, AllocSize>` as the allocated object type.
 /// \tparam AllocSize Number of `char` elements in the allocated array object.
 template <std::size_t AllocSize>
 struct policy_pmr_new {
  /// \brief Creates one `std::array<char, AllocSize>` through `pmr::new$`.
  static void *allocate() noexcept {
    using block_t = std::array<char, AllocSize>;
    block_t *const block = pmr::new$<block_t>();
    return block;
  }
  /// \brief Casts `p` back to the array type and releases it through `pmr::delete$`.
  static void deallocate(void *p) noexcept {
    using block_t = std::array<char, AllocSize>;
    auto *const block = static_cast<block_t *>(p);
    pmr::delete$(block);
  }
 };
 /// \brief Benchmark body in which each iteration performs one `test_allocate()`.
 /// \note When `test_allocate()` returns false, timing is paused while
 ///       `test_deallocate()` is called repeatedly until it returns false,
 ///       then timing is resumed.
 template <template <std::size_t> class P, std::size_t AllocSize>
 void pmr_allocate(benchmark::State &state) {
  test_suit<P<AllocSize>> suit;
  for (auto _ : state) {
    if (!suit.test_allocate()) {
      state.PauseTiming();
      while (suit.test_deallocate()) {
        // keep draining
      }
      state.ResumeTiming();
    }
  }
 }
 /// \brief Benchmark body in which each iteration performs one `test_deallocate()`.
 /// \note When `test_deallocate()` returns false, timing is paused while
 ///       `test_allocate()` is called repeatedly until it returns false,
 ///       then timing is resumed.
 template <template <std::size_t> class P, std::size_t AllocSize>
 void pmr_deallocate(benchmark::State &state) {
  test_suit<P<AllocSize>> suit;
  for (auto _ : state) {
    if (!suit.test_deallocate()) {
      state.PauseTiming();
      while (suit.test_allocate()) {
        // keep refilling
      }
      state.ResumeTiming();
    }
  }
 }
 } // namespace
 // Benchmark registrations: every policy is measured for allocation and for
 // deallocation at sizes 8, 32, 128 and 1024, each with ThreadRange(1, 16)
 // (the recorded results below list 1, 2, 4, 8 and 16 threads).

 // std::malloc / std::free
 BENCHMARK(pmr_allocate<policy_malloc, 8>)->ThreadRange(1, 16);
 BENCHMARK(pmr_allocate<policy_malloc, 32>)->ThreadRange(1, 16);
 BENCHMARK(pmr_allocate<policy_malloc, 128>)->ThreadRange(1, 16);
 BENCHMARK(pmr_allocate<policy_malloc, 1024>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_malloc, 8>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_malloc, 32>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_malloc, 128>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_malloc, 1024>)->ThreadRange(1, 16);
 // new char[] / delete[]
 BENCHMARK(pmr_allocate<policy_cpp_new, 8>)->ThreadRange(1, 16);
 BENCHMARK(pmr_allocate<policy_cpp_new, 32>)->ThreadRange(1, 16);
 BENCHMARK(pmr_allocate<policy_cpp_new, 128>)->ThreadRange(1, 16);
 BENCHMARK(pmr_allocate<policy_cpp_new, 1024>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_cpp_new, 8>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_cpp_new, 32>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_cpp_new, 128>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_cpp_new, 1024>)->ThreadRange(1, 16);
 // pmr::new$ / pmr::delete$
 BENCHMARK(pmr_allocate<policy_pmr_new, 8>)->ThreadRange(1, 16);
 BENCHMARK(pmr_allocate<policy_pmr_new, 32>)->ThreadRange(1, 16);
 BENCHMARK(pmr_allocate<policy_pmr_new, 128>)->ThreadRange(1, 16);
 BENCHMARK(pmr_allocate<policy_pmr_new, 1024>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_pmr_new, 8>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_pmr_new, 32>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_pmr_new, 128>)->ThreadRange(1, 16);
 BENCHMARK(pmr_deallocate<policy_pmr_new, 1024>)->ThreadRange(1, 16);
 /*
 Run on (16 X 2313.68 MHz CPU s)
 CPU Caches:
  L1 Data 48 KiB (x8)
  L1 Instruction 32 KiB (x8)
  L2 Unified 1280 KiB (x8)
  L3 Unified 24576 KiB (x1)
 ------------------------------------------------------------------------------------------
 Benchmark                                                Time             CPU   Iterations
 ------------------------------------------------------------------------------------------
 pmr_allocate<policy_malloc, 8>/threads:1              28.9 ns         29.8 ns     29866667
 pmr_allocate<policy_malloc, 8>/threads:2              28.9 ns         53.7 ns     12800000
 pmr_allocate<policy_malloc, 8>/threads:4              13.4 ns         43.9 ns     14933332
 pmr_allocate<policy_malloc, 8>/threads:8              11.3 ns         68.4 ns      8000000
 pmr_allocate<policy_malloc, 8>/threads:16             5.07 ns         56.7 ns     14336000
 pmr_allocate<policy_malloc, 32>/threads:1             44.0 ns         42.1 ns     19298462
 pmr_allocate<policy_malloc, 32>/threads:2             27.4 ns         54.9 ns     12800000
 pmr_allocate<policy_malloc, 32>/threads:4             11.7 ns         47.1 ns     17920000
 pmr_allocate<policy_malloc, 32>/threads:8             5.96 ns         43.7 ns     21082352
 pmr_allocate<policy_malloc, 32>/threads:16            4.09 ns         56.7 ns     17920000
 pmr_allocate<policy_malloc, 128>/threads:1            45.8 ns         39.6 ns     16592593
 pmr_allocate<policy_malloc, 128>/threads:2            36.9 ns         75.3 ns      9955556
 pmr_allocate<policy_malloc, 128>/threads:4            16.3 ns         66.7 ns     11946668
 pmr_allocate<policy_malloc, 128>/threads:8            10.7 ns         77.1 ns     13784616
 pmr_allocate<policy_malloc, 128>/threads:16           7.87 ns         94.8 ns     14336000
 pmr_allocate<policy_malloc, 1024>/threads:1           75.5 ns         78.8 ns      8726261
 pmr_allocate<policy_malloc, 1024>/threads:2           49.6 ns         46.9 ns     20000000
 pmr_allocate<policy_malloc, 1024>/threads:4           18.4 ns         40.8 ns     29866668
 pmr_allocate<policy_malloc, 1024>/threads:8           6.56 ns         25.6 ns     29866664
 pmr_allocate<policy_malloc, 1024>/threads:16          6.25 ns         56.6 ns     16000000
 ------------------------------------------------------------------------------------------
 pmr_deallocate<policy_malloc, 8>/threads:1            18.6 ns         19.9 ns     47786667
 pmr_deallocate<policy_malloc, 8>/threads:2            8.52 ns         16.2 ns     47157894
 pmr_deallocate<policy_malloc, 8>/threads:4            4.75 ns         18.8 ns     40000000
 pmr_deallocate<policy_malloc, 8>/threads:8            3.18 ns         24.7 ns     51200000
 pmr_deallocate<policy_malloc, 8>/threads:16           2.94 ns         38.1 ns     16000000
 pmr_deallocate<policy_malloc, 32>/threads:1           18.0 ns         16.7 ns     45875200
 pmr_deallocate<policy_malloc, 32>/threads:2           8.76 ns         19.9 ns     47157894
 pmr_deallocate<policy_malloc, 32>/threads:4           4.50 ns         14.6 ns     44800000
 pmr_deallocate<policy_malloc, 32>/threads:8           2.70 ns         16.7 ns     59733336
 pmr_deallocate<policy_malloc, 32>/threads:16          2.30 ns         36.6 ns     23893328
 pmr_deallocate<policy_malloc, 128>/threads:1          17.9 ns         16.3 ns     34461538
 pmr_deallocate<policy_malloc, 128>/threads:2          8.29 ns         21.0 ns     66901334
 pmr_deallocate<policy_malloc, 128>/threads:4          4.40 ns         14.5 ns     40000000
 pmr_deallocate<policy_malloc, 128>/threads:8          2.65 ns         20.1 ns     80000000
 pmr_deallocate<policy_malloc, 128>/threads:16         2.95 ns         40.5 ns     24717248
 pmr_deallocate<policy_malloc, 1024>/threads:1         19.0 ns         20.4 ns     34461538
 pmr_deallocate<policy_malloc, 1024>/threads:2         8.62 ns         19.7 ns     38956522
 pmr_deallocate<policy_malloc, 1024>/threads:4         4.70 ns         11.7 ns     64000000
 pmr_deallocate<policy_malloc, 1024>/threads:8         2.80 ns         16.0 ns     80000000
 pmr_deallocate<policy_malloc, 1024>/threads:16        2.45 ns         26.1 ns     27569232
 ------------------------------------------------------------------------------------------
 Benchmark                                                Time             CPU   Iterations
 ------------------------------------------------------------------------------------------
 pmr_allocate<policy_cpp_new, 8>/threads:1             69.5 ns         78.5 ns      8960000
 pmr_allocate<policy_cpp_new, 8>/threads:2             24.0 ns         40.1 ns     17920000
 pmr_allocate<policy_cpp_new, 8>/threads:4             10.4 ns         37.1 ns     29866668
 pmr_allocate<policy_cpp_new, 8>/threads:8             5.39 ns         42.2 ns     21082352
 pmr_allocate<policy_cpp_new, 8>/threads:16            3.40 ns         45.9 ns     16000000
 pmr_allocate<policy_cpp_new, 32>/threads:1            29.4 ns         29.5 ns     34461538
 pmr_allocate<policy_cpp_new, 32>/threads:2            24.8 ns         43.9 ns     14933334
 pmr_allocate<policy_cpp_new, 32>/threads:4            11.5 ns         45.1 ns     16290908
 pmr_allocate<policy_cpp_new, 32>/threads:8            5.26 ns         34.8 ns     25600000
 pmr_allocate<policy_cpp_new, 32>/threads:16           5.82 ns         90.7 ns      5513840
 pmr_allocate<policy_cpp_new, 128>/threads:1           74.0 ns         66.3 ns      8960000
 pmr_allocate<policy_cpp_new, 128>/threads:2           37.7 ns         80.6 ns     14933334
 pmr_allocate<policy_cpp_new, 128>/threads:4           15.1 ns         52.1 ns     13784616
 pmr_allocate<policy_cpp_new, 128>/threads:8           9.84 ns         63.2 ns     12358624
 pmr_allocate<policy_cpp_new, 128>/threads:16          8.05 ns          110 ns     11946672
 pmr_allocate<policy_cpp_new, 1024>/threads:1          79.5 ns         82.8 ns     10000000
 pmr_allocate<policy_cpp_new, 1024>/threads:2          27.2 ns         59.6 ns     14933334
 pmr_allocate<policy_cpp_new, 1024>/threads:4          12.3 ns         37.5 ns     40000000
 pmr_allocate<policy_cpp_new, 1024>/threads:8          10.3 ns         29.8 ns     39822224
 pmr_allocate<policy_cpp_new, 1024>/threads:16         6.17 ns         39.1 ns     16000000
 ------------------------------------------------------------------------------------------
 pmr_deallocate<policy_cpp_new, 8>/threads:1           17.9 ns         18.9 ns     50301754
 pmr_deallocate<policy_cpp_new, 8>/threads:2           8.73 ns         15.4 ns     51794580
 pmr_deallocate<policy_cpp_new, 8>/threads:4           4.26 ns         15.2 ns     40000000
 pmr_deallocate<policy_cpp_new, 8>/threads:8           2.46 ns         17.9 ns     71680000
 pmr_deallocate<policy_cpp_new, 8>/threads:16          2.20 ns         34.0 ns     27569232
 pmr_deallocate<policy_cpp_new, 32>/threads:1          16.1 ns         16.5 ns     56000000
 pmr_deallocate<policy_cpp_new, 32>/threads:2          8.84 ns         16.1 ns     68923076
 pmr_deallocate<policy_cpp_new, 32>/threads:4          4.48 ns         19.5 ns     44800000
 pmr_deallocate<policy_cpp_new, 32>/threads:8          2.65 ns         15.3 ns     35840000
 pmr_deallocate<policy_cpp_new, 32>/threads:16         2.20 ns         35.3 ns     23893328
 pmr_deallocate<policy_cpp_new, 128>/threads:1         18.3 ns         21.1 ns     40727273
 pmr_deallocate<policy_cpp_new, 128>/threads:2         8.83 ns         15.6 ns     38956522
 pmr_deallocate<policy_cpp_new, 128>/threads:4         4.51 ns         17.6 ns     40000000
 pmr_deallocate<policy_cpp_new, 128>/threads:8         2.93 ns         18.4 ns     39841984
 pmr_deallocate<policy_cpp_new, 128>/threads:16        2.85 ns         36.1 ns     16000000
 pmr_deallocate<policy_cpp_new, 1024>/threads:1        19.6 ns         16.0 ns     49777778
 pmr_deallocate<policy_cpp_new, 1024>/threads:2        8.90 ns         17.9 ns     43631304
 pmr_deallocate<policy_cpp_new, 1024>/threads:4        4.93 ns         13.7 ns     40000000
 pmr_deallocate<policy_cpp_new, 1024>/threads:8        2.72 ns         19.9 ns     80000000
 pmr_deallocate<policy_cpp_new, 1024>/threads:16       2.23 ns         29.0 ns     39822224
 ------------------------------------------------------------------------------------------
 Benchmark                                                Time             CPU   Iterations
 ------------------------------------------------------------------------------------------
 pmr_allocate<policy_pmr_new, 8>/threads:1             17.6 ns         18.8 ns     49777778
 pmr_allocate<policy_pmr_new, 8>/threads:2             9.16 ns         14.5 ns     34461538
 pmr_allocate<policy_pmr_new, 8>/threads:4             4.82 ns         18.8 ns     40000000
 pmr_allocate<policy_pmr_new, 8>/threads:8             2.73 ns         17.4 ns     71680000
 pmr_allocate<policy_pmr_new, 8>/threads:16            2.10 ns         34.1 ns     24717248
 pmr_allocate<policy_pmr_new, 32>/threads:1            17.5 ns         14.6 ns     44800000
 pmr_allocate<policy_pmr_new, 32>/threads:2            8.88 ns         16.0 ns     38956522
 pmr_allocate<policy_pmr_new, 32>/threads:4            4.71 ns         17.8 ns     44800000
 pmr_allocate<policy_pmr_new, 32>/threads:8            2.57 ns         16.7 ns     44800000
 pmr_allocate<policy_pmr_new, 32>/threads:16           2.08 ns         41.0 ns     16000000
 pmr_allocate<policy_pmr_new, 128>/threads:1           18.5 ns         14.4 ns     44600889
 pmr_allocate<policy_pmr_new, 128>/threads:2           10.7 ns         22.5 ns     38956522
 pmr_allocate<policy_pmr_new, 128>/threads:4           5.09 ns         21.3 ns     44800000
 pmr_allocate<policy_pmr_new, 128>/threads:8           2.78 ns         19.6 ns     59733336
 pmr_allocate<policy_pmr_new, 128>/threads:16          2.33 ns         28.8 ns     26548144
 pmr_allocate<policy_pmr_new, 1024>/threads:1          24.8 ns         27.9 ns     22400000
 pmr_allocate<policy_pmr_new, 1024>/threads:2          12.8 ns         24.0 ns     35840000
 pmr_allocate<policy_pmr_new, 1024>/threads:4          6.67 ns         26.5 ns     44800000
 pmr_allocate<policy_pmr_new, 1024>/threads:8          4.28 ns         28.3 ns     29866664
 pmr_allocate<policy_pmr_new, 1024>/threads:16         3.21 ns         41.0 ns     16000000
 ------------------------------------------------------------------------------------------
 pmr_deallocate<policy_pmr_new, 8>/threads:1           17.1 ns         16.3 ns     47786667
 pmr_deallocate<policy_pmr_new, 8>/threads:2           8.89 ns         18.1 ns     34461538
 pmr_deallocate<policy_pmr_new, 8>/threads:4           4.50 ns         16.1 ns     35840000
 pmr_deallocate<policy_pmr_new, 8>/threads:8           2.77 ns         17.3 ns     59733336
 pmr_deallocate<policy_pmr_new, 8>/threads:16          1.98 ns         34.2 ns     16000000
 pmr_deallocate<policy_pmr_new, 32>/threads:1          17.1 ns         14.8 ns     49777778
 pmr_deallocate<policy_pmr_new, 32>/threads:2          8.70 ns         18.7 ns     56000000
 pmr_deallocate<policy_pmr_new, 32>/threads:4          4.58 ns         14.3 ns     44800000
 pmr_deallocate<policy_pmr_new, 32>/threads:8          2.72 ns         17.3 ns     89600000
 pmr_deallocate<policy_pmr_new, 32>/threads:16         2.04 ns         30.5 ns     25600000
 pmr_deallocate<policy_pmr_new, 128>/threads:1         17.2 ns         15.3 ns     44800000
 pmr_deallocate<policy_pmr_new, 128>/threads:2         8.67 ns         15.7 ns     52705882
 pmr_deallocate<policy_pmr_new, 128>/threads:4         4.67 ns         18.7 ns     35840000
 pmr_deallocate<policy_pmr_new, 128>/threads:8         2.76 ns         18.0 ns     39822224
 pmr_deallocate<policy_pmr_new, 128>/threads:16        2.04 ns         28.3 ns     27569232
 pmr_deallocate<policy_pmr_new, 1024>/threads:1        17.2 ns         21.5 ns     32000000
 pmr_deallocate<policy_pmr_new, 1024>/threads:2        8.74 ns         16.0 ns     38956522
 pmr_deallocate<policy_pmr_new, 1024>/threads:4        4.84 ns         16.7 ns     44800000
 pmr_deallocate<policy_pmr_new, 1024>/threads:8        2.73 ns         16.0 ns     44800000
 pmr_deallocate<policy_pmr_new, 1024>/threads:16       2.04 ns         37.1 ns     16000000
 */
--- a/include/libimp/expected.h
+++ b/include/libimp/expected.h
@ -322,7 +322,7 @@ R or_else(E &&exp, F &&f) {
 */
 template <typename T, typename E>
 class expected : public detail_expected::storage<typename std::remove_cv<T>::type, E> {
- public:
+public:
  using value_type = typename std::remove_cv<T>::type;
  using error_type = E;
--- a/include/libpmr/def.h
+++ b/include/libpmr/def.h
@ -8,8 +8,6 @@
 #include <cstddef>
 #include "libimp/aligned.h"
 #define LIBPMR                pmr
 #define LIBPMR_NAMESPACE_BEG_ namespace LIBPMR {
 #define LIBPMR_NAMESPACE_END_ }
@ -20,7 +18,6 @@ LIBPMR_NAMESPACE_BEG_
 enum : std::size_t {
  central_cache_default_size = 1024 * 1024, ///< 1MB
  regular_head_size          = ::LIBIMP::round_up(sizeof(std::size_t), alignof(std::max_align_t)),
 };
 LIBPMR_NAMESPACE_END_
--- a/include/libpmr/new.h
+++ b/include/libpmr/new.h
@ -7,7 +7,6 @@
 #pragma once
 #include <cstddef>
 #include <unordered_map>
 #include <algorithm>
 #include "libimp/aligned.h"
@ -22,6 +21,18 @@
 LIBPMR_NAMESPACE_BEG_
 /// \brief Defines the memory block collector interface.
 class LIBIMP_EXPORT block_collector {
 public:
  virtual ~block_collector() noexcept = default;
  virtual void deallocate(void *p) noexcept = 0;
 };
 using get_block_collector_t = block_collector *(*)() noexcept;
 static constexpr std::size_t regular_head_size
    = ::LIBIMP::round_up(sizeof(get_block_collector_t), alignof(std::max_align_t));
 /// \brief Select the incremental level based on the size.
 constexpr inline std::size_t regular_level(std::size_t s) noexcept {
  return (s <= 128  ) ? 0 :
@ -32,52 +43,25 @@ constexpr inline std::size_t regular_level(std::size_t s) noexcept {
 /// \brief Calculates the appropriate memory block size based on the increment level and size.
 constexpr inline std::size_t regular_sizeof_impl(std::size_t l, std::size_t s) noexcept {
-  return (l == 0) ? std::max<std::size_t>(::LIBIMP::round_up<std::size_t>(s, 8), regular_head_size) :
+  return (l == 0) ? ::LIBIMP::round_up<std::size_t>(s, regular_head_size) :
         (l == 1) ? ::LIBIMP::round_up<std::size_t>(s, 128 ) :
         (l == 2) ? ::LIBIMP::round_up<std::size_t>(s, 1024) :
         (l == 3) ? ::LIBIMP::round_up<std::size_t>(s, 8192) : (std::numeric_limits<std::size_t>::max)();
 }
 /// \brief Calculates the appropriate memory block size based on the size.
-constexpr inline std::size_t regular_sizeof(std::size_t s) noexcept {
+constexpr inline std::size_t regular_sizeof_impl(std::size_t s) noexcept {
  return regular_sizeof_impl(regular_level(s), s);
 }
 /// \brief Calculates the appropriate memory block size based on the specific type.
 template <typename T>
 constexpr inline std::size_t regular_sizeof() noexcept {
-  return regular_sizeof(regular_head_size + sizeof(T));
+  return regular_sizeof_impl(regular_head_size + sizeof(T));
 }
 /// \brief Defines the memory block collector interface.
 class LIBIMP_EXPORT block_collector {
 public:
  virtual ~block_collector() noexcept = default;
  virtual void deallocate(void *p) noexcept = 0;
 };
 /// \brief Gets all block pools of the thread cache.
 LIBIMP_EXPORT auto get_thread_block_pool_map() noexcept
  -> std::unordered_map<std::size_t, block_collector *> &;
 /// \brief Defines block pool memory resource based on block pool.
 template <std::size_t BlockSize, std::size_t BlockPoolExpansion>
 class block_pool_resource;
 /// \brief Memory block collector of unknown size.
 /// \note This memory resource is only used to temporarily collect memory blocks 
 ///       that cannot find a suitable block pool memory resource.
 template <>
 class block_pool_resource<0, 0> : public block_pool<0, 0>
                                , public block_collector {
 public:
  void deallocate(void *p) noexcept override {
    block_pool<0, 0>::deallocate(p);
  }
 };
 /// \brief A block pool memory resource for a block of memory of a specific size.
 template <std::size_t BlockSize, std::size_t BlockPoolExpansion>
 class block_pool_resource : public block_pool<BlockSize, BlockPoolExpansion>
                          , public block_collector {
@ -88,99 +72,59 @@ class block_pool_resource : public block_pool<BlockSize, BlockPoolExpansion>
  }
 public:
-  static block_pool_resource *get() noexcept;
+  static block_collector *get() noexcept {
    thread_local block_pool_resource instance;
    return &instance;
  }
  using base_t::base_t;
  void *allocate(std::size_t /*bytes*/, std::size_t /*alignment*/ = alignof(std::max_align_t)) noexcept {
    void *p = base_t::allocate();
-    p = ::LIBIMP::construct<std::size_t>(p, BlockSize);
+    *static_cast<get_block_collector_t *>(p) = get;
-    return reinterpret_cast<::LIBIMP::byte *>(p) + regular_head_size;
+    return static_cast<::LIBIMP::byte *>(p) + regular_head_size;
  }
  void deallocate(void *p, std::size_t /*bytes*/, std::size_t /*alignment*/ = alignof(std::max_align_t)) noexcept {
-    p = reinterpret_cast<::LIBIMP::byte *>(p) - regular_head_size;
+    p = static_cast<::LIBIMP::byte *>(p) - regular_head_size;
-    auto r_size = *static_cast<std::size_t *>(p);
+    auto g = *static_cast<get_block_collector_t *>(p);
-    if (r_size <= BlockSize) {
+    if (g == get) {
      base_t::deallocate(p);
      return;
    }
-    // When the actual size exceeds the current memory block size, 
+    g()->deallocate(p);
    // try to find a suitable pool among all memory block pools for this thread.
    auto &map = get_thread_block_pool_map();
    auto it = map.find(r_size);
    if ((it == map.end()) || (it->second == nullptr)) {
      block_pool_resource<0, 0> *bp = nullptr;
      LIBIMP_TRY {
        // If the corresponding memory resource cannot be found, 
        // create a temporary general-purpose block pool to deallocate memory.
        it = map.emplace(r_size, bp = new block_pool_resource<0, 0>).first;
      } LIBIMP_CATCH(...) {
        // If the memory resource cannot be created, 
        // store the pointer directly to avoid leakage.
        delete bp;
        base_t::deallocate(p);
        return;
      }
    }
    it->second->deallocate(p);
  }
 };
 template <std::size_t BlockSize, std::size_t BlockPoolExpansion>
 auto block_pool_resource<BlockSize, BlockPoolExpansion>::get() noexcept
  -> block_pool_resource<BlockSize, BlockPoolExpansion> * {
  thread_local block_pool_resource *pi = nullptr;
  if (pi != nullptr) {
    return pi;
  }
  // Create a new block pool resource for this thread.
  auto &map = get_thread_block_pool_map();
  auto it = map.find(BlockSize);
  if ((it != map.end()) && (it->second != nullptr)) {
    // If there are existing block pool resources in the thread cache, 
    // a new block pool resource is constructed based on it and the cache is updated.
    auto *bp = static_cast <block_pool<0, 0> *>(
               dynamic_cast<block_pool_resource<0, 0> *>(it->second));
    if (bp == nullptr) {
      return nullptr;
    }
    thread_local block_pool_resource instance(std::move(*bp));
    delete static_cast<block_pool_resource<0, 0> *>(bp);
    it->second = pi = &instance;
    return pi;
  } else {
    // If there are no existing block pool resources in the thread cache, 
    // the thread local storage instance is constructed and the pointer is cached.
    thread_local block_pool_resource instance;
    LIBIMP_TRY {
      map.emplace(BlockSize, pi = &instance);
      return pi;
    } LIBIMP_CATCH(...) {
      return nullptr;
    }
  }
 }
 /// \brief Match the appropriate memory block resources 
 ///        according to the size of the specification.
 template <std::size_t N, std::size_t L = regular_level(N)>
 class regular_resource : public new_delete_resource {};
 /// \brief Different increment levels match different chunk sizes.
 ///        512 means that 512 consecutive memory blocks are allocated at a time, and the block size is N.
-template <std::size_t N> class regular_resource<N, 0> : public block_pool_resource<N, 512> {};
+template <std::size_t L>
-template <std::size_t N> class regular_resource<N, 1> : public block_pool_resource<N, 256> {};
+constexpr static std::size_t block_pool_expansion = 0;
-template <std::size_t N> class regular_resource<N, 2> : public block_pool_resource<N, 128> {};
+
-template <std::size_t N> class regular_resource<N, 3> : public block_pool_resource<N, 64 > {};
+template <> constexpr static std::size_t block_pool_expansion<0> = 512;
 template <> constexpr static std::size_t block_pool_expansion<1> = 256;
 template <> constexpr static std::size_t block_pool_expansion<2> = 128;
 template <> constexpr static std::size_t block_pool_expansion<3> = 64;
 /// \brief Match the appropriate memory block resources according to the size of the specification.
 template <std::size_t N, std::size_t L = regular_level(N)>
 struct regular_resource {
  static auto *get() noexcept {
    using block_poll_resource_t = block_pool_resource<N, block_pool_expansion<L>>;
    return dynamic_cast<block_poll_resource_t *>(block_poll_resource_t::get());
  }
 };
 template <std::size_t N>
 struct regular_resource<N, 4> : new_delete_resource {};
 /// \brief Creates an object based on the specified type and parameters with block pool resource.
 /// \note This function is thread-safe.
 template <typename T, typename... A>
 T *new$(A &&... args) noexcept {
-  auto *mem_res = regular_resource<regular_sizeof<T>()>::get();
+  auto *res = regular_resource<regular_sizeof<T>()>::get();
-  if (mem_res == nullptr) return nullptr;
+  if (res == nullptr) return nullptr;
-  return ::LIBIMP::construct<T>(mem_res->allocate(sizeof(T), alignof(T)), std::forward<A>(args)...);
+  return ::LIBIMP::construct<T>(res->allocate(sizeof(T), alignof(T)), std::forward<A>(args)...);
 }
 /// \brief Destroys object previously allocated by the `new$` and releases obtained memory area.
@ -190,13 +134,13 @@ template <typename T>
 void delete$(T *p) noexcept {
  if (p == nullptr) return;
  ::LIBIMP::destroy(p);
-  auto *mem_res = regular_resource<regular_sizeof<T>()>::get();
+  auto *res = regular_resource<regular_sizeof<T>()>::get();
-  if (mem_res == nullptr) return;
+  if (res == nullptr) return;
 #if defined(LIBIMP_CC_MSVC_2015)
  // `alignof` of vs2015 requires that type must be able to be instantiated.
-  mem_res->deallocate(p, sizeof(T));
+  res->deallocate(p, sizeof(T));
 #else
-  mem_res->deallocate(p, sizeof(T), alignof(T));
+  res->deallocate(p, sizeof(T), alignof(T));
 #endif
 }
--- a/src/libpmr/block_pool.cpp
+++ b/src/libpmr/block_pool.cpp
@ -1,13 +1,41 @@
 #include <mutex>
 #include "libimp/detect_plat.h"
 #include "libpmr/block_pool.h"
 #include "libpmr/monotonic_buffer_resource.h"
 LIBPMR_NAMESPACE_BEG_
 class thread_safe_resource : public monotonic_buffer_resource {
 public:
  thread_safe_resource(::LIBIMP::span<::LIBIMP::byte> buffer) noexcept
      : monotonic_buffer_resource(buffer) {}
  ~thread_safe_resource() noexcept {
    LIBIMP_UNUSED std::lock_guard<std::mutex> lock(mutex_);
    monotonic_buffer_resource::release();
  }
  void *allocate(std::size_t bytes, std::size_t alignment) noexcept {
    LIBIMP_UNUSED std::lock_guard<std::mutex> lock(mutex_);
    return monotonic_buffer_resource::allocate(bytes, alignment);
  }
  void deallocate(void *p, std::size_t bytes, std::size_t alignment) noexcept {
    LIBIMP_UNUSED std::lock_guard<std::mutex> lock(mutex_);
    monotonic_buffer_resource::deallocate(p, bytes, alignment);
  }
 private:
  std::mutex mutex_;
 };
 allocator &central_cache_allocator() noexcept {
-  static std::array<::LIBIMP::byte, central_cache_default_size> buffer;
+  static std::array<::LIBIMP::byte, central_cache_default_size> buf;
-  static monotonic_buffer_resource mr(buffer);
+  static thread_safe_resource res(buf);
-  static allocator a(&mr);
+  static allocator a(&res);
  return a;
 }
--- a/src/libpmr/new.cpp
+++ b/src/libpmr/new.cpp
@ -1,12 +0,0 @@
 #include "libpmr/new.h"
 LIBPMR_NAMESPACE_BEG_
 auto get_thread_block_pool_map() noexcept 
  -> std::unordered_map<std::size_t, block_collector *> & {
  thread_local std::unordered_map<std::size_t, block_collector *> instances;
  return instances;
 }
 LIBPMR_NAMESPACE_END_
--- a/test/pmr/test_pmr_new.cpp
+++ b/test/pmr/test_pmr_new.cpp
@ -3,6 +3,7 @@
 #include <array>
 #include <cstring>
 #include <cstddef>
 #include <thread>
 #include "gtest/gtest.h"
@ -114,3 +115,32 @@ TEST(pmr_new, delete$null) {
  pmr::delete$(p);
  SUCCEED();
 }
 /// Exercises `new$` / `delete$` from 16 threads at once. Each thread first
 /// allocates and immediately frees 10000 ints, then allocates 10000 small
 /// arrays, fills each with a per-index byte pattern, and finally verifies
 /// and frees them.
 TEST(pmr_new, multi_thread) {
  std::array<std::thread, 16> threads;
  for (auto &t : threads) {
    t = std::thread([] {
      constexpr int count = 10000;
      using block_t = std::array<char, 10>;
      for (int i = 0; i < count; ++i) {
        auto *p = pmr::new$<int>();
        // `new$` reports failure by returning nullptr; nothing is held at this
        // point, so leaving the thread body early is safe.
        ASSERT_TRUE(p != nullptr);
        *p = i;
        pmr::delete$(p);
      }
      // Value-initialized so that slots never filled stay null.
      std::array<block_t *, count> pts{};
      for (int i = 0; i < count; ++i) {
        auto *p = pmr::new$<block_t>();
        if (p == nullptr) {
          // Report once and stop allocating; blocks already obtained are
          // still verified and released below.
          ADD_FAILURE() << "new$ returned nullptr at iteration " << i;
          break;
        }
        pts[i] = p;
        std::memset(p, i, sizeof(block_t));
      }
      for (int i = 0; i < count; ++i) {
        if (pts[i] == nullptr) continue;
        block_t tmp;
        std::memset(&tmp, i, sizeof(block_t));
        // Non-fatal check: a fatal assertion would return from the lambda and
        // leak every block that has not been deleted yet.
        EXPECT_EQ(std::memcmp(pts[i], &tmp, sizeof(block_t)), 0);
        pmr::delete$(pts[i]);
      }
    });
  }
  for (auto &t : threads) {
    t.join();
  }
  SUCCEED();
 }