add benchmark for lock-test

2026-02-09 03:06:42 +08:00 · 2018-12-12 17:48:02 +08:00 · 2018-12-12 17:48:02 +08:00 · 29d25e2226
commit 29d25e2226
parent bc25cd6912
9 changed files with 179 additions and 48 deletions
--- a/include/rw_lock.h
+++ b/include/rw_lock.h
@ -14,7 +14,7 @@ class rw_lock {
    };
 public:
-    void lock(void) {
+    void lock() {
        while (1) {
            std::size_t expected = 0;
            if (lc_.compare_exchange_weak(expected, w_flag, std::memory_order_acq_rel)) {
@ -24,11 +24,11 @@ public:
        }
    }
-    void unlock(void) {
+    void unlock() {
        // Reset the lock word to 0, the value lock() expects before it can
        // install w_flag. The release store pairs with the acquiring
        // compare-exchange in lock().
        lc_.store(0, std::memory_order_release);
    }
-    void lock_shared(void) {
+    void lock_shared() {
        while(1) {
            std::size_t old = lc_.load(std::memory_order_relaxed);
            std::size_t unlocked = old + 1;
@ -41,7 +41,7 @@ public:
        }
    }
-    void unlock_shared(void) {
+    void unlock_shared() {
        // Decrement the lock word by one with release ordering; presumably this
        // undoes the "old + 1" that lock_shared() installs (its compare-exchange
        // is outside the visible hunk -- confirm against the full header).
        lc_.fetch_sub(1, std::memory_order_release);
    }
 };
--- a/src/ipc.cpp
+++ b/src/ipc.cpp
@ -1,3 +1,5 @@
 #include "ipc.h"
 #include <unordered_map>
 #include <memory>
 #include <type_traits>
@ -6,7 +8,6 @@
 #include <algorithm>
 #include <utility>
 #include "ipc.h"
 #include "circ_queue.h"
 #include "rw_lock.h"
--- a/src/platform/shm_linux.cpp
+++ b/src/platform/shm_linux.cpp
@ -1,11 +1,11 @@
 #include "shm.h"
 #include <sys/shm.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include "shm.h"
 namespace ipc {
 namespace shm {
--- a/src/platform/shm_win.cpp
+++ b/src/platform/shm_win.cpp
@ -1,3 +1,5 @@
 #include "shm.h"
 #include <windows.h>
 #include <type_traits>
@ -6,8 +8,6 @@
 #include <codecvt>
 #include <utility>
 #include "shm.h"
 namespace {
 template <typename T, typename S, typename R = S>
--- a/src/shm.cpp
+++ b/src/shm.cpp
@ -1,8 +1,8 @@
 #include "shm.h"
 #include <string>
 #include <utility>
 #include "shm.h"
 namespace ipc {
 namespace shm {
--- a/test/spin_lock.hpp
+++ b/test/spin_lock.hpp
@ -0,0 +1,104 @@
 /*
    The Capo Library
    Code covered by the MIT License
    Author: mutouyun (http://orzz.org)
 */
 #pragma once
 #include <atomic>       // std::atomic_flag, std::atomic_signal_fence
 #include <thread>       // std::this_thread
 #include <chrono>       // std::chrono::milliseconds
 #if defined(_MSC_VER)
 #include <windows.h>    // YieldProcessor
 #endif/*_MSC_VER*/
 namespace capo {
 namespace detail_spin_lock {
 ////////////////////////////////////////////////////////////////
 /// CAPO_SPIN_LOCK_PAUSE_(): hint to the processor that the caller
 /// is inside a spin-wait loop. The implementation is selected per
 /// compiler and target architecture below.
 ////////////////////////////////////////////////////////////////
 #if defined(_MSC_VER)
 /*
    See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687419(v=vs.85).aspx
    Not for intel c++ compiler, so ignore http://software.intel.com/en-us/forums/topic/296168
 */
 #   define CAPO_SPIN_LOCK_PAUSE_() YieldProcessor()
 #elif defined(__GNUC__)
 #if defined(__i386__) || defined(__x86_64__)
 /*
    See: Intel(R) 64 and IA-32 Architectures Software Developer's Manual V2
         PAUSE-Spin Loop Hint, 4-57
         http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.html?wapkw=instruction+set+reference
 */
 #   define CAPO_SPIN_LOCK_PAUSE_() __asm__ __volatile__("pause")
 #elif defined(__ia64__) || defined(__ia64)
 /*
    See: Intel(R) Itanium(R) Architecture Developer's Manual, Vol.3
         hint - Performance Hint, 3:145
         http://www.intel.com/content/www/us/en/processors/itanium/itanium-architecture-vol-3-manual.html
 */
 #   define CAPO_SPIN_LOCK_PAUSE_() __asm__ __volatile__ ("hint @pause")
 #elif defined(__arm__) || defined(__aarch64__)
 /*
    See: ARM Architecture Reference Manuals (YIELD)
         http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.subset.architecture.reference/index.html
    64-bit ARM targets define __aarch64__ instead of __arm__; YIELD exists
    there as well, so both are routed to the same hint.
 */
 #   define CAPO_SPIN_LOCK_PAUSE_() __asm__ __volatile__ ("yield")
 #endif
 #endif/*compilers*/
 #if !defined(CAPO_SPIN_LOCK_PAUSE_)
 /*
    No known pause instruction for this target: fall back to a compiler
    fence, which only keeps the compiler from optimizing the loop away.
 */
 #   define CAPO_SPIN_LOCK_PAUSE_() std::atomic_signal_fence(std::memory_order_seq_cst)
 #endif/*!defined(CAPO_SPIN_LOCK_PAUSE_)*/
 ////////////////////////////////////////////////////////////////
 /// Back-off step for a spin loop.
 /// k is the number of failed attempts so far:
 ///   k <  4 : retry immediately
 ///   k < 16 : issue the processor pause hint
 ///   k < 32 : yield the rest of the time slice to another thread
 ///   else   : sleep for one millisecond
 ////////////////////////////////////////////////////////////////
 inline void yield(unsigned k)
 {
    if (k < 4) {
        /* Do nothing */
    }
    else if (k < 16) {
        CAPO_SPIN_LOCK_PAUSE_();
    }
    else if (k < 32) {
        std::this_thread::yield();
    }
    else {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
 }
 } // namespace detail_spin_lock
 ////////////////////////////////////////////////////////////////
 /// Spinlock built on std::atomic_flag.
 /// Provides lock / try_lock / unlock, so it can be used with
 /// std::lock_guard and std::unique_lock. Not recursive: locking it
 /// again from the thread that already holds it never returns.
 ////////////////////////////////////////////////////////////////
 class spin_lock
 {
    std::atomic_flag lc_ = ATOMIC_FLAG_INIT;
 public:
    /// Single acquisition attempt; returns true when the lock was taken.
    bool try_lock()
    {
        // test_and_set returns the previous value: false means it was free.
        return !lc_.test_and_set(std::memory_order_acquire);
    }
    /// Spins until the lock is acquired, backing off progressively.
    void lock()
    {
        for (unsigned k = 0; lc_.test_and_set(std::memory_order_acquire); ++k)
            detail_spin_lock::yield(k);
    }
    /// Releases the lock; pairs with the acquire in lock() / try_lock().
    void unlock()
    {
        lc_.clear(std::memory_order_release);
    }
 };
 } // namespace capo
--- a/test/test.h
+++ b/test/test.h
@ -2,6 +2,10 @@
 #include <QtTest>
 #include <iostream>
 #include "stopwatch.hpp"
 class TestSuite : public QObject
 {
    Q_OBJECT
@ -15,3 +19,21 @@ protected:
 protected slots:
    virtual void initTestCase();
 };
 // Timing helper shared by the multi-threaded tests.
 // start() may be called from several threads; only the first call starts the
 // stopwatch. print_elapsed() reports the time measured since then.
 struct test_stopwatch {
    capo::stopwatch<> sw_;
    std::atomic_flag started_ = ATOMIC_FLAG_INIT;
    // Starts the stopwatch once; later calls (from any thread) are no-ops.
    void start() {
        if (!started_.test_and_set()) {
            sw_.start();
        }
    }
    // Prints "[N:M, Loops]", the elapsed time in milliseconds, and the average
    // in microseconds per item, where the item count is Loops * N.
    // M is only echoed in the header; it does not enter the average.
    void print_elapsed(int N, int M, int Loops) {
        auto ts = sw_.elapsed<std::chrono::microseconds>();
        // Multiply in double: Loops * N as int can overflow for large runs.
        const double items = double(Loops) * double(N);
        std::cout << "[" << N << ":" << M << ", " << Loops << "]" << std::endl
                  << "performance: " << (ts / 1000.0) << " ms, "
                  << (double(ts) / items) << " us/d" << std::endl;
    }
 };
--- a/test/test_circ.cpp
+++ b/test/test_circ.cpp
@ -9,7 +9,6 @@
 #include "circ_elem_array.h"
 #include "circ_queue.h"
 #include "stopwatch.hpp"
 #include "test.h"
 namespace {
@ -73,23 +72,6 @@ struct msg_t {
    int dat_;
 };
 // Timing helper for the multi-threaded tests in this file.
 // start() may be called from several threads; only the first call starts the
 // stopwatch. print_elapsed() reports the time measured since then.
 struct test_stopwatch {
    capo::stopwatch<> sw_;
    std::atomic_flag started_ = ATOMIC_FLAG_INIT;
    // Starts the stopwatch once; later calls (from any thread) are no-ops.
    void start() {
        if (!started_.test_and_set()) {
            sw_.start();
        }
    }
    // Prints "[N:M, Loops]" followed by the average in microseconds per item,
    // where the item count is Loops * N. M is only echoed in the header.
    void print_elapsed(int N, int M, int Loops) {
        auto ts = sw_.elapsed<std::chrono::microseconds>();
        // Multiply in double: Loops * N as int can overflow for large runs.
        const double items = double(Loops) * double(N);
        std::cout << "[" << N << ":" << M << ", " << Loops << "]" << std::endl
                  << "performance: " << (double(ts) / items) << " us/d" << std::endl;
    }
 };
 template <bool V>
 struct test_verify {
    std::unordered_map<int, std::vector<int>>* list_;
--- a/test/test_ipc.cpp
+++ b/test/test_ipc.cpp
@ -4,9 +4,12 @@
 #include <iostream>
 #include <shared_mutex>
 #include <mutex>
 #include <typeinfo>
 #include "ipc.h"
 #include "rw_lock.h"
 #include "stopwatch.hpp"
 #include "spin_lock.hpp"
 #include "test.h"
 namespace {
@ -25,12 +28,28 @@ private slots:
 #include "test_ipc.moc"
-void Unit::test_rw_lock() {
+template <typename T>
-    std::thread r_trd[4];
+constexpr T acc(T b, T e) {
-    std::thread w_trd[4];
+    return (e + b) * (e - b + 1) / 2;
 }
 // Adapts an exclusive-only lock (anything with lock()/unlock()) to the
 // shared-lock interface by mapping the shared operations onto the exclusive
 // ones, so it can be used with std::shared_lock.
 template <typename Mutex>
 struct lc_wrapper : Mutex {
    // `this->` is required: lock()/unlock() are members of a dependent base
    // class, which unqualified name lookup inside a template does not search.
    void lock_shared  () { this->lock  (); }
    void unlock_shared() { this->unlock(); }
 };
 template <typename Lc, int Loops = 100000, int R = 4, int W = 4>
 void benchmark() {
    std::thread r_trd[R];
    std::thread w_trd[W];
    std::atomic_int fini { 0 };
    std::vector<int> datas;
-    ipc::rw_lock lc;
+    Lc lc;
    test_stopwatch sw;
    std::cout << std::endl << typeid(Lc).name() << std::endl;
    for (auto& t : r_trd) {
        t = std::thread([&] {
@ -39,7 +58,7 @@ void Unit::test_rw_lock() {
            while (1) {
                int x = -1;
                {
-                    [[maybe_unused]] std::shared_lock<ipc::rw_lock> guard { lc };
+                    [[maybe_unused]] std::shared_lock<Lc> guard { lc };
                    if (cnt < datas.size()) {
                        x = datas[cnt];
                    }
@ -51,21 +70,23 @@ void Unit::test_rw_lock() {
                }
                std::this_thread::yield();
            }
-            std::size_t sum = 0;
+            if (++fini == std::extent<decltype(r_trd)>::value) {
-            for (int i : seq) {
+                sw.print_elapsed(R, W, Loops);
                sum += static_cast<std::size_t>(i);
            }
-            std::cout << std::endl;
+            std::uint64_t sum = 0;
-            QCOMPARE(sum, 5050 * std::extent<decltype(w_trd)>::value);
+            for (int i : seq) sum += i;
            QCOMPARE(sum, acc<std::uint64_t>(1, Loops) * std::extent<decltype(w_trd)>::value);
        });
    }
    for (auto& t : w_trd) {
        t = std::thread([&] {
-            for (int i = 1; i <= 100; ++i) {
+            sw.start();
-                lc.lock();
+            for (int i = 1; i <= Loops; ++i) {
                {
                    [[maybe_unused]] std::unique_lock<Lc> guard { lc };
                    datas.push_back(i);
-                lc.unlock();
+                }
                std::this_thread::yield();
            }
        });
@ -75,13 +96,14 @@ void Unit::test_rw_lock() {
    lc.lock();
    datas.push_back(0);
    lc.unlock();
    for (auto& t : r_trd) t.join();
    for (int i : datas) {
        std::cout << i << " ";
 }
-    std::cout << std::endl;
+
 void Unit::test_rw_lock() {
    benchmark<ipc::rw_lock>();
    benchmark<lc_wrapper<capo::spin_lock>>();
    benchmark<lc_wrapper<std::mutex>>();
    benchmark<std::shared_mutex>();
 }
 void Unit::test_send_recv() {