add benchmark for lock-test

2025-12-07 01:06:45 +08:00 · 2018-12-12 17:48:02 +08:00 · 2018-12-12 17:48:02 +08:00 · 29d25e2226
commit 29d25e2226
parent bc25cd6912
9 changed files with 179 additions and 48 deletions
--- a/include/rw_lock.h
+++ b/include/rw_lock.h
@ -14,7 +14,7 @@ class rw_lock {
    };

 public:
-    void lock(void) {
+    void lock() {
        while (1) {
            std::size_t expected = 0;
            if (lc_.compare_exchange_weak(expected, w_flag, std::memory_order_acq_rel)) {
@ -24,11 +24,11 @@ public:
        }
    }

-    void unlock(void) {
+    void unlock() {
        lc_.store(0, std::memory_order_release);
    }

-    void lock_shared(void) {
+    void lock_shared() {
        while(1) {
            std::size_t old = lc_.load(std::memory_order_relaxed);
            std::size_t unlocked = old + 1;
@ -41,7 +41,7 @@ public:
        }
    }

-    void unlock_shared(void) {
+    void unlock_shared() {
        lc_.fetch_sub(1, std::memory_order_release);
    }
 };
--- a/src/ipc.cpp
+++ b/src/ipc.cpp
@ -1,3 +1,5 @@
+#include "ipc.h"
+
 #include <unordered_map>
 #include <memory>
 #include <type_traits>
@ -6,7 +8,6 @@
 #include <algorithm>
 #include <utility>

-#include "ipc.h"
 #include "circ_queue.h"
 #include "rw_lock.h"

--- a/src/platform/shm_linux.cpp
+++ b/src/platform/shm_linux.cpp
@ -1,11 +1,11 @@
+#include "shm.h"
+
 #include <sys/shm.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #include <fcntl.h>

-#include "shm.h"
-
 namespace ipc {
 namespace shm {

--- a/src/platform/shm_win.cpp
+++ b/src/platform/shm_win.cpp
@ -1,3 +1,5 @@
+#include "shm.h"
+
 #include <windows.h>

 #include <type_traits>
@ -6,8 +8,6 @@
 #include <codecvt>
 #include <utility>

-#include "shm.h"
-
 namespace {

 template <typename T, typename S, typename R = S>
--- a/src/shm.cpp
+++ b/src/shm.cpp
@ -1,8 +1,8 @@
+#include "shm.h"
+
 #include <string>
 #include <utility>

-#include "shm.h"
-
 namespace ipc {
 namespace shm {

--- a/test/spin_lock.hpp
+++ b/test/spin_lock.hpp
@ -0,0 +1,104 @@
+/*
+    The Capo Library
+    Code covered by the MIT License
+
+    Author: mutouyun (http://orzz.org)
+*/
+
+#pragma once
+
+#include <atomic>       // std::atomic_flag, std::atomic_signal_fence
+#include <thread>       // std::this_thread
+#include <chrono>       // std::chrono::milliseconds
+#if defined(_MSC_VER)
+#include <windows.h>    // YieldProcessor
+#endif/*_MSC_VER*/
+
+namespace capo {
+namespace detail_spin_lock {
+
+////////////////////////////////////////////////////////////////
+/// Gives hint to processor that improves performance of spin-wait loops.
+////////////////////////////////////////////////////////////////
+
+#if defined(_MSC_VER)
+/*
+    See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687419(v=vs.85).aspx
+    Not for intel c++ compiler, so ignore http://software.intel.com/en-us/forums/topic/296168
+*/
+#   define CAPO_SPIN_LOCK_PAUSE_() YieldProcessor()
+#elif defined(__GNUC__)
+#if defined(__i386__) || defined(__x86_64__)
+/*
+    See: Intel(R) 64 and IA-32 Architectures Software Developer's Manual V2
+         PAUSE-Spin Loop Hint, 4-57
+         http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.html?wapkw=instruction+set+reference
+*/
+#   define CAPO_SPIN_LOCK_PAUSE_() __asm__ __volatile__("pause")
+#elif defined(__ia64__) || defined(__ia64)
+/*
+    See: Intel(R) Itanium(R) Architecture Developer's Manual, Vol.3
+         hint - Performance Hint, 3:145
+         http://www.intel.com/content/www/us/en/processors/itanium/itanium-architecture-vol-3-manual.html
+*/
+#   define CAPO_SPIN_LOCK_PAUSE_() __asm__ __volatile__ ("hint @pause")
+#elif defined(__arm__)
+/*
+    See: ARM Architecture Reference Manuals (YIELD)
+         http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.subset.architecture.reference/index.html
+*/
+#   define CAPO_SPIN_LOCK_PAUSE_() __asm__ __volatile__ ("yield")
+#endif
+#endif/*compilers*/
+
+#if !defined(CAPO_SPIN_LOCK_PAUSE_)
+/*
+    Otherwise fall back to a compiler fence, which keeps the compiler from optimizing the spin loop.
+*/
+#   define CAPO_SPIN_LOCK_PAUSE_() std::atomic_signal_fence(std::memory_order_seq_cst)
+#endif/*!defined(CAPO_SPIN_LOCK_PAUSE_)*/
+
+////////////////////////////////////////////////////////////////
+/// Yield to other threads
+////////////////////////////////////////////////////////////////
+
+/// Backs off progressively as the spin count k grows:
+/// k < 4 retries immediately, k < 16 runs CAPO_SPIN_LOCK_PAUSE_(),
+/// k < 32 yields the thread, and anything beyond sleeps for 1 ms.
+inline void yield(unsigned k)
+{
+    if (k >= 32) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        return;
+    }
+    if (k >= 16) {
+        std::this_thread::yield();
+        return;
+    }
+    if (k >= 4) {
+        CAPO_SPIN_LOCK_PAUSE_();
+    }
+    // k < 4: retry immediately without any back-off.
+}
+
+} // namespace detail_spin_lock
+
+////////////////////////////////////////////////////////////////
+/// Spinlock
+////////////////////////////////////////////////////////////////
+
+/// Test-and-set spin lock built on std::atomic_flag.
+/// Exposes try_lock(), lock() and unlock().
+class spin_lock
+{
+    std::atomic_flag lc_ = ATOMIC_FLAG_INIT;   // set while the lock is held
+
+public:
+    /// Makes a single acquisition attempt; returns true when the lock was taken.
+    bool try_lock()
+    {
+        const bool was_held = lc_.test_and_set(std::memory_order_acquire);
+        return !was_held;
+    }
+
+    /// Spins until the flag is acquired, calling detail_spin_lock::yield
+    /// with the number of failed attempts so far after each failure.
+    void lock()
+    {
+        unsigned attempts = 0;
+        while (lc_.test_and_set(std::memory_order_acquire)) {
+            detail_spin_lock::yield(attempts);
+            ++attempts;
+        }
+    }
+
+    /// Releases the lock by clearing the flag.
+    void unlock()
+    {
+        lc_.clear(std::memory_order_release);
+    }
+};
+
+} // namespace capo
--- a/test/test.h
+++ b/test/test.h
@ -2,6 +2,10 @@

 #include <QtTest>

+#include <iostream>
+
+#include "stopwatch.hpp"
+
 class TestSuite : public QObject
 {
    Q_OBJECT
@ -15,3 +19,21 @@ protected:
 protected slots:
    virtual void initTestCase();
 };
+
+/// Stopwatch shared by the threads of a benchmark: only the first start()
+/// call starts sw_, and print_elapsed() reports sw_.elapsed().
+struct test_stopwatch {
+    capo::stopwatch<> sw_;
+    std::atomic_flag started_ = ATOMIC_FLAG_INIT;   // set by the first start() call
+
+    /// Starts the stopwatch on the first call only; later calls do nothing.
+    void start() {
+        if (!started_.test_and_set()) {
+            sw_.start();
+        }
+    }
+
+    /// Prints "[N:M, Loops]", then the elapsed time in milliseconds and the
+    /// elapsed microseconds divided by Loops * N.
+    void print_elapsed(int N, int M, int Loops) {
+        auto ts = sw_.elapsed<std::chrono::microseconds>();
+        // Multiply as doubles: Loops * N evaluated in int can overflow for large runs.
+        const double total = double(Loops) * double(N);
+        std::cout << "[" << N << ":" << M << ", " << Loops << "]" << std::endl
+                  << "performance: " << (ts / 1000.0) << " ms, "
+                  << (double(ts) / total) << " us/d" << std::endl;
+    }
+};
--- a/test/test_circ.cpp
+++ b/test/test_circ.cpp
@ -9,7 +9,6 @@

 #include "circ_elem_array.h"
 #include "circ_queue.h"
-#include "stopwatch.hpp"
 #include "test.h"

 namespace {
@ -73,23 +72,6 @@ struct msg_t {
    int dat_;
 };

-struct test_stopwatch {
-    capo::stopwatch<> sw_;
-    std::atomic_flag started_ = ATOMIC_FLAG_INIT;
-
-    void start() {
-        if (!started_.test_and_set()) {
-            sw_.start();
-        }
-    }
-
-    void print_elapsed(int N, int M, int Loops) {
-        auto ts = sw_.elapsed<std::chrono::microseconds>();
-        std::cout << "[" << N << ":" << M << ", " << Loops << "]" << std::endl
-                  << "performance: " << (double(ts) / double(Loops * N)) << " us/d" << std::endl;
-    }
-};
-
 template <bool V>
 struct test_verify {
    std::unordered_map<int, std::vector<int>>* list_;
--- a/test/test_ipc.cpp
+++ b/test/test_ipc.cpp
@ -4,9 +4,12 @@
 #include <iostream>
 #include <shared_mutex>
 #include <mutex>
+#include <typeinfo>

 #include "ipc.h"
 #include "rw_lock.h"
+#include "stopwatch.hpp"
+#include "spin_lock.hpp"
 #include "test.h"

 namespace {
@ -25,12 +28,28 @@ private slots:

 #include "test_ipc.moc"

-void Unit::test_rw_lock() {
-    std::thread r_trd[4];
-    std::thread w_trd[4];
+/// Sum of the consecutive values b, b + 1, ..., e, computed with the
+/// arithmetic-series formula (first + last) * count / 2.
+template <typename T>
+constexpr T acc(T b, T e) {
+    const auto ends  = e + b;       // first + last
+    const auto count = e - b + 1;   // number of terms
+    return ends * count / 2;
+}
+
+/// Gives an exclusive-only Mutex the lock_shared()/unlock_shared() interface
+/// by forwarding shared acquisition and release to lock() and unlock().
+template <typename Mutex>
+struct lc_wrapper : Mutex {
+    // Mutex is a dependent base class, so its members must be reached through
+    // this->; an unqualified lock() is not looked up in it by conforming compilers.
+    void lock_shared  () { this->lock  (); }
+    void unlock_shared() { this->unlock(); }
+};
+
+template <typename Lc, int Loops = 100000, int R = 4, int W = 4>
+void benchmark() {
+    std::thread r_trd[R];
+    std::thread w_trd[W];
+    std::atomic_int fini { 0 };

    std::vector<int> datas;
-    ipc::rw_lock lc;
+    Lc lc;
+
+    test_stopwatch sw;
+    std::cout << std::endl << typeid(Lc).name() << std::endl;

    for (auto& t : r_trd) {
        t = std::thread([&] {
@ -39,7 +58,7 @@ void Unit::test_rw_lock() {
            while (1) {
                int x = -1;
                {
-                    [[maybe_unused]] std::shared_lock<ipc::rw_lock> guard { lc };
+                    [[maybe_unused]] std::shared_lock<Lc> guard { lc };
                    if (cnt < datas.size()) {
                        x = datas[cnt];
                    }
@ -51,21 +70,23 @@ void Unit::test_rw_lock() {
                }
                std::this_thread::yield();
            }
-            std::size_t sum = 0;
-            for (int i : seq) {
-                sum += static_cast<std::size_t>(i);
+            if (++fini == std::extent<decltype(r_trd)>::value) {
+                sw.print_elapsed(R, W, Loops);
            }
-            std::cout << std::endl;
-            QCOMPARE(sum, 5050 * std::extent<decltype(w_trd)>::value);
+            std::uint64_t sum = 0;
+            for (int i : seq) sum += i;
+            QCOMPARE(sum, acc<std::uint64_t>(1, Loops) * std::extent<decltype(w_trd)>::value);
        });
    }

    for (auto& t : w_trd) {
        t = std::thread([&] {
-            for (int i = 1; i <= 100; ++i) {
-                lc.lock();
-                datas.push_back(i);
-                lc.unlock();
+            sw.start();
+            for (int i = 1; i <= Loops; ++i) {
+                {
+                    [[maybe_unused]] std::unique_lock<Lc> guard { lc };
+                    datas.push_back(i);
+                }
                std::this_thread::yield();
            }
        });
@ -75,13 +96,14 @@ void Unit::test_rw_lock() {
    lc.lock();
    datas.push_back(0);
    lc.unlock();
-
    for (auto& t : r_trd) t.join();
+}

-    for (int i : datas) {
-        std::cout << i << " ";
-    }
-    std::cout << std::endl;
+void Unit::test_rw_lock() {   // runs benchmark() once for each lock type below
+    benchmark<ipc::rw_lock>();                  // the project's rw_lock
+    benchmark<lc_wrapper<capo::spin_lock>>();   // spin lock wrapped so shared locking uses lock()/unlock()
+    benchmark<lc_wrapper<std::mutex>>();        // std::mutex wrapped the same way
+    benchmark<std::shared_mutex>();             // standard library shared mutex
 }

 void Unit::test_send_recv() {