fix bugs; optimize performance

2026-02-16 23:29:49 +08:00 · 2018-12-12 18:49:10 +08:00 · 2018-12-12 18:49:10 +08:00 · c40dddcc06
commit c40dddcc06
parent 29d25e2226
4 changed files with 88 additions and 13 deletions
--- a/include/rw_lock.h
+++ b/include/rw_lock.h
@ -2,25 +2,91 @@
 #include <atomic>
 #include <thread>
 #include <chrono>
 #include <limits>
 ////////////////////////////////////////////////////////////////
 /// Gives hint to processor that improves performance of spin-wait loops.
 ////////////////////////////////////////////////////////////////
 // Selects a per-compiler / per-architecture definition of IPC_LOCK_PAUSE_(),
 // the spin-wait hint used by ipc::yield(). Any previous definition of the
 // macro is saved with push_macro and cleared before the selection starts.
 #pragma push_macro("IPC_LOCK_PAUSE_")
 #undef  IPC_LOCK_PAUSE_
 #if defined(_MSC_VER)
 // NOTE(review): <windows.h> defines min/max function-like macros unless
 // NOMINMAX is set; presumably that is why rw_lock::w_flag is spelled
 // (std::numeric_limits<std::size_t>::max)() -- confirm.
 #include <windows.h>    // YieldProcessor
 /*
    See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687419(v=vs.85).aspx
    Not for intel c++ compiler, so ignore http://software.intel.com/en-us/forums/topic/296168
 */
 #   define IPC_LOCK_PAUSE_() YieldProcessor()
 #elif defined(__GNUC__)
 #if defined(__i386__) || defined(__x86_64__)
 /*
    See: Intel(R) 64 and IA-32 Architectures Software Developer's Manual V2
         PAUSE-Spin Loop Hint, 4-57
         http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.html?wapkw=instruction+set+reference
 */
 #   define IPC_LOCK_PAUSE_() __asm__ __volatile__("pause")
 #elif defined(__ia64__) || defined(__ia64)
 /*
    See: Intel(R) Itanium(R) Architecture Developer's Manual, Vol.3
         hint - Performance Hint, 3:145
         http://www.intel.com/content/www/us/en/processors/itanium/itanium-architecture-vol-3-manual.html
 */
 #   define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("hint @pause")
 #elif defined(__arm__)
 /*
    See: ARM Architecture Reference Manuals (YIELD)
         http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.subset.architecture.reference/index.html

    NOTE(review): __arm__ is only predefined for 32-bit ARM targets; AArch64
         builds predefine __aarch64__ instead and therefore end up with the
         compiler-fence fallback below. Confirm whether that is intended.
 */
 #   define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("yield")
 #endif
 #endif/*compilers*/
 // Fallback for every compiler/architecture combination not matched above.
 #if !defined(IPC_LOCK_PAUSE_)
 /*
    Just use a compiler fence, prevent compiler from optimizing loop
 */
 #   define IPC_LOCK_PAUSE_() std::atomic_signal_fence(std::memory_order_seq_cst)
 #endif/*!defined(IPC_LOCK_PAUSE_)*/
 ////////////////////////////////////////////////////////////////
 /// Yield to other threads
 ////////////////////////////////////////////////////////////////
 namespace ipc {
 /// Back-off step for spin loops; the cost of the wait grows with `k`.
 ///
 /// @param k  Tier selector. The lock loops in rw_lock pass their retry
 ///           counter, which starts at 0 and is incremented on every failed
 ///           attempt.
 ///
 /// Tiers:
 ///   k <  4  : return immediately (pure busy spin)
 ///   k < 16  : issue the IPC_LOCK_PAUSE_() spin-wait hint
 ///   k < 32  : std::this_thread::yield()
 ///   k >= 32 : sleep for 1 millisecond
 inline void yield(unsigned k) {
    if (k < 4)  { /* Do nothing */ }
    else
    if (k < 16) { IPC_LOCK_PAUSE_(); }
    else
    if (k < 32) { std::this_thread::yield(); }
    else
    { std::this_thread::sleep_for(std::chrono::milliseconds(1)); }
 }
 } // namespace ipc
 #pragma pop_macro("IPC_LOCK_PAUSE_")
 namespace ipc {
 class rw_lock {
    std::atomic_size_t lc_ { 0 };
    enum : std::size_t {
-        w_flag = std::numeric_limits<std::size_t>::max()
+        w_flag = (std::numeric_limits<std::size_t>::max)()
    };
 public:
    /// Acquires the lock exclusively: loops until lc_ can be switched
    /// atomically from 0 to w_flag. This commit replaces the unconditional
    /// std::this_thread::yield() between attempts with the tiered yield(k).
    void lock() {
-        while (1) {
+        for (unsigned k = 0;; ++k) {
            // Re-initialised on every pass: a failed compare-exchange
            // overwrites `expected` with the value it actually observed.
            std::size_t expected = 0;
            // The weak form may also fail spuriously; the loop simply retries.
            if (lc_.compare_exchange_weak(expected, w_flag, std::memory_order_acq_rel)) {
                break;
            }
-            std::this_thread::yield();
+            yield(k);
        }
    }
@ -29,14 +95,14 @@ public:
    }
    /// Acquires the lock in shared mode: loops until lc_ can be incremented
    /// by one while it does not hold w_flag. This commit replaces the
    /// unconditional std::this_thread::yield() between attempts with the
    /// tiered yield(k).
    void lock_shared() {
-        while(1) {
+        for (unsigned k = 0;; ++k) {
            std::size_t old = lc_.load(std::memory_order_relaxed);
            // w_flag is the maximum std::size_t, so old + 1 wraps to 0 exactly
            // when lock() has stored w_flag; the `unlocked &&` test below then
            // skips the compare-exchange for this pass.
            std::size_t unlocked = old + 1;
            if (unlocked &&
                lc_.compare_exchange_weak(old, unlocked, std::memory_order_acq_rel)) {
                break;
            }
-            std::this_thread::yield();
+            yield(k);
            // NOTE(review): the purpose of this acquire fence ahead of the
            // relaxed reload is not evident from the visible code -- confirm.
            std::atomic_thread_fence(std::memory_order_acquire);
        }
    }
--- a/src/ipc.cpp
+++ b/src/ipc.cpp
@ -133,7 +133,7 @@ std::vector<byte_t> recv(handle_t h) {
            cache.resize(last_size + remain);
            std::memcpy(cache.data() + last_size, msg.data_, remain);
            // finish this message, erase it from cache
-            auto ret { std::move(cache) };
+            auto ret = std::move(cache);
            all.erase(msg.id_);
            return ret;
        }
--- a/test/test.h
+++ b/test/test.h
@ -3,6 +3,7 @@
 #include <QtTest>
 #include <iostream>
 #include <atomic>
 #include "stopwatch.hpp"
--- a/test/test_ipc.cpp
+++ b/test/test_ipc.cpp
@ -35,11 +35,11 @@ constexpr T acc(T b, T e) {
 // Adapter that adds lock_shared()/unlock_shared() to a `Mutex` type by
 // forwarding them to the base class's lock()/unlock(), so shared acquisition
 // through this wrapper is the same operation as exclusive acquisition.
 //
 // The calls are qualified with `Mutex::` because lock/unlock are members of
 // a dependent base class: an unqualified call is not looked up in that base
 // when the template is parsed, and conforming compilers reject it.
 template <typename Mutex>
 struct lc_wrapper : Mutex {
-    void lock_shared  () { lock  (); }
+    void lock_shared  () { Mutex::lock  (); }
-    void unlock_shared() { unlock(); }
+    void unlock_shared() { Mutex::unlock(); }
 };
-template <typename Lc, int Loops = 100000, int R = 4, int W = 4>
+template <typename Lc, int R = 4, int W = 4, int Loops = 100000>
 void benchmark() {
    std::thread r_trd[R];
    std::thread w_trd[W];
@ -99,11 +99,19 @@ void benchmark() {
    for (auto& t : r_trd) t.join();
 }
 // Runs benchmark() once per lock implementation under comparison
 // (ipc::rw_lock, capo::spin_lock and std::mutex via lc_wrapper, and
 // std::shared_mutex). R and W are forwarded as benchmark's R and W template
 // arguments, which size its r_trd and w_trd thread arrays; Loops keeps its
 // default value.
 template <int R, int W>
 void test_performance() {
    benchmark<ipc::rw_lock               , R, W>();
    benchmark<lc_wrapper<capo::spin_lock>, R, W>();
    benchmark<lc_wrapper<std::mutex>     , R, W>();
    benchmark<std::shared_mutex          , R, W>();
 }
 // After this commit the test runs test_performance for four <R, W>
 // combinations -- <1, 1>, <4, 4>, <1, 8> and <8, 1> -- instead of calling
 // benchmark() once per lock type with its default template arguments.
 void Unit::test_rw_lock() {
-    benchmark<ipc::rw_lock>();
+    test_performance<1, 1>();
-    benchmark<lc_wrapper<capo::spin_lock>>();
+    test_performance<4, 4>();
-    benchmark<lc_wrapper<std::mutex>>();
+    test_performance<1, 8>();
-    benchmark<std::shared_mutex>();
+    test_performance<8, 1>();
 }
 void Unit::test_send_recv() {