From c40dddcc06b82cb66346431a05988b783b787bfa Mon Sep 17 00:00:00 2001
From: mutouyun
Date: Wed, 12 Dec 2018 18:49:10 +0800
Subject: [PATCH] fix bugs; optimize performance

---
 include/rw_lock.h | 76 +++++++++++++++++++++++++++++++++++++++++++----
 src/ipc.cpp       |  2 +-
 test/test.h       |  1 +
 test/test_ipc.cpp | 22 +++++++++-----
 4 files changed, 88 insertions(+), 13 deletions(-)

diff --git a/include/rw_lock.h b/include/rw_lock.h
index 0565f78..b382e88 100644
--- a/include/rw_lock.h
+++ b/include/rw_lock.h
@@ -2,25 +2,91 @@
 
 #include
 #include
+#include
 #include
 
+////////////////////////////////////////////////////////////////
+/// Gives hint to processor that improves performance of spin-wait loops.
+////////////////////////////////////////////////////////////////
+
+#pragma push_macro("IPC_LOCK_PAUSE_")
+#undef IPC_LOCK_PAUSE_
+
+#if defined(_MSC_VER)
+#include // YieldProcessor
+/*
+    See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687419(v=vs.85).aspx
+    Not for intel c++ compiler, so ignore http://software.intel.com/en-us/forums/topic/296168
+*/
+# define IPC_LOCK_PAUSE_() YieldProcessor()
+#elif defined(__GNUC__)
+#if defined(__i386__) || defined(__x86_64__)
+/*
+    See: Intel(R) 64 and IA-32 Architectures Software Developer's Manual V2
+    PAUSE-Spin Loop Hint, 4-57
+    http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.html?wapkw=instruction+set+reference
+*/
+# define IPC_LOCK_PAUSE_() __asm__ __volatile__("pause")
+#elif defined(__ia64__) || defined(__ia64)
+/*
+    See: Intel(R) Itanium(R) Architecture Developer's Manual, Vol.3
+    hint - Performance Hint, 3:145
+    http://www.intel.com/content/www/us/en/processors/itanium/itanium-architecture-vol-3-manual.html
+*/
+# define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("hint @pause")
+#elif defined(__arm__)
+/*
+    See: ARM Architecture Reference Manuals (YIELD)
+    http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.subset.architecture.reference/index.html
+*/
+# define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("yield")
+#endif
+#endif/*compilers*/
+
+#if !defined(IPC_LOCK_PAUSE_)
+/*
+    Just use a compiler fence, prevent compiler from optimizing loop
+*/
+# define IPC_LOCK_PAUSE_() std::atomic_signal_fence(std::memory_order_seq_cst)
+#endif/*!defined(IPC_LOCK_PAUSE_)*/
+
+////////////////////////////////////////////////////////////////
+/// Yield to other threads
+////////////////////////////////////////////////////////////////
+
+namespace ipc {
+
+inline void yield(unsigned k) {
+    if (k < 4) { /* Do nothing */ }
+    else
+    if (k < 16) { IPC_LOCK_PAUSE_(); }
+    else
+    if (k < 32) { std::this_thread::yield(); }
+    else
+    { std::this_thread::sleep_for(std::chrono::milliseconds(1)); }
+}
+
+} // namespace ipc
+
+#pragma pop_macro("IPC_LOCK_PAUSE_")
+
 namespace ipc {
 
 class rw_lock {
     std::atomic_size_t lc_ { 0 };
 
     enum : std::size_t {
-        w_flag = std::numeric_limits::max()
+        w_flag = (std::numeric_limits::max)()
     };
 
 public:
     void lock() {
-        while (1) {
+        for (unsigned k = 0;; ++k) {
             std::size_t expected = 0;
             if (lc_.compare_exchange_weak(expected, w_flag, std::memory_order_acq_rel)) {
                 break;
             }
-            std::this_thread::yield();
+            yield(k);
         }
     }
 
@@ -29,14 +95,14 @@ public:
     }
 
     void lock_shared() {
-        while(1) {
+        for (unsigned k = 0;; ++k) {
             std::size_t old = lc_.load(std::memory_order_relaxed);
             std::size_t unlocked = old + 1;
             if (unlocked &&
                 lc_.compare_exchange_weak(old, unlocked, std::memory_order_acq_rel)) {
                 break;
             }
-            std::this_thread::yield();
+            yield(k);
             std::atomic_thread_fence(std::memory_order_acquire);
         }
     }
diff --git a/src/ipc.cpp b/src/ipc.cpp
index fd1de2d..2c02d11 100644
--- a/src/ipc.cpp
+++ b/src/ipc.cpp
@@ -133,7 +133,7 @@ std::vector recv(handle_t h) {
                 cache.resize(last_size + remain);
                 std::memcpy(cache.data() + last_size, msg.data_, remain);
                 // finish this message, erase it from cache
-                auto ret { std::move(cache) };
+                auto ret = std::move(cache);
                 all.erase(msg.id_);
                 return ret;
             }
diff --git a/test/test.h b/test/test.h
index 8952186..b7e929c 100644
--- a/test/test.h
+++ b/test/test.h
@@ -3,6 +3,7 @@
 
 #include
 #include
+#include
 
 #include "stopwatch.hpp"
 
diff --git a/test/test_ipc.cpp b/test/test_ipc.cpp
index 4311fe7..8728383 100644
--- a/test/test_ipc.cpp
+++ b/test/test_ipc.cpp
@@ -35,11 +35,11 @@ constexpr T acc(T b, T e) {
 
 template
 struct lc_wrapper : Mutex {
-    void lock_shared () { lock (); }
-    void unlock_shared() { unlock(); }
+    void lock_shared () { Mutex::lock (); }
+    void unlock_shared() { Mutex::unlock(); }
 };
 
-template
+template
 void benchmark() {
     std::thread r_trd[R];
     std::thread w_trd[W];
@@ -99,11 +99,19 @@ void benchmark() {
     for (auto& t : r_trd) t.join();
 }
 
+template
+void test_performance() {
+    benchmark();
+    benchmark, R, W>();
+    benchmark , R, W>();
+    benchmark();
+}
+
 void Unit::test_rw_lock() {
-    benchmark();
-    benchmark>();
-    benchmark>();
-    benchmark();
+    test_performance<1, 1>();
+    test_performance<4, 4>();
+    test_performance<1, 8>();
+    test_performance<8, 1>();
 }
 
 void Unit::test_send_recv() {