diff --git a/include/rw_lock.h b/include/rw_lock.h index 43d08b8..0565f78 100644 --- a/include/rw_lock.h +++ b/include/rw_lock.h @@ -14,7 +14,7 @@ class rw_lock { }; public: - void lock(void) { + void lock() { while (1) { std::size_t expected = 0; if (lc_.compare_exchange_weak(expected, w_flag, std::memory_order_acq_rel)) { @@ -24,11 +24,11 @@ public: } } - void unlock(void) { + void unlock() { lc_.store(0, std::memory_order_release); } - void lock_shared(void) { + void lock_shared() { while(1) { std::size_t old = lc_.load(std::memory_order_relaxed); std::size_t unlocked = old + 1; @@ -41,7 +41,7 @@ public: } } - void unlock_shared(void) { + void unlock_shared() { lc_.fetch_sub(1, std::memory_order_release); } }; diff --git a/src/ipc.cpp b/src/ipc.cpp index e5904ca..fd1de2d 100644 --- a/src/ipc.cpp +++ b/src/ipc.cpp @@ -1,3 +1,5 @@ +#include "ipc.h" + #include #include #include @@ -6,7 +8,6 @@ #include #include -#include "ipc.h" #include "circ_queue.h" #include "rw_lock.h" diff --git a/src/platform/shm_linux.cpp b/src/platform/shm_linux.cpp index c64c568..d048fc4 100644 --- a/src/platform/shm_linux.cpp +++ b/src/platform/shm_linux.cpp @@ -1,11 +1,11 @@ +#include "shm.h" + #include #include #include #include #include -#include "shm.h" - namespace ipc { namespace shm { diff --git a/src/platform/shm_win.cpp b/src/platform/shm_win.cpp index a2eda46..ee29e53 100644 --- a/src/platform/shm_win.cpp +++ b/src/platform/shm_win.cpp @@ -1,3 +1,5 @@ +#include "shm.h" + #include #include @@ -6,8 +8,6 @@ #include #include -#include "shm.h" - namespace { template diff --git a/src/shm.cpp b/src/shm.cpp index 688c91c..eec3b24 100644 --- a/src/shm.cpp +++ b/src/shm.cpp @@ -1,8 +1,8 @@ +#include "shm.h" + #include #include -#include "shm.h" - namespace ipc { namespace shm { diff --git a/test/spin_lock.hpp b/test/spin_lock.hpp new file mode 100644 index 0000000..8ca2635 --- /dev/null +++ b/test/spin_lock.hpp @@ -0,0 +1,104 @@ +/* + The Capo Library + Code covered by the MIT License + + Author: mutouyun (http://orzz.org) +*/ + +#pragma once + +#include // std::atomic_flag, std::atomic_signal_fence +#include // std::this_thread +#include // std::chrono::milliseconds +#if defined(_MSC_VER) +#include // YieldProcessor +#endif/*_MSC_VER*/ + +namespace capo { +namespace detail_spin_lock { + +//////////////////////////////////////////////////////////////// +/// Gives hint to processor that improves performance of spin-wait loops. +//////////////////////////////////////////////////////////////// + +#if defined(_MSC_VER) +/* + See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687419(v=vs.85).aspx + Not for intel c++ compiler, so ignore http://software.intel.com/en-us/forums/topic/296168 +*/ +# define CAPO_SPIN_LOCK_PAUSE_() YieldProcessor() +#elif defined(__GNUC__) +#if defined(__i386__) || defined(__x86_64__) +/* + See: Intel(R) 64 and IA-32 Architectures Software Developer's Manual V2 + PAUSE-Spin Loop Hint, 4-57 + http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.html?wapkw=instruction+set+reference +*/ +# define CAPO_SPIN_LOCK_PAUSE_() __asm__ __volatile__("pause") +#elif defined(__ia64__) || defined(__ia64) +/* + See: Intel(R) Itanium(R) Architecture Developer's Manual, Vol.3 + hint - Performance Hint, 3:145 + http://www.intel.com/content/www/us/en/processors/itanium/itanium-architecture-vol-3-manual.html +*/ +# define CAPO_SPIN_LOCK_PAUSE_() __asm__ __volatile__ ("hint @pause") +#elif defined(__arm__) +/* + See: ARM Architecture Reference Manuals (YIELD) + http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.subset.architecture.reference/index.html +*/ +# define CAPO_SPIN_LOCK_PAUSE_() __asm__ __volatile__ ("yield") +#endif +#endif/*compilers*/ + +#if !defined(CAPO_SPIN_LOCK_PAUSE_) +/* + Just use a compiler fence, prevent compiler from optimizing loop +*/ +# define CAPO_SPIN_LOCK_PAUSE_() std::atomic_signal_fence(std::memory_order_seq_cst) +#endif/*!defined(CAPO_SPIN_LOCK_PAUSE_)*/ + +//////////////////////////////////////////////////////////////// +/// Yield to other threads +//////////////////////////////////////////////////////////////// + +inline void yield(unsigned k) +{ + if (k < 4) { /* Do nothing */ } + else + if (k < 16) { CAPO_SPIN_LOCK_PAUSE_(); } + else + if (k < 32) { std::this_thread::yield(); } + else + { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } +} + +} // namespace detail_spin_lock + +//////////////////////////////////////////////////////////////// +/// Spinlock +//////////////////////////////////////////////////////////////// + +class spin_lock +{ + std::atomic_flag lc_ = ATOMIC_FLAG_INIT; + +public: + bool try_lock(void) + { + return !lc_.test_and_set(std::memory_order_acquire); + } + + void lock(void) + { + for (unsigned k = 0; lc_.test_and_set(std::memory_order_acquire); ++k) + detail_spin_lock::yield(k); + } + + void unlock(void) + { + lc_.clear(std::memory_order_release); + } +}; + +} // namespace capo diff --git a/test/test.h b/test/test.h index 5fb915d..8952186 100644 --- a/test/test.h +++ b/test/test.h @@ -2,6 +2,10 @@ #include +#include + +#include "stopwatch.hpp" + class TestSuite : public QObject { Q_OBJECT @@ -15,3 +19,21 @@ protected: protected slots: virtual void initTestCase(); }; + +struct test_stopwatch { + capo::stopwatch<> sw_; + std::atomic_flag started_ = ATOMIC_FLAG_INIT; + + void start() { + if (!started_.test_and_set()) { + sw_.start(); + } + } + + void print_elapsed(int N, int M, int Loops) { + auto ts = sw_.elapsed(); + std::cout << "[" << N << ":" << M << ", " << Loops << "]" << std::endl + << "performance: " << (ts / 1000.0) << " ms, " + << (double(ts) / double(Loops * N)) << " us/d" << std::endl; + } +}; diff --git a/test/test_circ.cpp b/test/test_circ.cpp index 53208db..61cc68a 100644 --- a/test/test_circ.cpp +++ b/test/test_circ.cpp @@ -9,7 +9,6 @@ #include "circ_elem_array.h" #include "circ_queue.h" -#include "stopwatch.hpp" #include "test.h" namespace { @@ -73,23 +72,6 @@ struct msg_t { int dat_; }; -struct test_stopwatch { - capo::stopwatch<> sw_; - std::atomic_flag started_ = ATOMIC_FLAG_INIT; - - void start() { - if (!started_.test_and_set()) { - sw_.start(); - } - } - - void print_elapsed(int N, int M, int Loops) { - auto ts = sw_.elapsed(); - std::cout << "[" << N << ":" << M << ", " << Loops << "]" << std::endl - << "performance: " << (double(ts) / double(Loops * N)) << " us/d" << std::endl; - } -}; - template struct test_verify { std::unordered_map>* list_; diff --git a/test/test_ipc.cpp b/test/test_ipc.cpp index eecc1ca..4311fe7 100644 --- a/test/test_ipc.cpp +++ b/test/test_ipc.cpp @@ -4,9 +4,12 @@ #include #include #include +#include #include "ipc.h" #include "rw_lock.h" +#include "stopwatch.hpp" +#include "spin_lock.hpp" #include "test.h" namespace { @@ -25,12 +28,28 @@ private slots: #include "test_ipc.moc" -void Unit::test_rw_lock() { - std::thread r_trd[4]; - std::thread w_trd[4]; +template +constexpr T acc(T b, T e) { + return (e + b) * (e - b + 1) / 2; +} + +template +struct lc_wrapper : Mutex { + void lock_shared () { lock (); } + void unlock_shared() { unlock(); } +}; + +template +void benchmark() { + std::thread r_trd[R]; + std::thread w_trd[W]; + std::atomic_int fini { 0 }; std::vector datas; - ipc::rw_lock lc; + Lc lc; + + test_stopwatch sw; + std::cout << std::endl << typeid(Lc).name() << std::endl; for (auto& t : r_trd) { t = std::thread([&] { @@ -39,7 +58,7 @@ void Unit::test_rw_lock() { while (1) { int x = -1; { - [[maybe_unused]] std::shared_lock guard { lc }; + [[maybe_unused]] std::shared_lock guard { lc }; if (cnt < datas.size()) { x = datas[cnt]; } @@ -51,21 +70,23 @@ void Unit::test_rw_lock() { } std::this_thread::yield(); } - std::size_t sum = 0; - for (int i : seq) { - sum += static_cast(i); + if (++fini == std::extent::value) { + sw.print_elapsed(R, W, Loops); } - std::cout << std::endl; - QCOMPARE(sum, 5050 * std::extent::value); + std::uint64_t sum = 0; + for (int i : seq) sum += i; + QCOMPARE(sum, acc(1, Loops) * std::extent::value); }); } for (auto& t : w_trd) { t = std::thread([&] { - for (int i = 1; i <= 100; ++i) { - lc.lock(); - datas.push_back(i); - lc.unlock(); + sw.start(); + for (int i = 1; i <= Loops; ++i) { + { + [[maybe_unused]] std::unique_lock guard { lc }; + datas.push_back(i); + } std::this_thread::yield(); } }); @@ -75,13 +96,14 @@ void Unit::test_rw_lock() { lc.lock(); datas.push_back(0); lc.unlock(); - for (auto& t : r_trd) t.join(); +} - for (int i : datas) { - std::cout << i << " "; - } - std::cout << std::endl; +void Unit::test_rw_lock() { + benchmark(); + benchmark>(); + benchmark>(); + benchmark(); } void Unit::test_send_recv() {