diff --git a/include/circ_elem_array.h b/include/circ_elem_array.h index 9ab4f59..95c86be 100644 --- a/include/circ_elem_array.h +++ b/include/circ_elem_array.h @@ -38,8 +38,8 @@ struct alignas(std::max_align_t) elem_array_head { } auto acquire(void) { - while (lc_.exchange(1, std::memory_order_acquire)) { - std::this_thread::yield(); + for (unsigned k = 0; lc_.exchange(1, std::memory_order_acquire); ++k) { + yield(k); } return index_of(wt_.load(std::memory_order_relaxed)); } @@ -107,7 +107,7 @@ public: void* acquire(void) { elem_t* el = elem(base_t::acquire()); // check all consumers have finished reading - while(1) { + for (unsigned k = 0;; ++k) { uint_t<32> expected = 0; if (el->head_.rc_.compare_exchange_weak( expected, @@ -115,7 +115,7 @@ public: std::memory_order_release)) { break; } - std::this_thread::yield(); + yield(k); } return el->data_; } diff --git a/include/def.h b/include/def.h index 4fd1730..111e7e2 100644 --- a/include/def.h +++ b/include/def.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include namespace ipc { @@ -28,3 +30,68 @@ enum : std::size_t { }; } // namespace ipc + +//////////////////////////////////////////////////////////////// +/// Gives hint to processor that improves performance of spin-wait loops. +//////////////////////////////////////////////////////////////// + +#pragma push_macro("IPC_LOCK_PAUSE_") +#undef IPC_LOCK_PAUSE_ + +#if defined(_MSC_VER) +#include // YieldProcessor +/* + See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687419(v=vs.85).aspx + Not for intel c++ compiler, so ignore http://software.intel.com/en-us/forums/topic/296168 +*/ +# define IPC_LOCK_PAUSE_() YieldProcessor() +#elif defined(__GNUC__) +#if defined(__i386__) || defined(__x86_64__) +/* + See: Intel(R) 64 and IA-32 Architectures Software Developer's Manual V2 + PAUSE-Spin Loop Hint, 4-57 + http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.html?wapkw=instruction+set+reference +*/ +# define IPC_LOCK_PAUSE_() __asm__ __volatile__("pause") +#elif defined(__ia64__) || defined(__ia64) +/* + See: Intel(R) Itanium(R) Architecture Developer's Manual, Vol.3 + hint - Performance Hint, 3:145 + http://www.intel.com/content/www/us/en/processors/itanium/itanium-architecture-vol-3-manual.html +*/ +# define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("hint @pause") +#elif defined(__arm__) +/* + See: ARM Architecture Reference Manuals (YIELD) + http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.subset.architecture.reference/index.html +*/ +# define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("yield") +#endif +#endif/*compilers*/ + +#if !defined(IPC_LOCK_PAUSE_) +/* + Just use a compiler fence, prevent compiler from optimizing loop +*/ +# define IPC_LOCK_PAUSE_() std::atomic_signal_fence(std::memory_order_seq_cst) +#endif/*!defined(IPC_LOCK_PAUSE_)*/ + +//////////////////////////////////////////////////////////////// +/// Yield to other threads +//////////////////////////////////////////////////////////////// + +namespace ipc { + +inline void yield(unsigned k) { + if (k < 4) { /* Do nothing */ } + else + if (k < 16) { IPC_LOCK_PAUSE_(); } + else + if (k < 32) { std::this_thread::yield(); } + else + { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } +} + +} // namespace ipc + +#pragma pop_macro("IPC_LOCK_PAUSE_") diff --git a/include/rw_lock.h b/include/rw_lock.h index b382e88..3290337 100644 --- a/include/rw_lock.h +++ b/include/rw_lock.h @@ -1,74 +1,9 @@ #pragma once #include -#include -#include #include -//////////////////////////////////////////////////////////////// -/// Gives hint to processor that improves performance of spin-wait loops. -//////////////////////////////////////////////////////////////// - -#pragma push_macro("IPC_LOCK_PAUSE_") -#undef IPC_LOCK_PAUSE_ - -#if defined(_MSC_VER) -#include // YieldProcessor -/* - See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687419(v=vs.85).aspx - Not for intel c++ compiler, so ignore http://software.intel.com/en-us/forums/topic/296168 -*/ -# define IPC_LOCK_PAUSE_() YieldProcessor() -#elif defined(__GNUC__) -#if defined(__i386__) || defined(__x86_64__) -/* - See: Intel(R) 64 and IA-32 Architectures Software Developer's Manual V2 - PAUSE-Spin Loop Hint, 4-57 - http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.html?wapkw=instruction+set+reference -*/ -# define IPC_LOCK_PAUSE_() __asm__ __volatile__("pause") -#elif defined(__ia64__) || defined(__ia64) -/* - See: Intel(R) Itanium(R) Architecture Developer's Manual, Vol.3 - hint - Performance Hint, 3:145 - http://www.intel.com/content/www/us/en/processors/itanium/itanium-architecture-vol-3-manual.html -*/ -# define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("hint @pause") -#elif defined(__arm__) -/* - See: ARM Architecture Reference Manuals (YIELD) - http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.subset.architecture.reference/index.html -*/ -# define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("yield") -#endif -#endif/*compilers*/ - -#if !defined(IPC_LOCK_PAUSE_) -/* - Just use a compiler fence, prevent compiler from optimizing loop -*/ -# define IPC_LOCK_PAUSE_() std::atomic_signal_fence(std::memory_order_seq_cst) -#endif/*!defined(IPC_LOCK_PAUSE_)*/ - -//////////////////////////////////////////////////////////////// -/// Yield to other threads -//////////////////////////////////////////////////////////////// - -namespace ipc { - -inline void yield(unsigned k) { - if (k < 4) { /* Do nothing */ } - else - if (k < 16) { IPC_LOCK_PAUSE_(); } - else - if (k < 32) { std::this_thread::yield(); } - else - { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } -} - -} // namespace ipc - -#pragma pop_macro("IPC_LOCK_PAUSE_") +#include "def.h" namespace ipc { diff --git a/test/test_circ.cpp b/test/test_circ.cpp index 61cc68a..b42d2a5 100644 --- a/test/test_circ.cpp +++ b/test/test_circ.cpp @@ -31,7 +31,7 @@ private slots: void test_prod_cons_performance(); void test_queue(); -} /*unit__*/; +} unit__; #include "test_circ.moc" diff --git a/test/test_ipc.cpp b/test/test_ipc.cpp index ce40e11..fe47bd7 100644 --- a/test/test_ipc.cpp +++ b/test/test_ipc.cpp @@ -44,10 +44,10 @@ struct lc_wrapper : Mutex { void unlock_shared() { Mutex::unlock(); } }; -template +template void benchmark() { - std::thread r_trd[R]; std::thread w_trd[W]; + std::thread r_trd[R]; std::atomic_int fini { 0 }; std::vector datas; @@ -86,7 +86,7 @@ void benchmark() { std::this_thread::yield(); } if (++fini == std::extent::value) { - sw.print_elapsed(R, W, Loops); + sw.print_elapsed(W, R, Loops); } std::int64_t sum = 0; for (int i : seq) sum += i; @@ -114,17 +114,17 @@ void benchmark() { for (auto& t : r_trd) t.join(); } -template +template void test_performance() { std::cout << std::endl - << "test_performance: [" << R << ":" << W << "]" + << "test_performance: [" << W << ":" << R << "]" << std::endl; - benchmark(); - benchmark, R, W>(); - benchmark , R, W>(); - benchmark(); + benchmark(); + benchmark, W, R>(); + benchmark , W, R>(); + benchmark(); } void Unit::test_rw_lock() {