mirror of
https://github.com/mutouyun/cpp-ipc.git
synced 2025-12-06 16:56:45 +08:00
use ipc::yield instead of this_thread::yield
This commit is contained in:
parent
5ffc820983
commit
b6248ad5dc
@ -38,8 +38,8 @@ struct alignas(std::max_align_t) elem_array_head {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto acquire(void) {
|
auto acquire(void) {
|
||||||
while (lc_.exchange(1, std::memory_order_acquire)) {
|
for (unsigned k = 0; lc_.exchange(1, std::memory_order_acquire); ++k) {
|
||||||
std::this_thread::yield();
|
yield(k);
|
||||||
}
|
}
|
||||||
return index_of(wt_.load(std::memory_order_relaxed));
|
return index_of(wt_.load(std::memory_order_relaxed));
|
||||||
}
|
}
|
||||||
@ -107,7 +107,7 @@ public:
|
|||||||
void* acquire(void) {
|
void* acquire(void) {
|
||||||
elem_t* el = elem(base_t::acquire());
|
elem_t* el = elem(base_t::acquire());
|
||||||
// check all consumers have finished reading
|
// check all consumers have finished reading
|
||||||
while(1) {
|
for (unsigned k = 0;; ++k) {
|
||||||
uint_t<32> expected = 0;
|
uint_t<32> expected = 0;
|
||||||
if (el->head_.rc_.compare_exchange_weak(
|
if (el->head_.rc_.compare_exchange_weak(
|
||||||
expected,
|
expected,
|
||||||
@ -115,7 +115,7 @@ public:
|
|||||||
std::memory_order_release)) {
|
std::memory_order_release)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
std::this_thread::yield();
|
yield(k);
|
||||||
}
|
}
|
||||||
return el->data_;
|
return el->data_;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,6 +3,8 @@
|
|||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <thread>
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
namespace ipc {
|
namespace ipc {
|
||||||
|
|
||||||
@ -28,3 +30,68 @@ enum : std::size_t {
|
|||||||
};
|
};
|
||||||
|
|
||||||
} // namespace ipc
|
} // namespace ipc
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
/// Gives hint to processor that improves performance of spin-wait loops.
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#pragma push_macro("IPC_LOCK_PAUSE_")
|
||||||
|
#undef IPC_LOCK_PAUSE_
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#include <windows.h> // YieldProcessor
|
||||||
|
/*
|
||||||
|
See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687419(v=vs.85).aspx
|
||||||
|
Not for intel c++ compiler, so ignore http://software.intel.com/en-us/forums/topic/296168
|
||||||
|
*/
|
||||||
|
# define IPC_LOCK_PAUSE_() YieldProcessor()
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
#if defined(__i386__) || defined(__x86_64__)
|
||||||
|
/*
|
||||||
|
See: Intel(R) 64 and IA-32 Architectures Software Developer's Manual V2
|
||||||
|
PAUSE-Spin Loop Hint, 4-57
|
||||||
|
http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.html?wapkw=instruction+set+reference
|
||||||
|
*/
|
||||||
|
# define IPC_LOCK_PAUSE_() __asm__ __volatile__("pause")
|
||||||
|
#elif defined(__ia64__) || defined(__ia64)
|
||||||
|
/*
|
||||||
|
See: Intel(R) Itanium(R) Architecture Developer's Manual, Vol.3
|
||||||
|
hint - Performance Hint, 3:145
|
||||||
|
http://www.intel.com/content/www/us/en/processors/itanium/itanium-architecture-vol-3-manual.html
|
||||||
|
*/
|
||||||
|
# define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("hint @pause")
|
||||||
|
#elif defined(__arm__)
|
||||||
|
/*
|
||||||
|
See: ARM Architecture Reference Manuals (YIELD)
|
||||||
|
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.subset.architecture.reference/index.html
|
||||||
|
*/
|
||||||
|
# define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("yield")
|
||||||
|
#endif
|
||||||
|
#endif/*compilers*/
|
||||||
|
|
||||||
|
#if !defined(IPC_LOCK_PAUSE_)
|
||||||
|
/*
|
||||||
|
Just use a compiler fence, prevent compiler from optimizing loop
|
||||||
|
*/
|
||||||
|
# define IPC_LOCK_PAUSE_() std::atomic_signal_fence(std::memory_order_seq_cst)
|
||||||
|
#endif/*!defined(IPC_LOCK_PAUSE_)*/
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
/// Yield to other threads
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
namespace ipc {
|
||||||
|
|
||||||
|
inline void yield(unsigned k) {
|
||||||
|
if (k < 4) { /* Do nothing */ }
|
||||||
|
else
|
||||||
|
if (k < 16) { IPC_LOCK_PAUSE_(); }
|
||||||
|
else
|
||||||
|
if (k < 32) { std::this_thread::yield(); }
|
||||||
|
else
|
||||||
|
{ std::this_thread::sleep_for(std::chrono::milliseconds(1)); }
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ipc
|
||||||
|
|
||||||
|
#pragma pop_macro("IPC_LOCK_PAUSE_")
|
||||||
|
|||||||
@ -1,74 +1,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <thread>
|
|
||||||
#include <chrono>
|
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////
|
#include "def.h"
|
||||||
/// Gives hint to processor that improves performance of spin-wait loops.
|
|
||||||
////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#pragma push_macro("IPC_LOCK_PAUSE_")
|
|
||||||
#undef IPC_LOCK_PAUSE_
|
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#include <windows.h> // YieldProcessor
|
|
||||||
/*
|
|
||||||
See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687419(v=vs.85).aspx
|
|
||||||
Not for intel c++ compiler, so ignore http://software.intel.com/en-us/forums/topic/296168
|
|
||||||
*/
|
|
||||||
# define IPC_LOCK_PAUSE_() YieldProcessor()
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
#if defined(__i386__) || defined(__x86_64__)
|
|
||||||
/*
|
|
||||||
See: Intel(R) 64 and IA-32 Architectures Software Developer's Manual V2
|
|
||||||
PAUSE-Spin Loop Hint, 4-57
|
|
||||||
http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.html?wapkw=instruction+set+reference
|
|
||||||
*/
|
|
||||||
# define IPC_LOCK_PAUSE_() __asm__ __volatile__("pause")
|
|
||||||
#elif defined(__ia64__) || defined(__ia64)
|
|
||||||
/*
|
|
||||||
See: Intel(R) Itanium(R) Architecture Developer's Manual, Vol.3
|
|
||||||
hint - Performance Hint, 3:145
|
|
||||||
http://www.intel.com/content/www/us/en/processors/itanium/itanium-architecture-vol-3-manual.html
|
|
||||||
*/
|
|
||||||
# define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("hint @pause")
|
|
||||||
#elif defined(__arm__)
|
|
||||||
/*
|
|
||||||
See: ARM Architecture Reference Manuals (YIELD)
|
|
||||||
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.subset.architecture.reference/index.html
|
|
||||||
*/
|
|
||||||
# define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("yield")
|
|
||||||
#endif
|
|
||||||
#endif/*compilers*/
|
|
||||||
|
|
||||||
#if !defined(IPC_LOCK_PAUSE_)
|
|
||||||
/*
|
|
||||||
Just use a compiler fence, prevent compiler from optimizing loop
|
|
||||||
*/
|
|
||||||
# define IPC_LOCK_PAUSE_() std::atomic_signal_fence(std::memory_order_seq_cst)
|
|
||||||
#endif/*!defined(IPC_LOCK_PAUSE_)*/
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////
|
|
||||||
/// Yield to other threads
|
|
||||||
////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
namespace ipc {
|
|
||||||
|
|
||||||
inline void yield(unsigned k) {
|
|
||||||
if (k < 4) { /* Do nothing */ }
|
|
||||||
else
|
|
||||||
if (k < 16) { IPC_LOCK_PAUSE_(); }
|
|
||||||
else
|
|
||||||
if (k < 32) { std::this_thread::yield(); }
|
|
||||||
else
|
|
||||||
{ std::this_thread::sleep_for(std::chrono::milliseconds(1)); }
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace ipc
|
|
||||||
|
|
||||||
#pragma pop_macro("IPC_LOCK_PAUSE_")
|
|
||||||
|
|
||||||
namespace ipc {
|
namespace ipc {
|
||||||
|
|
||||||
|
|||||||
@ -31,7 +31,7 @@ private slots:
|
|||||||
void test_prod_cons_performance();
|
void test_prod_cons_performance();
|
||||||
|
|
||||||
void test_queue();
|
void test_queue();
|
||||||
} /*unit__*/;
|
} unit__;
|
||||||
|
|
||||||
#include "test_circ.moc"
|
#include "test_circ.moc"
|
||||||
|
|
||||||
|
|||||||
@ -44,10 +44,10 @@ struct lc_wrapper : Mutex {
|
|||||||
void unlock_shared() { Mutex::unlock(); }
|
void unlock_shared() { Mutex::unlock(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Lc, int R = 4, int W = 4, int Loops = 100000>
|
template <typename Lc, int W = 4, int R = 4, int Loops = 100000>
|
||||||
void benchmark() {
|
void benchmark() {
|
||||||
std::thread r_trd[R];
|
|
||||||
std::thread w_trd[W];
|
std::thread w_trd[W];
|
||||||
|
std::thread r_trd[R];
|
||||||
std::atomic_int fini { 0 };
|
std::atomic_int fini { 0 };
|
||||||
|
|
||||||
std::vector<int> datas;
|
std::vector<int> datas;
|
||||||
@ -86,7 +86,7 @@ void benchmark() {
|
|||||||
std::this_thread::yield();
|
std::this_thread::yield();
|
||||||
}
|
}
|
||||||
if (++fini == std::extent<decltype(r_trd)>::value) {
|
if (++fini == std::extent<decltype(r_trd)>::value) {
|
||||||
sw.print_elapsed(R, W, Loops);
|
sw.print_elapsed(W, R, Loops);
|
||||||
}
|
}
|
||||||
std::int64_t sum = 0;
|
std::int64_t sum = 0;
|
||||||
for (int i : seq) sum += i;
|
for (int i : seq) sum += i;
|
||||||
@ -114,17 +114,17 @@ void benchmark() {
|
|||||||
for (auto& t : r_trd) t.join();
|
for (auto& t : r_trd) t.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int R, int W>
|
template <int W, int R>
|
||||||
void test_performance() {
|
void test_performance() {
|
||||||
|
|
||||||
std::cout << std::endl
|
std::cout << std::endl
|
||||||
<< "test_performance: [" << R << ":" << W << "]"
|
<< "test_performance: [" << W << ":" << R << "]"
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
|
|
||||||
benchmark<ipc::rw_lock , R, W>();
|
benchmark<ipc::rw_lock , W, R>();
|
||||||
benchmark<lc_wrapper<capo::spin_lock>, R, W>();
|
benchmark<lc_wrapper<capo::spin_lock>, W, R>();
|
||||||
benchmark<lc_wrapper<std::mutex> , R, W>();
|
benchmark<lc_wrapper<std::mutex> , W, R>();
|
||||||
benchmark<std::shared_timed_mutex , R, W>();
|
benchmark<std::shared_timed_mutex , W, R>();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Unit::test_rw_lock() {
|
void Unit::test_rw_lock() {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user