fix bugs; optimize performance

This commit is contained in:
mutouyun 2018-12-12 18:49:10 +08:00
parent 29d25e2226
commit c40dddcc06
4 changed files with 88 additions and 13 deletions

View File

@ -2,25 +2,91 @@
#include <atomic> #include <atomic>
#include <thread> #include <thread>
#include <chrono>
#include <limits> #include <limits>
////////////////////////////////////////////////////////////////
/// Gives hint to processor that improves performance of spin-wait loops.
////////////////////////////////////////////////////////////////
// IPC_LOCK_PAUSE_ is a private helper macro: any definition the includer
// already had is saved here and restored by the pop_macro at the end.
#pragma push_macro("IPC_LOCK_PAUSE_")
#undef IPC_LOCK_PAUSE_

#if defined(_MSC_VER)
#   include <windows.h> // YieldProcessor
/*
    See: http://msdn.microsoft.com/en-us/library/windows/desktop/ms687419(v=vs.85).aspx
    Not for intel c++ compiler, so ignore http://software.intel.com/en-us/forums/topic/296168
*/
#   define IPC_LOCK_PAUSE_() YieldProcessor()
#elif defined(__GNUC__)
#   if defined(__i386__) || defined(__x86_64__)
/*
    See: Intel(R) 64 and IA-32 Architectures Software Developer's Manual V2
         PAUSE-Spin Loop Hint, 4-57
    http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.html?wapkw=instruction+set+reference
*/
#       define IPC_LOCK_PAUSE_() __asm__ __volatile__("pause")
#   elif defined(__ia64__) || defined(__ia64)
/*
    See: Intel(R) Itanium(R) Architecture Developer's Manual, Vol.3
         hint - Performance Hint, 3:145
    http://www.intel.com/content/www/us/en/processors/itanium/itanium-architecture-vol-3-manual.html
*/
#       define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("hint @pause")
#   elif defined(__arm__) || defined(__aarch64__)
/*
    See: ARM Architecture Reference Manuals (YIELD)
    http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.subset.architecture.reference/index.html
    64-bit ARM toolchains define __aarch64__ instead of __arm__, so both
    macros are tested; the same "yield" mnemonic is used for either target.
*/
#       define IPC_LOCK_PAUSE_() __asm__ __volatile__ ("yield")
#   endif
#endif/*compilers*/

#if !defined(IPC_LOCK_PAUSE_)
/*
    No spin-wait hint is known for this compiler/architecture.
    Just use a compiler fence, prevent compiler from optimizing loop
*/
#   define IPC_LOCK_PAUSE_() std::atomic_signal_fence(std::memory_order_seq_cst)
#endif/*!defined(IPC_LOCK_PAUSE_)*/

////////////////////////////////////////////////////////////////
/// Yield to other threads
////////////////////////////////////////////////////////////////

namespace ipc {

/// Back-off step for a spin loop; the action escalates with the value of k.
///
///   k <  4 : return immediately
///   k < 16 : execute the processor spin-wait hint (IPC_LOCK_PAUSE_)
///   k < 32 : std::this_thread::yield()
///   else   : sleep for one millisecond
///
/// @param k  Step counter supplied by the caller; larger values select a
///           heavier form of waiting.
inline void yield(unsigned k) {
    if (k < 4) {
        /* Do nothing */
    }
    else if (k < 16) {
        IPC_LOCK_PAUSE_();
    }
    else if (k < 32) {
        std::this_thread::yield();
    }
    else {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
}

} // namespace ipc

#pragma pop_macro("IPC_LOCK_PAUSE_")
namespace ipc { namespace ipc {
class rw_lock { class rw_lock {
std::atomic_size_t lc_ { 0 }; std::atomic_size_t lc_ { 0 };
enum : std::size_t { enum : std::size_t {
w_flag = std::numeric_limits<std::size_t>::max() w_flag = (std::numeric_limits<std::size_t>::max)()
}; };
public: public:
void lock() { void lock() {
while (1) { for (unsigned k = 0;; ++k) {
std::size_t expected = 0; std::size_t expected = 0;
if (lc_.compare_exchange_weak(expected, w_flag, std::memory_order_acq_rel)) { if (lc_.compare_exchange_weak(expected, w_flag, std::memory_order_acq_rel)) {
break; break;
} }
std::this_thread::yield(); yield(k);
} }
} }
@ -29,14 +95,14 @@ public:
} }
void lock_shared() { void lock_shared() {
while(1) { for (unsigned k = 0;; ++k) {
std::size_t old = lc_.load(std::memory_order_relaxed); std::size_t old = lc_.load(std::memory_order_relaxed);
std::size_t unlocked = old + 1; std::size_t unlocked = old + 1;
if (unlocked && if (unlocked &&
lc_.compare_exchange_weak(old, unlocked, std::memory_order_acq_rel)) { lc_.compare_exchange_weak(old, unlocked, std::memory_order_acq_rel)) {
break; break;
} }
std::this_thread::yield(); yield(k);
std::atomic_thread_fence(std::memory_order_acquire); std::atomic_thread_fence(std::memory_order_acquire);
} }
} }

View File

@ -133,7 +133,7 @@ std::vector<byte_t> recv(handle_t h) {
cache.resize(last_size + remain); cache.resize(last_size + remain);
std::memcpy(cache.data() + last_size, msg.data_, remain); std::memcpy(cache.data() + last_size, msg.data_, remain);
// finish this message, erase it from cache // finish this message, erase it from cache
auto ret { std::move(cache) }; auto ret = std::move(cache);
all.erase(msg.id_); all.erase(msg.id_);
return ret; return ret;
} }

View File

@ -3,6 +3,7 @@
#include <QtTest> #include <QtTest>
#include <iostream> #include <iostream>
#include <atomic>
#include "stopwatch.hpp" #include "stopwatch.hpp"

View File

@ -35,11 +35,11 @@ constexpr T acc(T b, T e) {
template <typename Mutex> template <typename Mutex>
struct lc_wrapper : Mutex { struct lc_wrapper : Mutex {
void lock_shared () { lock (); } void lock_shared () { Mutex::lock (); }
void unlock_shared() { unlock(); } void unlock_shared() { Mutex::unlock(); }
}; };
template <typename Lc, int Loops = 100000, int R = 4, int W = 4> template <typename Lc, int R = 4, int W = 4, int Loops = 100000>
void benchmark() { void benchmark() {
std::thread r_trd[R]; std::thread r_trd[R];
std::thread w_trd[W]; std::thread w_trd[W];
@ -99,11 +99,19 @@ void benchmark() {
for (auto& t : r_trd) t.join(); for (auto& t : r_trd) t.join();
} }
template <int R, int W>
void test_performance() {
benchmark<ipc::rw_lock , R, W>();
benchmark<lc_wrapper<capo::spin_lock>, R, W>();
benchmark<lc_wrapper<std::mutex> , R, W>();
benchmark<std::shared_mutex , R, W>();
}
void Unit::test_rw_lock() { void Unit::test_rw_lock() {
benchmark<ipc::rw_lock>(); test_performance<1, 1>();
benchmark<lc_wrapper<capo::spin_lock>>(); test_performance<4, 4>();
benchmark<lc_wrapper<std::mutex>>(); test_performance<1, 8>();
benchmark<std::shared_mutex>(); test_performance<8, 1>();
} }
void Unit::test_send_recv() { void Unit::test_send_recv() {