19 #include <condition_variable> 24 #include <google/base/spinlock.h> 38 DEFINE_int32(unlocked_work, 1000,
"Number of unlocked work cycles");
41 std::thread::hardware_concurrency(),
42 "Number of threads for fairness test");
44 static void burn(
size_t n) {
45 for (
size_t i = 0;
i < n; ++
i) {
52 template <
typename Mutex>
54 return std::unique_lock<Mutex>{mutex};
/**
 * Catch-all unlock overload that deliberately performs no work.
 *
 * @tparam Mutex  Type of the mutex; accepted by reference and never touched.
 * @tparam Other  Type of the second argument; accepted by value and ignored.
 *
 * NOTE(review): presumably this pairs with a lock helper that hands back an
 * RAII guard (a `std::unique_lock<Mutex>` return is visible just above), in
 * which case the by-value argument releases the mutex when it is destroyed.
 * Confirm against the complete file.
 */
template <typename Mutex, typename Other>
void unlock(Mutex& /*mutex*/, Other /*guard*/) {
  // Intentionally empty.
}
61 template <
typename State>
66 struct SimpleBarrier {
67 explicit SimpleBarrier(
int count) : count_(count) {}
71 for (
auto i = 0;
i < 100000; ++
i) {
75 while (num_.load() != count_) {
80 std::atomic<int> num_{0};
85 template <
typename Lock>
114 template <
typename Lock>
117 size_t totalthreads = std::thread::hardware_concurrency();
118 if (totalthreads < numThreads) {
119 totalthreads = numThreads;
121 size_t threadgroups = totalthreads / numThreads;
130 (
struct lockstruct*)calloc(threadgroups,
sizeof(
struct lockstruct));
134 std::vector<std::thread>
threads(totalthreads);
136 SimpleBarrier runbarrier(totalthreads + 1);
138 for (
size_t t = 0;
t < totalthreads; ++
t) {
139 threads[
t] = std::thread([&,
t] {
142 for (
size_t op = 0;
op < numOps;
op += 1) {
147 burn(FLAGS_unlocked_work);
154 for (
auto& thr : threads) {
160 template <
typename Lock>
162 size_t numThreads = FLAGS_threads;
163 size_t totalthreads = std::thread::hardware_concurrency();
164 if (totalthreads < numThreads) {
165 totalthreads = numThreads;
167 long threadgroups = totalthreads / numThreads;
174 (
struct lockstruct*)calloc(threadgroups,
sizeof(
struct lockstruct));
178 std::vector<std::thread>
threads(totalthreads);
180 std::atomic<bool>
stop{
false};
183 std::vector<long> results;
184 std::vector<std::chrono::microseconds> maxes;
186 std::vector<std::chrono::microseconds> aqTime;
187 std::vector<unsigned long> aqTimeSq;
189 SimpleBarrier runbarrier(totalthreads + 1);
191 for (
size_t t = 0;
t < totalthreads; ++
t) {
192 threads[
t] = std::thread([&,
t] {
195 std::chrono::microseconds
max(0);
196 std::chrono::microseconds
time(0);
197 unsigned long timeSq(0);
200 std::chrono::steady_clock::time_point prelock =
203 std::chrono::steady_clock::time_point postlock =
205 auto diff = std::chrono::duration_cast<std::chrono::microseconds>(
208 timeSq +=
diff.count() *
diff.count();
215 burn(FLAGS_unlocked_work);
218 std::lock_guard<std::mutex>
g(rlock);
219 results.push_back(value);
220 maxes.push_back(max);
221 aqTime.push_back(time);
222 aqTimeSq.push_back(timeSq);
229 std::this_thread::sleep_for(std::chrono::seconds(2));
232 for (
auto& thr : threads) {
238 double m = sum / results.size();
241 std::for_each(results.begin(), results.end(), [&](
const double d) {
242 accum += (d -
m) * (d - m);
244 double stdev = std::sqrt(accum / (results.size() - 1));
245 std::chrono::microseconds mx = *std::max_element(maxes.begin(), maxes.end());
247 aqTime.begin(), aqTime.end(), std::chrono::microseconds(0));
248 unsigned long agAqTimeSq =
250 std::chrono::microseconds mean = agAqTime /
sum;
251 double variance = (sum * agAqTimeSq - (agAqTime.count() * agAqTime.count())) /
253 double stddev2 = std::sqrt(variance);
255 printf(
"Sum: %li Mean: %.0f stddev: %.0f\n", sum, m, stdev);
257 "Lock time stats in us: mean %li stddev %.0f max %li\n",
263 template <
typename Mutex>
266 for (
auto i = std::size_t{0};
i < iters; ++
i) {
273 runUncontended<std::mutex>(iters);
277 runUncontended<GoogleSpinLockAdapter>(iters);
281 runUncontended<InitLock<folly::MicroSpinLock>>(iters);
285 runUncontended<InitLock<folly::PicoSpinLock<std::uint16_t>>>(iters);
289 runUncontended<InitLock<folly::MicroLock>>(iters);
293 runUncontended<folly::SharedMutex>(iters);
296 BENCHMARK(DistributedMutexUncontendedBenchmark, iters) {
297 runUncontended<folly::DistributedMutex>(iters);
301 auto&& atomic = std::atomic<uint64_t>{0};
308 virtual void foo() = 0;
334 #define BENCH_BASE(...) FB_VA_GLUE(BENCHMARK_NAMED_PARAM, (__VA_ARGS__)) 335 #define BENCH_REL(...) FB_VA_GLUE(BENCHMARK_RELATIVE_NAMED_PARAM, (__VA_ARGS__)) 337 static void std_mutex(
size_t numOps,
size_t numThreads) {
338 runContended<std::mutex>(numOps, numThreads);
341 runContended<GoogleSpinLockAdapter>(numOps, numThreads);
344 runContended<InitLock<folly::MicroSpinLock>>(numOps, numThreads);
347 runContended<InitLock<folly::PicoSpinLock<uint16_t>>>(numOps, numThreads);
350 runContended<folly::MicroLock>(numOps, numThreads);
353 runContended<folly::SharedMutex>(numOps, numThreads);
356 runContended<folly::DistributedMutex>(numOps, numThreads);
367 BENCHMARK_DRAW_LINE();
374 BENCH_REL(folly_distributedmutex, 2thread, 2)
375 BENCHMARK_DRAW_LINE();
382 BENCH_REL(folly_distributedmutex, 4thread, 4)
383 BENCHMARK_DRAW_LINE();
390 BENCH_REL(folly_distributedmutex, 8thread, 8)
391 BENCHMARK_DRAW_LINE();
397 BENCH_REL(folly_sharedmutex, 16thread, 16)
398 BENCH_REL(folly_distributedmutex, 16thread, 16)
399 BENCHMARK_DRAW_LINE();
405 BENCH_REL(folly_sharedmutex, 32thread, 32)
406 BENCH_REL(folly_distributedmutex, 32thread, 32)
407 BENCHMARK_DRAW_LINE();
413 BENCH_REL(folly_sharedmutex, 64thread, 64)
414 BENCH_REL(folly_distributedmutex, 64thread, 64)
415 BENCHMARK_DRAW_LINE();
418 BENCH_REL(folly_microspin, 128thread, 128)
419 BENCH_REL(folly_picospin, 128thread, 128)
420 BENCH_REL(folly_microlock, 128thread, 128)
421 BENCH_REL(folly_sharedmutex, 128thread, 128)
422 BENCH_REL(folly_distributedmutex, 128thread, 128)
424 #define FairnessTest(type) \ 426 printf(#type ": \n"); \ 427 runFairness<type>(); \ 431 gflags::ParseCommandLineFlags(&argc, &argv,
true);
std::atomic< int64_t > sum(0)
void accumulate(std::vector< std::size_t > &a, std::vector< std::size_t > const &d)
static std::unique_ptr< SSLLock[]> & locks()
static void burn(size_t n)
std::chrono::steady_clock::time_point now()
constexpr detail::Map< Move > move
#define FairnessTest(type)
auto rlock(Synchronized &synchronized, Args &&...args)
void unlock(DistributedMutexStateProxy)
static void folly_distributedmutex(size_t numOps, size_t numThreads)
DEFINE_int32(work, 100,"Number of work cycles")
std::vector< std::thread::id > threads
State
See Core for details.
auto lock(SynchronizedLocker...lockersIn) -> std::tuple< typename SynchronizedLocker::LockedPtr... >
void runUncontended(std::size_t iters)
static void runFairness()
static map< string, int > m
bool wait(Waiter *waiter, bool shouldSleep, Waiter *&next)
static void folly_microspin(size_t numOps, size_t numThreads)
static void folly_microlock(size_t numOps, size_t numThreads)
static const char *const value
uint64_t diff(uint64_t a, uint64_t b)
auto dismissing(F f) -> invoke_result_t< F >
__attribute__((noinline, noclone)) VirtualBase *makeVirtual()
void for_each(T const &range, Function< void(typename T::value_type const &) const > const &func)
BENCHMARK(StdMutexUncontendedBenchmark, iters)
static void std_mutex(size_t numOps, size_t numThreads)
DistributedMutexStateProxy lock()
std::chrono::nanoseconds time()
static void runContended(size_t numOps, size_t numThreads)
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type