proxygen
SynchronizedBenchmark.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <folly/Synchronized.h>
17 
18 #include <folly/Benchmark.h>
20 
21 #include <algorithm>
22 #include <condition_variable>
23 #include <map>
24 #include <memory>
25 #include <mutex>
26 #include <shared_mutex>
27 #include <thread>
28 
29 using namespace folly;
30 using namespace folly::detail;
31 
32 DEFINE_uint64(iterations, 100, "The number of iterations with lock held");
33 
135 namespace {
136 template <typename NonMovableType>
137 std::vector<std::unique_ptr<NonMovableType>> makeVector(int num);
138 
142 void spin(int n);
143 
147 template <typename Mutex>
148 void ordered(
149  std::size_t iters,
150  int threads,
151  std::vector<std::unique_ptr<Mutex>> mutexes);
152 template <typename Mutex>
153 void smart(
154  std::size_t iters,
155  int threads,
156  std::vector<std::unique_ptr<Mutex>> mutexes);
157 template <typename Mutex>
158 void persistent(
159  std::size_t iters,
160  int threads,
161  std::vector<std::unique_ptr<Mutex>> mutexes);
162 
177 template <typename LockingFunc>
178 void pathological(std::size_t iters, LockingFunc func);
179 
185 template <typename LockingFunc>
186 void simple(std::size_t iters, LockingFunc func);
187 
191 template <typename LockingFunc>
192 void uncontended(std::size_t iters, LockingFunc func);
193 
198 class BenchmarkStartBarrier {
199  public:
200  explicit BenchmarkStartBarrier(int threads) : threads_{threads + 1} {}
201 
202  void wait() {
203  auto lck = std::unique_lock<std::mutex>{mutex_};
204  ++started_;
205 
206  // if all the threads have started the benchmarks
207  if (started_ == threads_) {
208  cv_.notify_all();
209  return;
210  }
211 
212  // wait till all the threads have started
213  while (started_ != threads_) {
214  cv_.wait(lck);
215  }
216  }
217 
219  std::condition_variable cv_;
220  const int threads_;
221  int started_{0};
222 };
223 } // namespace
224 
225 BENCHMARK(UncontendedFollyLock, iters) {
226  uncontended(iters, [](auto& one, auto& two) { folly::lock(one, two); });
227 }
228 
229 BENCHMARK(UncontendedStdLock, iters) {
230  uncontended(iters, [](auto& one, auto& two) { std::lock(one, two); });
231 }
232 
233 BENCHMARK(UncontendedOrdered, iters) {
234  uncontended(iters, [](auto& one, auto& two) {
235  one.lock();
236  two.lock();
237  });
238 }
239 
240 BENCHMARK(UncontendedReverseOrdered, iters) {
241  uncontended(iters, [](auto& one, auto& two) {
242  two.lock();
243  one.lock();
244  });
245 }
246 
248 
249 BENCHMARK(ThreeThreadsSimpleFollyLock, iters) {
250  simple(iters, [](auto& one, auto& two) { folly::lock(one, two); });
251 }
252 
253 BENCHMARK(ThreeThreadsSimpleStdLock, iters) {
254  simple(iters, [](auto& one, auto& two) { std::lock(one, two); });
255 }
256 
257 BENCHMARK(ThreeThreadsSimpleOrdered, iters) {
258  simple(iters, [](auto& one, auto& two) {
259  one.lock();
260  two.lock();
261  });
262 }
263 
264 BENCHMARK(ThreeThreadsSimpleReverseOrdered, iters) {
265  simple(iters, [](auto& one, auto& two) {
266  two.lock();
267  one.lock();
268  });
269 }
270 
272 
273 BENCHMARK(ThreeThreadsPathologicalFollyLock, iters) {
274  pathological(iters, [](auto& one, auto& two, auto& three) {
275  folly::lock(one, two, three);
276  });
277 }
278 
279 BENCHMARK(ThreeThreadsPathologicalStdLock, iters) {
280  pathological(iters, [](auto& one, auto& two, auto& three) {
281  std::lock(one, two, three);
282  });
283 }
284 
285 BENCHMARK(ThreeThreadsPathologicalOrdered, iters) {
286  pathological(iters, [](auto& one, auto& two, auto& three) {
287  one.lock();
288  two.lock();
289  three.lock();
290  });
291 }
292 
293 BENCHMARK(ThreeThreadsPathologicalCarefullyOrdered, iters) {
294  pathological(iters, [](auto& one, auto& two, auto& three) {
295  two.lock();
296  three.lock();
297  one.lock();
298  });
299 }
300 
302 
303 BENCHMARK(TwoThreadsTwoMutexesOrdered, iters) {
304  ordered(iters, 2, makeVector<std::mutex>(2));
305 }
306 BENCHMARK(TwoThreadsTwoMutexesSmart, iters) {
307  smart(iters, 2, makeVector<std::mutex>(2));
308 }
309 BENCHMARK(TwoThreadsTwoMutexesPersistent, iters) {
310  persistent(iters, 2, makeVector<std::mutex>(2));
311 }
312 
314 
315 BENCHMARK(TwoThreadsFourMutexesOrdered, iters) {
316  ordered(iters, 2, makeVector<std::mutex>(4));
317 }
318 BENCHMARK(TwoThreadsFourMutexesSmart, iters) {
319  smart(iters, 2, makeVector<std::mutex>(4));
320 }
321 BENCHMARK(TwoThreadsFourMutexesPersistent, iters) {
322  persistent(iters, 2, makeVector<std::mutex>(4));
323 }
324 
326 
327 BENCHMARK(TwoThreadsEightMutexesOrdered, iters) {
328  ordered(iters, 2, makeVector<std::mutex>(8));
329 }
330 BENCHMARK(TwoThreadsEightMutexesSmart, iters) {
331  smart(iters, 2, makeVector<std::mutex>(8));
332 }
333 BENCHMARK(TwoThreadsEightMutexesPersistent, iters) {
334  persistent(iters, 2, makeVector<std::mutex>(8));
335 }
336 
338 
339 BENCHMARK(TwoThreadsSixteenMutexesOrdered, iters) {
340  ordered(iters, 2, makeVector<std::mutex>(16));
341 }
342 BENCHMARK(TwoThreadsSixteenMutexesSmart, iters) {
343  smart(iters, 2, makeVector<std::mutex>(16));
344 }
345 BENCHMARK(TwoThreadsSixteenMutexesPersistent, iters) {
346  persistent(iters, 2, makeVector<std::mutex>(16));
347 }
348 
350 
351 BENCHMARK(FourThreadsTwoMutexesOrdered, iters) {
352  ordered(iters, 4, makeVector<std::mutex>(2));
353 }
354 BENCHMARK(FourThreadsTwoMutexesSmart, iters) {
355  smart(iters, 4, makeVector<std::mutex>(2));
356 }
357 BENCHMARK(FourThreadsTwoMutexesPersistent, iters) {
358  persistent(iters, 4, makeVector<std::mutex>(2));
359 }
360 
362 
363 BENCHMARK(FourThreadsFourMutexesOrdered, iters) {
364  ordered(iters, 4, makeVector<std::mutex>(4));
365 }
366 BENCHMARK(FourThreadsFourMutexesSmart, iters) {
367  smart(iters, 4, makeVector<std::mutex>(4));
368 }
369 BENCHMARK(FourThreadsFourMutexesPersistent, iters) {
370  persistent(iters, 4, makeVector<std::mutex>(4));
371 }
372 
374 
375 BENCHMARK(FourThreadsEightMutexesOrdered, iters) {
376  ordered(iters, 4, makeVector<std::mutex>(8));
377 }
378 BENCHMARK(FourThreadsEightMutexesSmart, iters) {
379  smart(iters, 4, makeVector<std::mutex>(8));
380 }
381 BENCHMARK(FourThreadsEightMutexesPersistent, iters) {
382  persistent(iters, 4, makeVector<std::mutex>(8));
383 }
384 
386 
387 BENCHMARK(FourThreadsSixteenMutexesOrdered, iters) {
388  ordered(iters, 4, makeVector<std::mutex>(16));
389 }
390 BENCHMARK(FourThreadsSixteenMutexesSmart, iters) {
391  smart(iters, 4, makeVector<std::mutex>(16));
392 }
393 BENCHMARK(FourThreadsSixteenMutexesPersistent, iters) {
394  persistent(iters, 4, makeVector<std::mutex>(16));
395 }
396 
398 
399 BENCHMARK(EightThreadsTwoMutexesOrdered, iters) {
400  ordered(iters, 8, makeVector<std::mutex>(2));
401 }
402 BENCHMARK(EightThreadsTwoMutexesSmart, iters) {
403  smart(iters, 8, makeVector<std::mutex>(2));
404 }
405 BENCHMARK(EightThreadsTwoMutexesPersistent, iters) {
406  persistent(iters, 8, makeVector<std::mutex>(2));
407 }
408 
410 
411 BENCHMARK(EightThreadsFourMutexesOrdered, iters) {
412  ordered(iters, 8, makeVector<std::mutex>(4));
413 }
414 BENCHMARK(EightThreadsFourMutexesSmart, iters) {
415  smart(iters, 8, makeVector<std::mutex>(4));
416 }
417 BENCHMARK(EightThreadsFourMutexesPersistent, iters) {
418  persistent(iters, 8, makeVector<std::mutex>(4));
419 }
420 
422 
423 BENCHMARK(EightThreadsEightMutexesOrdered, iters) {
424  ordered(iters, 8, makeVector<std::mutex>(8));
425 }
426 BENCHMARK(EightThreadsEightMutexesSmart, iters) {
427  smart(iters, 8, makeVector<std::mutex>(8));
428 }
429 BENCHMARK(EightThreadsEightMutexesPersistent, iters) {
430  persistent(iters, 8, makeVector<std::mutex>(8));
431 }
432 
434 
435 BENCHMARK(EightThreadsSixteenMutexesOrdered, iters) {
436  ordered(iters, 8, makeVector<std::mutex>(16));
437 }
438 BENCHMARK(EightThreadsSixteenMutexesSmart, iters) {
439  smart(iters, 8, makeVector<std::mutex>(16));
440 }
441 BENCHMARK(EightThreadsSixteenMutexesPersistent, iters) {
442  persistent(iters, 8, makeVector<std::mutex>(16));
443 }
444 
446 
447 BENCHMARK(SixteenThreadsTwoMutexesOrdered, iters) {
448  ordered(iters, 16, makeVector<std::mutex>(2));
449 }
450 BENCHMARK(SixteenThreadsTwoMutexesSmart, iters) {
451  smart(iters, 16, makeVector<std::mutex>(2));
452 }
453 BENCHMARK(SixteenThreadsTwoMutexesPersistent, iters) {
454  persistent(iters, 16, makeVector<std::mutex>(2));
455 }
456 
458 
459 BENCHMARK(SixteenThreadsFourMutexesOrdered, iters) {
460  ordered(iters, 16, makeVector<std::mutex>(4));
461 }
462 BENCHMARK(SixteenThreadsFourMutexesSmart, iters) {
463  smart(iters, 16, makeVector<std::mutex>(4));
464 }
465 BENCHMARK(SixteenThreadsFourMutexesPersistent, iters) {
466  persistent(iters, 16, makeVector<std::mutex>(4));
467 }
468 
470 
471 BENCHMARK(SixteenThreadsEightMutexesOrdered, iters) {
472  ordered(iters, 16, makeVector<std::mutex>(8));
473 }
474 BENCHMARK(SixteenThreadsEightMutexesSmart, iters) {
475  smart(iters, 16, makeVector<std::mutex>(8));
476 }
477 BENCHMARK(SixteenThreadsEightMutexesPersistent, iters) {
478  persistent(iters, 16, makeVector<std::mutex>(8));
479 }
480 
482 
483 BENCHMARK(SixteenThreadsSixteenMutexesOrdered, iters) {
484  ordered(iters, 16, makeVector<std::mutex>(16));
485 }
486 BENCHMARK(SixteenThreadsSixteenMutexesSmart, iters) {
487  smart(iters, 16, makeVector<std::mutex>(16));
488 }
489 BENCHMARK(SixteenThreadsSixteenMutexesPersistent, iters) {
490  persistent(iters, 16, makeVector<std::mutex>(16));
491 }
492 
493 int main(int argc, char** argv) {
494  gflags::ParseCommandLineFlags(&argc, &argv, true);
496 }
497 
498 namespace {
499 std::pair<int, int> getMutexIndices(int threadId, int mutexListSize) {
500  // assign two mutexes to the current thread, we need to prevent
501  // deadlocks here by resorting the indexes in increasing order,
502  // because a thread might pick the last mutex as the one it locks and
503  // then adding one to that will make it wrap around, breaking the
504  // ordering
505  auto index = threadId % mutexListSize;
506 
507  auto firstMutexIndex = ((index + 1) == mutexListSize) ? 0 : index;
508  auto secondMutexIndex =
509  ((index + 1) == mutexListSize) ? (mutexListSize - 1) : (index + 1);
510 
511  return std::make_pair(firstMutexIndex, secondMutexIndex);
512 }
513 
514 template <typename NonMovableType>
515 std::vector<std::unique_ptr<NonMovableType>> makeVector(int num) {
516  auto vector = std::vector<std::unique_ptr<NonMovableType>>{};
517  vector.reserve(num);
518  for (auto i = 0; i < num; ++i) {
519  vector.push_back(std::make_unique<NonMovableType>());
520  }
521  return vector;
522 }
523 
524 template <typename Mutex>
525 void ordered(
526  std::size_t iters,
527  int numThreads,
528  std::vector<std::unique_ptr<Mutex>> mutexes) {
529  auto suspender = BenchmarkSuspender{};
530 
531  // Sort the mutexes so there is no deadlock because of lock acquisition
532  // ordering
533  std::sort(mutexes.begin(), mutexes.end(), [](auto& one, auto& two) {
534  return one.get() < two.get();
535  });
536 
537  auto threads = std::vector<std::thread>{};
538  auto&& barrier = BenchmarkStartBarrier{numThreads};
539 
540  for (auto thread = 0; thread < numThreads; ++thread) {
541  threads.emplace_back([&mutexes, iters, thread, &barrier] {
542  barrier.wait();
543 
544  auto indices = getMutexIndices(thread, mutexes.size());
545  for (auto i = std::size_t{0}; i < iters; ++i) {
546  // lock the mutexes
547  mutexes[indices.first]->lock();
548  mutexes[indices.second]->lock();
549 
550  spin(FLAGS_iterations);
551 
552  mutexes[indices.first]->unlock();
553  mutexes[indices.second]->unlock();
554  }
555  });
556  }
557 
558  barrier.wait();
559  suspender.dismissing([&] {
560  for (auto& thread : threads) {
561  thread.join();
562  }
563  });
564 }
565 
566 template <typename Mutex>
567 void smart(
568  std::size_t iters,
569  int numThreads,
570  std::vector<std::unique_ptr<Mutex>> mutexes) {
571  auto suspender = BenchmarkSuspender{};
572 
573  auto threads = std::vector<std::thread>{};
574  auto&& barrier = BenchmarkStartBarrier{numThreads};
575 
576  for (auto thread = 0; thread < numThreads; ++thread) {
577  threads.emplace_back([iters, &mutexes, thread, &barrier] {
578  barrier.wait();
579 
580  auto indices = std::make_pair(
581  thread % mutexes.size(), (thread + 1) % mutexes.size());
582  for (auto iter = std::size_t{0}; iter < iters; ++iter) {
583  while (true) {
584  mutexes[indices.first]->lock();
585  if (mutexes[indices.second]->try_lock()) {
586  break;
587  }
588 
589  mutexes[indices.first]->unlock();
590  std::swap(indices.first, indices.second);
592  }
593 
594  spin(FLAGS_iterations);
595 
596  mutexes[indices.first]->unlock();
597  mutexes[indices.second]->unlock();
598  }
599  });
600  }
601 
602  barrier.wait();
603  suspender.dismissing([&] {
604  for (auto& thread : threads) {
605  thread.join();
606  }
607  });
608 }
609 
610 template <typename Mutex>
611 void persistent(
612  std::size_t iters,
613  int numThreads,
614  std::vector<std::unique_ptr<Mutex>> mutexes) {
615  auto suspender = BenchmarkSuspender{};
616 
617  auto threads = std::vector<std::thread>{};
618  auto&& barrier = BenchmarkStartBarrier{numThreads};
619 
620  for (auto thread = 0; thread < numThreads; ++thread) {
621  threads.emplace_back([iters, &mutexes, thread, &barrier] {
622  barrier.wait();
623 
624  auto indices = std::make_pair(
625  thread % mutexes.size(), (thread + 1) % mutexes.size());
626  for (auto iter = std::size_t{0}; iter < iters; ++iter) {
627  // lock the mutexes by first locking a mutex and then acquiring the
628  // next mutex (or mutexes) with a try_lock()
629  while (true) {
630  mutexes[indices.first]->lock();
631  if (mutexes[indices.second]->try_lock()) {
632  break;
633  }
634 
635  mutexes[indices.first]->unlock();
636  }
637 
638  spin(FLAGS_iterations);
639 
640  mutexes[indices.first]->unlock();
641  mutexes[indices.second]->unlock();
642  }
643  });
644  }
645 
646  barrier.wait();
647  suspender.dismissing([&] {
648  for (auto& thread : threads) {
649  thread.join();
650  }
651  });
652 }
653 
654 template <typename LockingFunc>
655 void simple(std::size_t iters, LockingFunc func) {
656  auto&& suspender = BenchmarkSuspender{};
657  auto&& one = std::mutex{};
658  auto&& two = std::mutex{};
659  auto&& barrier = BenchmarkStartBarrier{3};
660 
661  auto threadOne = std::thread{[&] {
662  barrier.wait();
663 
664  for (auto i = std::size_t{0}; i < iters; ++i) {
665  auto lckOne = std::unique_lock<std::mutex>{one, std::defer_lock};
666  auto lckTwo = std::unique_lock<std::mutex>{two, std::defer_lock};
667  func(lckOne, lckTwo);
668 
669  spin(FLAGS_iterations);
670  }
671  }};
672 
673  auto threadTwo = std::thread{[&] {
674  barrier.wait();
675 
676  for (auto i = std::size_t{0}; i < iters; ++i) {
677  auto lck = std::unique_lock<std::mutex>{one};
678  spin(FLAGS_iterations * FLAGS_iterations);
679  }
680  }};
681 
682  auto threadThree = std::thread{[&] {
683  barrier.wait();
684 
685  for (auto i = std::size_t{0}; i < iters; ++i) {
686  auto lck = std::unique_lock<std::mutex>{two};
687  spin(FLAGS_iterations * FLAGS_iterations);
688  }
689  }};
690 
691  barrier.wait();
692  suspender.dismissing([&] {
693  threadOne.join();
694  threadTwo.join();
695  threadThree.join();
696  });
697 }
698 
699 template <typename LockingFunc>
700 void pathological(std::size_t iters, LockingFunc func) {
701  auto&& suspender = BenchmarkSuspender{};
702  auto&& one = std::mutex{};
703  auto&& two = std::mutex{};
704  auto&& three = std::mutex{};
705  auto&& barrier = BenchmarkStartBarrier{3};
706 
707  auto threadOne = std::thread{[&] {
708  barrier.wait();
709 
710  for (auto i = std::size_t{0}; i < iters; ++i) {
711  auto lckOne = std::unique_lock<std::mutex>{one, std::defer_lock};
712  auto lckTwo = std::unique_lock<std::mutex>{two, std::defer_lock};
713  auto lckThree = std::unique_lock<std::mutex>{three, std::defer_lock};
714  func(lckOne, lckTwo, lckThree);
715 
716  spin(FLAGS_iterations);
717  }
718  }};
719 
720  auto threadTwo = std::thread{[&] {
721  barrier.wait();
722 
723  for (auto i = std::size_t{0}; i < iters; ++i) {
724  auto lck = std::unique_lock<std::mutex>{one};
725 
726  spin(FLAGS_iterations * FLAGS_iterations);
727  }
728  }};
729 
730  auto threadThree = std::thread{[&] {
731  barrier.wait();
732 
733  for (auto i = std::size_t{0}; i < iters; ++i) {
734  auto lckTwo = std::unique_lock<std::mutex>{two};
735  auto lckThree = std::unique_lock<std::mutex>{three};
736 
737  spin(FLAGS_iterations * FLAGS_iterations);
738  }
739  }};
740 
741  barrier.wait();
742  suspender.dismissing([&] {
743  threadOne.join();
744  threadTwo.join();
745  threadThree.join();
746  });
747 }
748 
749 template <typename LockingFunc>
750 void uncontended(std::size_t iters, LockingFunc func) {
751  auto&& suspender = BenchmarkSuspender{};
752  auto&& one = std::mutex{};
753  auto&& two = std::mutex{};
754 
755  suspender.dismissing([&] {
756  for (auto i = std::size_t{0}; i < iters; ++i) {
757  func(one, two);
758 
759  spin(FLAGS_iterations);
760 
761  one.unlock();
762  two.unlock();
763  }
764  });
765 }
766 
767 void spin(int iterations) {
768  for (auto i = 0; i < iterations; ++i) {
770  }
771 }
772 
773 } // namespace
static bool simple
DEFINE_uint64(iterations, 100,"The number of iterations with lock held")
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
void runBenchmarks()
Definition: Benchmark.cpp:456
std::condition_variable cv_
int main(int argc, char **argv)
std::mutex mutex_
std::vector< std::thread::id > threads
char ** argv
auto lock(SynchronizedLocker...lockersIn) -> std::tuple< typename SynchronizedLocker::LockedPtr... >
Definition: Synchronized.h:871
auto lock(Synchronized< D, M > &synchronized, Args &&...args)
bool wait(Waiter *waiter, bool shouldSleep, Waiter *&next)
Definition: Traits.h:588
BENCHMARK(fbFollyGlobalBenchmarkBaseline)
Definition: Benchmark.cpp:84
std::mutex mutex
BENCHMARK_DRAW_LINE()
void swap(SwapTrackingAlloc< T > &, SwapTrackingAlloc< T > &)
Definition: F14TestUtil.h:414
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258