proxygen
ThreadLocalBenchmark.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2016-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <folly/ThreadLocal.h>
18 
19 #include <sys/types.h>
20 
21 #include <array>
22 #include <atomic>
23 #include <condition_variable>
24 #include <map>
25 #include <mutex>
26 #include <set>
27 #include <thread>
28 
29 #include <boost/thread/tss.hpp>
30 #include <glog/logging.h>
31 
32 #include <folly/Benchmark.h>
35 
36 using namespace folly;
37 
// Simple reference implementation of a thread-local owning pointer built
// directly on pthread_getspecific / pthread_setspecific.
//
// Each instance owns one pthread key. The value stored per thread is a
// heap-allocated T that is deleted when it is replaced through reset(), or by
// the key destructor (OnThreadExit) when the owning thread exits.
template <typename T>
class PThreadGetSpecific {
 public:
  // Creates the key and registers OnThreadExit as its per-thread destructor.
  // If key creation fails the object keeps key value 0, as before; the
  // failure is only remembered so the destructor does not delete a key this
  // object never owned.
  PThreadGetSpecific()
      : key_(0), created_(pthread_key_create(&key_, OnThreadExit) == 0) {}

  // Releases the key so short-lived instances (e.g. one per benchmark run)
  // do not exhaust the process-wide key table. pthread_key_delete does not
  // invoke destructors; values still stored by live threads are not freed.
  ~PThreadGetSpecific() {
    if (created_) {
      pthread_key_delete(key_);
    }
  }

  // A copy would share, and later double-delete, the same key.
  PThreadGetSpecific(const PThreadGetSpecific&) = delete;
  PThreadGetSpecific& operator=(const PThreadGetSpecific&) = delete;

  // Returns the calling thread's pointer, or nullptr if none has been set.
  T* get() const {
    return static_cast<T*>(pthread_getspecific(key_));
  }

  // Takes ownership of `t` for the calling thread and deletes the previously
  // stored object. Resetting to the pointer already stored is a no-op rather
  // than a delete followed by storing a dangling pointer.
  void reset(T* t) {
    T* const previous = get();
    if (previous == t) {
      return;
    }
    delete previous;
    pthread_setspecific(key_, t);
  }

  // Key destructor: frees the exiting thread's stored object.
  static void OnThreadExit(void* obj) {
    delete static_cast<T*>(obj);
  }

 private:
  pthread_key_t key_;
  bool created_;  // true iff pthread_key_create succeeded
};
61 
// gflags command-line flag (default 8). The benchmarks below spawn
// FLAGS_numThreads worker threads and divide the iteration count among them.
62 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.");
63 
// Registers a benchmark named BM_mt_<var> for the thread-local holder `var`.
//
// The benchmark starts --numThreads worker threads. Each worker installs a
// fresh int through var.reset() and then increments it through var.get()
// iters / numThreads times; the benchmark body returns once every worker has
// been joined.
//
// The thread count is clamped to at least 1: with --numThreads=0 the division
// below would otherwise be a division by zero.
#define REG(var)                                                        \
  BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) {                       \
    const int numThreads = FLAGS_numThreads > 0 ? FLAGS_numThreads : 1; \
    const int itersPerThread = iters / numThreads;                      \
    std::vector<std::thread> threads;                                   \
    threads.reserve(numThreads);                                        \
    for (int i = 0; i < numThreads; ++i) {                              \
      threads.emplace_back([&]() {                                      \
        var.reset(new int(0));                                          \
        for (int j = 0; j < itersPerThread; ++j) {                      \
          ++(*var.get());                                               \
        }                                                               \
      });                                                               \
    }                                                                   \
    for (auto& t : threads) {                                           \
      t.join();                                                         \
    }                                                                   \
  }
80 
82 REG(tlp)
84 REG(pthread_get_specific)
85 boost::thread_specific_ptr<int> boost_tsp;
86 REG(boost_tsp)
88 
// Payload for the *_multi benchmarks: two counters, both starting at zero.
struct foo {
  int a = 0;
  int b = 0;
};
93 
94 template <typename TL>
95 void run_multi(uint32_t iters) {
96  const int itersPerThread = iters / FLAGS_numThreads;
97  std::vector<std::thread> threads;
98  TL var;
99  for (int i = 0; i < FLAGS_numThreads; ++i) {
100  threads.push_back(std::thread([&]() {
101  var.reset(new foo);
102  for (int j = 0; j < itersPerThread; ++j) {
103  ++var.get()->a;
104  var.get()->b += var.get()->a;
105  --var.get()->a;
106  var.get()->b += var.get()->a;
107  }
108  }));
109  }
110  for (auto& t : threads) {
111  t.join();
112  }
113 }
114 
115 BENCHMARK(BM_mt_tlp_multi, iters) {
116  run_multi<ThreadLocalPtr<foo>>(iters);
117 }
118 BENCHMARK(BM_mt_pthread_get_specific_multi, iters) {
119  run_multi<PThreadGetSpecific<foo>>(iters);
120 }
121 BENCHMARK(BM_mt_boost_tsp_multi, iters) {
122  run_multi<boost::thread_specific_ptr<foo>>(iters);
123 }
125 
126 int main(int argc, char** argv) {
127  gflags::ParseCommandLineFlags(&argc, &argv, true);
128  gflags::SetCommandLineOptionWithMode(
129  "bm_max_iters", "100000000", gflags::SET_FLAG_IF_DEFAULT);
131  return 0;
132 }
133 
134 /*
135 ./buck-out/gen/folly/test/thread_local_benchmark --bm_min_iters=10000000
136 --numThreads=1
137 
138 ============================================================================
139 folly/test/ThreadLocalBenchmark.cpp relative time/iter iters/s
140 ============================================================================
141 BM_mt_tlp 1.92ns 520.02M
142 BM_mt_pthread_get_specific 2.69ns 372.15M
143 BM_mt_boost_tsp 11.81ns 84.67M
144 ----------------------------------------------------------------------------
145 BM_mt_tlp_multi 7.53ns 132.79M
146 BM_mt_pthread_get_specific_multi 15.80ns 63.29M
147 BM_mt_boost_tsp_multi 71.70ns 13.95M
148 ----------------------------------------------------------------------------
149 ============================================================================
150 */
ThreadLocalPtr< int > tlp
char b
int main(int argc, char **argv)
folly::std T
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
void runBenchmarks()
Definition: Benchmark.cpp:456
static void OnThreadExit(void *obj)
std::vector< std::thread::id > threads
boost::thread_specific_ptr< int > boost_tsp
DEFINE_int32(numThreads, 8,"Number simultaneous threads for benchmarks.")
char ** argv
void run_multi(uint32_t iters)
#define REG(var)
char a
BENCHMARK(fbFollyGlobalBenchmarkBaseline)
Definition: Benchmark.cpp:84
BENCHMARK_DRAW_LINE()
PThreadGetSpecific< int > pthread_get_specific