proxygen
ThreadCachedIntTest.cpp File Reference
#include <folly/ThreadCachedInt.h>
#include <atomic>
#include <condition_variable>
#include <memory>
#include <thread>
#include <glog/logging.h>
#include <folly/Benchmark.h>
#include <folly/hash/Hash.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
#include <folly/system/ThreadId.h>

Go to the source code of this file.

Classes

class  ThreadCachedIntTest
 
struct  ShardedAtomicInt
 

Macros

#define CREATE_INC_FUNC(size)
 
#define MAKE_MT_CACHE_SIZE_BM(size)
 
#define REG_BASELINE(name, inc_stmt)
 

Typedefs

using Counter = ThreadCachedInt< int64_t >
 

Functions

 TEST_F (ThreadCachedIntTest, MultithreadedSlow)
 
 TEST_F (ThreadCachedIntTest, MultithreadedFast)
 
 TEST (ThreadCachedInt, SingleThreadedNotCached)
 
 TEST (ThreadCachedInt, SingleThreadedCached)
 
 DEFINE_int32 (numThreads, 8,"Number simultaneous threads for benchmarks.")
 
void incFunc64 ()
 
void incFunc32 ()
 
 TEST (ThreadCachedInt, MultiThreadedCached)
 
void BM_mt_cache_size64 (int iters, int cacheSize)
 
void BM_mt_cache_size32 (int iters, int cacheSize)
 
std::atomic< int64_t > globalInt64Baseline (0)
 
std::atomic< int32_t > globalInt32Baseline (0)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, _thread64), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, _thread32), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, ThreadLocal64), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, ThreadLocal32), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, atomic_inc64), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, atomic_inc32), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, ShardedAtm64), iters)
 
 BENCHMARK_DRAW_LINE ()
 
 BENCHMARK (Atomic_readFull)
 
 BENCHMARK (ThrCache_readFull)
 
 BENCHMARK (Sharded_readFull)
 
 BENCHMARK (ThrCache_readFast)
 
 BENCHMARK (Sharded_readFast)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, Atomic_readFull), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, ThrCache_readFull), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, Sharded_readFull), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, ThrCache_readFast), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, Sharded_readFast), iters)
 
int main (int argc, char **argv)
 

Variables

ThreadCachedInt< int32_t > globalInt32 (0, 11)
 
ThreadCachedInt< int64_t > globalInt64 (0, 11)
 
int kNumInserts = 100000
 
ThreadLocal< int64_t > globalTL64Baseline
 
ThreadLocal< int32_t > globalTL32Baseline
 
FOLLY_TLS int64_t global__thread64
 
FOLLY_TLS int32_t global__thread32
 
ShardedAtomicInt shd_int64
 

Macro Definition Documentation

#define CREATE_INC_FUNC (   size)
Value:
void incFunc##size() { \
const int num = kNumInserts / FLAGS_numThreads; \
for (int i = 0; i < num; ++i) { \
++globalInt##size; \
} \
}
int kNumInserts
constexpr auto size(C const &c) -> decltype(c.size())
Definition: Access.h:45
const
Definition: upload.py:398

Definition at line 204 of file ThreadCachedIntTest.cpp.

#define MAKE_MT_CACHE_SIZE_BM (   size)
Value:
void BM_mt_cache_size##size(int iters, int cacheSize) { \
kNumInserts = iters; \
globalInt##size.set(0); \
globalInt##size.setCacheSize(cacheSize); \
std::vector<std::thread> threads; \
for (int i = 0; i < FLAGS_numThreads; ++i) { \
threads.push_back(std::thread(incFunc##size)); \
} \
for (auto& t : threads) { \
t.join(); \
} \
}
int kNumInserts
std::vector< std::thread::id > threads
constexpr auto size(C const &c) -> decltype(c.size())
Definition: Access.h:45

Definition at line 262 of file ThreadCachedIntTest.cpp.

#define REG_BASELINE (   name,
  inc_stmt 
)
Value:
BENCHMARK(FB_CONCATENATE(BM_mt_baseline_, name), iters) { \
const int iterPerThread = iters / FLAGS_numThreads; \
std::vector<std::thread> threads; \
for (int i = 0; i < FLAGS_numThreads; ++i) { \
threads.push_back(std::thread([&]() { \
for (int j = 0; j < iterPerThread; ++j) { \
inc_stmt; \
} \
})); \
} \
for (auto& t : threads) { \
t.join(); \
} \
}
BENCHMARK(FB_CONCATENATE(BM_mt_baseline_, _thread64), iters)
std::vector< std::thread::id > threads
const char * name
Definition: http_parser.c:437
#define FB_CONCATENATE(s1, s2)
Definition: Preprocessor.h:88
const
Definition: upload.py:398

Definition at line 278 of file ThreadCachedIntTest.cpp.

Referenced by BENCHMARK().

Typedef Documentation

Definition at line 37 of file ThreadCachedIntTest.cpp.

Function Documentation

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,_thread64)  ,
iters   
)

Definition at line 335 of file ThreadCachedIntTest.cpp.

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,_thread32)  ,
iters   
)

Definition at line 336 of file ThreadCachedIntTest.cpp.

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,ThreadLocal64)  ,
iters   
)

Definition at line 337 of file ThreadCachedIntTest.cpp.

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,ThreadLocal32)  ,
iters   
)

Definition at line 338 of file ThreadCachedIntTest.cpp.

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,atomic_inc64)  ,
iters   
)

Definition at line 341 of file ThreadCachedIntTest.cpp.

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,atomic_inc32)  ,
iters   
)

Definition at line 344 of file ThreadCachedIntTest.cpp.

358 {
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,ShardedAtm64)  ,
iters   
)

Definition at line 345 of file ThreadCachedIntTest.cpp.

358 {
BENCHMARK ( Atomic_readFull  )

Definition at line 358 of file ThreadCachedIntTest.cpp.

References folly::doNotOptimizeAway().

358  {
359  doNotOptimizeAway(globalInt64Baseline.load(std::memory_order_relaxed));
360 }
std::atomic< int64_t > globalInt64Baseline(0)
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258
BENCHMARK ( ThrCache_readFull  )

Definition at line 361 of file ThreadCachedIntTest.cpp.

References folly::doNotOptimizeAway(), and folly::ThreadCachedInt< IntT, Tag >::readFull().

361  {
362  doNotOptimizeAway(globalInt64.readFull());
363 }
ThreadCachedInt< int64_t > globalInt64(0, 11)
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258
BENCHMARK ( Sharded_readFull  )

Definition at line 364 of file ThreadCachedIntTest.cpp.

References folly::doNotOptimizeAway(), and ShardedAtomicInt::readFull().

364  {
366 }
ShardedAtomicInt shd_int64
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258
BENCHMARK ( ThrCache_readFast  )

Definition at line 367 of file ThreadCachedIntTest.cpp.

References folly::doNotOptimizeAway(), and folly::ThreadCachedInt< IntT, Tag >::readFast().

367  {
368  doNotOptimizeAway(globalInt64.readFast());
369 }
ThreadCachedInt< int64_t > globalInt64(0, 11)
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258
BENCHMARK ( Sharded_readFast  )

Definition at line 370 of file ThreadCachedIntTest.cpp.

References folly::BENCHMARK_DRAW_LINE(), folly::doNotOptimizeAway(), ShardedAtomicInt::readFast(), and REG_BASELINE.

370  {
372 }
ShardedAtomicInt shd_int64
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,Atomic_readFull)  ,
iters   
)

Definition at line 378 of file ThreadCachedIntTest.cpp.

385 {
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,ThrCache_readFull)  ,
iters   
)

Definition at line 379 of file ThreadCachedIntTest.cpp.

385 {
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,Sharded_readFull)  ,
iters   
)

Definition at line 380 of file ThreadCachedIntTest.cpp.

385 {
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,ThrCache_readFast)  ,
iters   
)

Definition at line 381 of file ThreadCachedIntTest.cpp.

385 {
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,Sharded_readFast)  ,
iters   
)

Definition at line 382 of file ThreadCachedIntTest.cpp.

385 {
BENCHMARK_DRAW_LINE ( )
void BM_mt_cache_size32 ( int  iters,
int  cacheSize 
)

Definition at line 276 of file ThreadCachedIntTest.cpp.

279 { \
void BM_mt_cache_size64 ( int  iters,
int  cacheSize 
)

Definition at line 275 of file ThreadCachedIntTest.cpp.

279 { \
DEFINE_int32 ( numThreads  ,
8  ,
"Number simultaneous threads for benchmarks."   
)
std::atomic<int32_t> globalInt32Baseline ( )
std::atomic<int64_t> globalInt64Baseline ( )
void incFunc32 ( )

Definition at line 212 of file ThreadCachedIntTest.cpp.

215 {
void incFunc64 ( )

Definition at line 211 of file ThreadCachedIntTest.cpp.

215 {
int main ( int  argc,
char **  argv 
)

Definition at line 385 of file ThreadCachedIntTest.cpp.

References testing::InitGoogleTest(), RUN_ALL_TESTS(), and folly::runBenchmarks().

385  {
387  gflags::ParseCommandLineFlags(&argc, &argv, true);
388  gflags::SetCommandLineOptionWithMode(
389  "bm_min_usec", "10000", gflags::SET_FLAG_IF_DEFAULT);
390  if (FLAGS_benchmark) {
392  }
393  return RUN_ALL_TESTS();
394 }
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_
Definition: gtest.h:2232
void runBenchmarks()
Definition: Benchmark.cpp:456
char ** argv
GTEST_API_ void InitGoogleTest(int *argc, char **argv)
Definition: gtest.cc:5370
TEST ( ThreadCachedInt  ,
SingleThreadedNotCached   
)

Definition at line 169 of file ThreadCachedIntTest.cpp.

References EXPECT_EQ, i, folly::ThreadCachedInt< IntT, Tag >::readFast(), and val.

169  {
171  EXPECT_EQ(0, val.readFast());
172  ++val;
173  EXPECT_EQ(1, val.readFast());
174  for (int i = 0; i < 41; ++i) {
175  val.increment(1);
176  }
177  EXPECT_EQ(42, val.readFast());
178  --val;
179  EXPECT_EQ(41, val.readFast());
180 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
double val
Definition: String.cpp:273
TEST ( ThreadCachedInt  ,
SingleThreadedCached   
)

Definition at line 184 of file ThreadCachedIntTest.cpp.

References EXPECT_EQ, globalInt32, globalInt64, i, folly::ThreadCachedInt< IntT, Tag >::readFast(), and val.

184  {
186  EXPECT_EQ(0, val.readFast());
187  ++val;
188  EXPECT_EQ(0, val.readFast());
189  for (int i = 0; i < 7; ++i) {
190  val.increment(1);
191  }
192  EXPECT_EQ(0, val.readFast());
193  EXPECT_EQ(0, val.readFastAndReset());
194  EXPECT_EQ(8, val.readFull());
195  EXPECT_EQ(8, val.readFullAndReset());
196  EXPECT_EQ(0, val.readFull());
197  EXPECT_EQ(0, val.readFast());
198 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
double val
Definition: String.cpp:273
TEST ( ThreadCachedInt  ,
MultiThreadedCached   
)

Definition at line 215 of file ThreadCachedIntTest.cpp.

References EXPECT_EQ, EXPECT_NE, FOR_EACH_RANGE, i, k, kNumInserts, folly::ThreadCachedInt< IntT, Tag >::readFast(), folly::ThreadCachedInt< IntT, Tag >::readFull(), folly::run(), folly::ThreadCachedInt< IntT, Tag >::set(), folly::pushmi::detail::t, and threads.

215  {
216  kNumInserts = 100000;
217  CHECK_EQ(0, kNumInserts % FLAGS_numThreads)
218  << "FLAGS_numThreads must evenly divide kNumInserts (" << kNumInserts
219  << ").";
220  const int numPerThread = kNumInserts / FLAGS_numThreads;
221  ThreadCachedInt<int64_t> TCInt64(0, numPerThread - 2);
222  {
223  std::atomic<bool> run(true);
224  std::atomic<int> threadsDone(0);
225  std::vector<std::thread> threads;
226  for (int i = 0; i < FLAGS_numThreads; ++i) {
227  threads.push_back(std::thread([&] {
228  FOR_EACH_RANGE (k, 0, numPerThread) { ++TCInt64; }
229  std::atomic_fetch_add(&threadsDone, 1);
230  while (run.load()) {
231  usleep(100);
232  }
233  }));
234  }
235 
236  // We create and increment another ThreadCachedInt here to make sure it
237  // doesn't interact with the other instances
238  ThreadCachedInt<int64_t> otherTCInt64(0, 10);
239  otherTCInt64.set(33);
240  ++otherTCInt64;
241 
242  while (threadsDone.load() < FLAGS_numThreads) {
243  usleep(100);
244  }
245 
246  ++otherTCInt64;
247 
248  // Threads are done incrementing, but caches have not been flushed yet, so
249  // we have to readFull.
250  EXPECT_NE(kNumInserts, TCInt64.readFast());
251  EXPECT_EQ(kNumInserts, TCInt64.readFull());
252 
253  run.store(false);
254  for (auto& t : threads) {
255  t.join();
256  }
257 
258  } // Caches are flushed when threads finish
259  EXPECT_EQ(kNumInserts, TCInt64.readFast());
260 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
int kNumInserts
std::vector< std::thread::id > threads
#define FOR_EACH_RANGE(i, begin, end)
Definition: Foreach.h:313
static void run(EventBaseManager *ebm, EventBase *eb, folly::Baton<> *stop, const StringPiece &name)
#define EXPECT_NE(val1, val2)
Definition: gtest.h:1926
KeyT k
TEST_F ( ThreadCachedIntTest  ,
MultithreadedSlow   
)

Definition at line 81 of file ThreadCachedIntTest.cpp.

References EXPECT_EQ, EXPECT_GE, i, int32_t, kNumThreads, threads, and uint32_t.

81  {
82  static constexpr uint32_t kNumThreads = 20;
83  g_sync_for_mt = 0;
84  vector<unique_ptr<std::thread>> threads(kNumThreads);
85  // Creates kNumThreads threads. Each thread performs a different
86  // number of iterations in Runner() - threads[0] performs 1
87  // iteration, threads[1] performs 2 iterations, threads[2] performs
88  // 3 iterations, and so on.
89  for (uint32_t i = 0; i < kNumThreads; ++i) {
90  threads[i] =
91  std::make_unique<std::thread>(Runner, &g_counter_for_mt_slow, i + 1);
92  }
93  // Variable to grab current counter value.
94  int32_t counter_value;
95  // The expected value of the counter.
96  int32_t total = 0;
97  // The expected value of GetDeadThreadsTotal().
98  int32_t dead_total = 0;
99  // Each iteration of the following thread allows one additional
100  // iteration of the threads. Given that the threads perform
101  // different number of iterations from 1 through kNumThreads, one
102  // thread will complete in each of the iterations of the loop below.
103  for (uint32_t i = 0; i < kNumThreads; ++i) {
104  // Allow upto iteration i on all threads.
105  {
106  std::lock_guard<std::mutex> lk(cv_m);
107  g_sync_for_mt = i + 1;
108  }
109  cv.notify_all();
110  total += (kNumThreads - i) * 10;
111  // Loop until the counter reaches its expected value.
112  do {
113  counter_value = g_counter_for_mt_slow.readFull();
114  } while (counter_value < total);
115  // All threads have done what they can until iteration i, now make
116  // sure they don't go further by checking 10 more times in the
117  // following loop.
118  for (uint32_t j = 0; j < 10; ++j) {
119  counter_value = g_counter_for_mt_slow.readFull();
120  EXPECT_EQ(total, counter_value);
121  }
122  dead_total += (i + 1) * 10;
123  EXPECT_GE(dead_total, GetDeadThreadsTotal(g_counter_for_mt_slow));
124  }
125  // All threads are done.
126  for (uint32_t i = 0; i < kNumThreads; ++i) {
127  threads[i]->join();
128  }
129  counter_value = g_counter_for_mt_slow.readFull();
130  EXPECT_EQ(total, counter_value);
131  EXPECT_EQ(total, dead_total);
132  EXPECT_EQ(dead_total, GetDeadThreadsTotal(g_counter_for_mt_slow));
133 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
static size_t const kNumThreads
#define EXPECT_GE(val1, val2)
Definition: gtest.h:1932
std::vector< std::thread::id > threads
TEST_F ( ThreadCachedIntTest  ,
MultithreadedFast   
)

Definition at line 137 of file ThreadCachedIntTest.cpp.

References EXPECT_EQ, i, int32_t, kNumThreads, threads, and uint32_t.

137  {
138  static constexpr uint32_t kNumThreads = 1000;
139  g_sync_for_mt = 0;
140  vector<unique_ptr<std::thread>> threads(kNumThreads);
141  // Creates kNumThreads threads. Each thread performs a different
142  // number of iterations in Runner() - threads[0] performs 1
143  // iteration, threads[1] performs 2 iterations, threads[2] performs
144  // 3 iterations, and so on.
145  for (uint32_t i = 0; i < kNumThreads; ++i) {
146  threads[i] =
147  std::make_unique<std::thread>(Runner, &g_counter_for_mt_fast, i + 1);
148  }
149  // Let the threads run to completion.
150  {
151  std::lock_guard<std::mutex> lk(cv_m);
152  g_sync_for_mt = kNumThreads;
153  }
154  cv.notify_all();
155  // The expected value of the counter.
156  uint32_t total = 0;
157  for (uint32_t i = 0; i < kNumThreads; ++i) {
158  total += (kNumThreads - i) * 10;
159  }
160  // Wait for all threads to complete.
161  for (uint32_t i = 0; i < kNumThreads; ++i) {
162  threads[i]->join();
163  }
164  int32_t counter_value = g_counter_for_mt_fast.readFull();
165  EXPECT_EQ(total, counter_value);
166  EXPECT_EQ(total, GetDeadThreadsTotal(g_counter_for_mt_fast));
167 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
static size_t const kNumThreads
std::vector< std::thread::id > threads

Variable Documentation

FOLLY_TLS int32_t global__thread32

Definition at line 299 of file ThreadCachedIntTest.cpp.

FOLLY_TLS int64_t global__thread64

Definition at line 298 of file ThreadCachedIntTest.cpp.

ThreadCachedInt<int32_t> globalInt32(0, 11)

Referenced by TEST().

ThreadCachedInt<int64_t> globalInt64(0, 11)

Referenced by TEST().

ThreadLocal<int32_t> globalTL32Baseline

Definition at line 295 of file ThreadCachedIntTest.cpp.

ThreadLocal<int64_t> globalTL64Baseline

Definition at line 294 of file ThreadCachedIntTest.cpp.

int kNumInserts = 100000

Definition at line 202 of file ThreadCachedIntTest.cpp.

Referenced by TEST().

ShardedAtomicInt shd_int64

Definition at line 333 of file ThreadCachedIntTest.cpp.