proxygen
ThreadCachedIntTest.cpp File Reference
#include <folly/ThreadCachedInt.h>
#include <atomic>
#include <condition_variable>
#include <memory>
#include <thread>
#include <glog/logging.h>
#include <folly/Benchmark.h>
#include <folly/hash/Hash.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
#include <folly/system/ThreadId.h>

Go to the source code of this file.

Classes

class  ThreadCachedIntTest
 
struct  ShardedAtomicInt
 

Macros

#define CREATE_INC_FUNC(size)
 
#define MAKE_MT_CACHE_SIZE_BM(size)
 
#define REG_BASELINE(name, inc_stmt)
 

Typedefs

using Counter = ThreadCachedInt< int64_t >
 

Functions

 TEST_F (ThreadCachedIntTest, MultithreadedSlow)
 
 TEST_F (ThreadCachedIntTest, MultithreadedFast)
 
 TEST (ThreadCachedInt, SingleThreadedNotCached)
 
 TEST (ThreadCachedInt, SingleThreadedCached)
 
 DEFINE_int32 (numThreads, 8,"Number simultaneous threads for benchmarks.")
 
void incFunc64 ()
 
void incFunc32 ()
 
 TEST (ThreadCachedInt, MultiThreadedCached)
 
void BM_mt_cache_size64 (int iters, int cacheSize)
 
void BM_mt_cache_size32 (int iters, int cacheSize)
 
std::atomic< int64_t > globalInt64Baseline (0)
 
std::atomic< int32_t > globalInt32Baseline (0)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, _thread64), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, _thread32), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, ThreadLocal64), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, ThreadLocal32), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, atomic_inc64), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, atomic_inc32), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, ShardedAtm64), iters)
 
 BENCHMARK_DRAW_LINE ()
 
 BENCHMARK (Atomic_readFull)
 
 BENCHMARK (ThrCache_readFull)
 
 BENCHMARK (Sharded_readFull)
 
 BENCHMARK (ThrCache_readFast)
 
 BENCHMARK (Sharded_readFast)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, Atomic_readFull), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, ThrCache_readFull), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, Sharded_readFull), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, ThrCache_readFast), iters)
 
 BENCHMARK (FB_CONCATENATE(BM_mt_baseline_, Sharded_readFast), iters)
 
int main (int argc, char **argv)
 

Variables

ThreadCachedInt< int32_t > globalInt32 (0, 11)
 
ThreadCachedInt< int64_t > globalInt64 (0, 11)
 
int kNumInserts = 100000
 
ThreadLocal< int64_t > globalTL64Baseline
 
ThreadLocal< int32_t > globalTL32Baseline
 
FOLLY_TLS int64_t global__thread64
 
FOLLY_TLS int32_t global__thread32
 
ShardedAtomicInt shd_int64
 

Macro Definition Documentation

#define CREATE_INC_FUNC (   size)
Value:
void incFunc##size() { \
const int num = kNumInserts / FLAGS_numThreads; \
for (int i = 0; i < num; ++i) { \
++globalInt##size; \
} \
}
int kNumInserts
constexpr auto size(C const &c) -> decltype(c.size())
Definition: Access.h:45
const
Definition: upload.py:398

Definition at line 204 of file ThreadCachedIntTest.cpp.

#define MAKE_MT_CACHE_SIZE_BM (   size)
Value:
void BM_mt_cache_size##size(int iters, int cacheSize) { \
kNumInserts = iters; \
globalInt##size.set(0); \
globalInt##size.setCacheSize(cacheSize); \
std::vector<std::thread> threads; \
for (int i = 0; i < FLAGS_numThreads; ++i) { \
threads.push_back(std::thread(incFunc##size)); \
} \
for (auto& t : threads) { \
t.join(); \
} \
}
int kNumInserts
std::vector< std::thread::id > threads
constexpr auto size(C const &c) -> decltype(c.size())
Definition: Access.h:45

Definition at line 262 of file ThreadCachedIntTest.cpp.

#define REG_BASELINE (   name,
  inc_stmt 
)
Value:
BENCHMARK(FB_CONCATENATE(BM_mt_baseline_, name), iters) { \
const int iterPerThread = iters / FLAGS_numThreads; \
std::vector<std::thread> threads; \
for (int i = 0; i < FLAGS_numThreads; ++i) { \
threads.push_back(std::thread([&]() { \
for (int j = 0; j < iterPerThread; ++j) { \
inc_stmt; \
} \
})); \
} \
for (auto& t : threads) { \
t.join(); \
} \
}
BENCHMARK(FB_CONCATENATE(BM_mt_baseline_, _thread64), iters)
std::vector< std::thread::id > threads
const char * name
Definition: http_parser.c:437
#define FB_CONCATENATE(s1, s2)
Definition: Preprocessor.h:88
const
Definition: upload.py:398

Definition at line 278 of file ThreadCachedIntTest.cpp.

Referenced by BENCHMARK().

Typedef Documentation

Definition at line 37 of file ThreadCachedIntTest.cpp.

Function Documentation

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,_thread64)  ,
iters   
)

Definition at line 335 of file ThreadCachedIntTest.cpp.

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,_thread32)  ,
iters   
)

Definition at line 336 of file ThreadCachedIntTest.cpp.

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,ThreadLocal64)  ,
iters   
)

Definition at line 337 of file ThreadCachedIntTest.cpp.

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,ThreadLocal32)  ,
iters   
)

Definition at line 338 of file ThreadCachedIntTest.cpp.

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,atomic_inc64)  ,
iters   
)

Definition at line 341 of file ThreadCachedIntTest.cpp.

BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,atomic_inc32)  ,
iters   
)

Definition at line 344 of file ThreadCachedIntTest.cpp.

358 {
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,ShardedAtm64)  ,
iters   
)

Definition at line 345 of file ThreadCachedIntTest.cpp.

358 {
BENCHMARK ( Atomic_readFull  )

Definition at line 358 of file ThreadCachedIntTest.cpp.

References folly::doNotOptimizeAway().

358  {
359  doNotOptimizeAway(globalInt64Baseline.load(std::memory_order_relaxed));
360 }
std::atomic< int64_t > globalInt64Baseline(0)
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258
BENCHMARK ( ThrCache_readFull  )

Definition at line 361 of file ThreadCachedIntTest.cpp.

References folly::doNotOptimizeAway(), and folly::ThreadCachedInt< IntT, Tag >::readFull().

361  {
362  doNotOptimizeAway(globalInt64.readFull());
363 }
ThreadCachedInt< int64_t > globalInt64(0, 11)
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258
BENCHMARK ( Sharded_readFull  )

Definition at line 364 of file ThreadCachedIntTest.cpp.

References folly::doNotOptimizeAway(), and ShardedAtomicInt::readFull().

364  {
366 }
ShardedAtomicInt shd_int64
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258
BENCHMARK ( ThrCache_readFast  )

Definition at line 367 of file ThreadCachedIntTest.cpp.

References folly::doNotOptimizeAway(), and folly::ThreadCachedInt< IntT, Tag >::readFast().

367  {
368  doNotOptimizeAway(globalInt64.readFast());
369 }
ThreadCachedInt< int64_t > globalInt64(0, 11)
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258
BENCHMARK ( Sharded_readFast  )

Definition at line 370 of file ThreadCachedIntTest.cpp.

References folly::BENCHMARK_DRAW_LINE(), folly::doNotOptimizeAway(), ShardedAtomicInt::readFast(), and REG_BASELINE.

370  {
372 }
ShardedAtomicInt shd_int64
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,Atomic_readFull)  ,
iters   
)

Definition at line 378 of file ThreadCachedIntTest.cpp.

385 {
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,ThrCache_readFull)  ,
iters   
)

Definition at line 379 of file ThreadCachedIntTest.cpp.

385 {
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,Sharded_readFull)  ,
iters   
)

Definition at line 380 of file ThreadCachedIntTest.cpp.

385 {
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,ThrCache_readFast)  ,
iters   
)

Definition at line 381 of file ThreadCachedIntTest.cpp.

385 {
BENCHMARK ( FB_CONCATENATE(BM_mt_baseline_,Sharded_readFast)  ,
iters   
)

Definition at line 382 of file ThreadCachedIntTest.cpp.

385 {
BENCHMARK_DRAW_LINE ( )
void BM_mt_cache_size32 ( int  iters,
int  cacheSize 
)

Definition at line 276 of file ThreadCachedIntTest.cpp.

279 { \
void BM_mt_cache_size64 ( int  iters,
int  cacheSize 
)

Definition at line 275 of file ThreadCachedIntTest.cpp.

279 { \
DEFINE_int32 ( numThreads  ,
8  ,
"Number simultaneous threads for benchmarks."   
)
std::atomic<int32_t> globalInt32Baseline ( )
std::atomic<int64_t> globalInt64Baseline ( )
void incFunc32 ( )

Definition at line 212 of file ThreadCachedIntTest.cpp.

215 {
void incFunc64 ( )

Definition at line 211 of file ThreadCachedIntTest.cpp.

215 {
int main ( int  argc,
char **  argv 
)

Definition at line 385 of file ThreadCachedIntTest.cpp.

References testing::InitGoogleTest(), RUN_ALL_TESTS(), and folly::runBenchmarks().

385  {
387  gflags::ParseCommandLineFlags(&argc, &argv, true);
388  gflags::SetCommandLineOptionWithMode(
389  "bm_min_usec", "10000", gflags::SET_FLAG_IF_DEFAULT);
390  if (FLAGS_benchmark) {
392  }
393  return RUN_ALL_TESTS();
394 }
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_
Definition: gtest.h:2232
void runBenchmarks()
Definition: Benchmark.cpp:456
char ** argv
GTEST_API_ void InitGoogleTest(int *argc, char **argv)
Definition: gtest.cc:5370
TEST ( ThreadCachedInt  ,
SingleThreadedNotCached   
)

Definition at line 169 of file ThreadCachedIntTest.cpp.

References EXPECT_EQ, i, folly::ThreadCachedInt< IntT, Tag >::readFast(), and val.

169  {
171  EXPECT_EQ(0, val.readFast());
172  ++val;
173  EXPECT_EQ(1, val.readFast());
174  for (int i = 0; i < 41; ++i) {
175  val.increment(1);
176  }
177  EXPECT_EQ(42, val.readFast());
178  --val;
179  EXPECT_EQ(41, val.readFast());
180 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
double val
Definition: String.cpp:273
TEST ( ThreadCachedInt  ,
SingleThreadedCached   
)

Definition at line 184 of file ThreadCachedIntTest.cpp.

References EXPECT_EQ, globalInt32, globalInt64, i, folly::ThreadCachedInt< IntT, Tag >::readFast(), and val.

184  {
186  EXPECT_EQ(0, val.readFast());
187  ++val;
188  EXPECT_EQ(0, val.readFast());
189  for (int i = 0; i < 7; ++i) {
190  val.increment(1);
191  }
192  EXPECT_EQ(0, val.readFast());
193  EXPECT_EQ(0, val.readFastAndReset());
194  EXPECT_EQ(8, val.readFull());
195  EXPECT_EQ(8, val.readFullAndReset());
196  EXPECT_EQ(0, val.readFull());
197  EXPECT_EQ(0, val.readFast());
198 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
double val
Definition: String.cpp:273
TEST ( ThreadCachedInt  ,
MultiThreadedCached   
)

Definition at line 215 of file ThreadCachedIntTest.cpp.

References EXPECT_EQ, EXPECT_NE, FOR_EACH_RANGE, i, k, kNumInserts, folly::ThreadCachedInt< IntT, Tag >::readFast(), folly::ThreadCachedInt< IntT, Tag >::readFull(), folly::run(), folly::ThreadCachedInt< IntT, Tag >::set(), folly::pushmi::detail::t, and threads.

215  {
216  kNumInserts = 100000;
217  CHECK_EQ(0, kNumInserts % FLAGS_numThreads)
218  << "FLAGS_numThreads must evenly divide kNumInserts (" << kNumInserts
219  << ").";
220  const int numPerThread = kNumInserts / FLAGS_numThreads;
221  ThreadCachedInt<int64_t> TCInt64(0, numPerThread - 2);
222  {
223  std::atomic<bool> run(true);
224  std::atomic<int> threadsDone(0);
225  std::vector<std::thread> threads;
226  for (int i = 0; i < FLAGS_numThreads; ++i) {
227  threads.push_back(std::thread([&] {
228  FOR_EACH_RANGE (k, 0, numPerThread) { ++TCInt64; }
229  std::atomic_fetch_add(&threadsDone, 1);
230  while (run.load()) {
231  usleep(100);
232  }
233  }));
234  }
235 
236  // We create and increment another ThreadCachedInt here to make sure it
237  // doesn't interact with the other instances
238  ThreadCachedInt<int64_t> otherTCInt64(0, 10);
239  otherTCInt64.set(33);
240  ++otherTCInt64;
241 
242  while (threadsDone.load() < FLAGS_numThreads) {
243  usleep(100);
244  }
245 
246  ++otherTCInt64;
247 
248  // Threads are done incrementing, but caches have not been flushed yet, so
249  // we have to readFull.
250  EXPECT_NE(kNumInserts, TCInt64.readFast());
251  EXPECT_EQ(kNumInserts, TCInt64.readFull());
252 
253  run.store(false);
254  for (auto& t : threads) {
255  t.join();
256  }
257 
258  } // Caches are flushed when threads finish
259  EXPECT_EQ(kNumInserts, TCInt64.readFast());
260 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
int kNumInserts
std::vector< std::thread::id > threads
#define FOR_EACH_RANGE(i, begin, end)
Definition: Foreach.h:313
static void run(EventBaseManager *ebm, EventBase *eb, folly::Baton<> *stop, const StringPiece &name)
#define EXPECT_NE(val1, val2)
Definition: gtest.h:1926
KeyT k
TEST_F ( ThreadCachedIntTest  ,
MultithreadedSlow   
)

Definition at line 81 of file ThreadCachedIntTest.cpp.

References EXPECT_EQ, EXPECT_GE, i, int32_t, kNumThreads, threads, and uint32_t.

81  {
82  static constexpr uint32_t kNumThreads = 20;
83  g_sync_for_mt = 0;
84  vector<unique_ptr<std::thread>> threads(kNumThreads);
85  // Creates kNumThreads threads. Each thread performs a different
86  // number of iterations in Runner() - threads[0] performs 1
87  // iteration, threads[1] performs 2 iterations, threads[2] performs
88  // 3 iterations, and so on.
89  for (uint32_t i = 0; i < kNumThreads; ++i) {
90  threads[i] =
91  std::make_unique<std::thread>(Runner, &g_counter_for_mt_slow, i + 1);
92  }
93  // Variable to grab current counter value.
94  int32_t counter_value;
95  // The expected value of the counter.
96  int32_t total = 0;
97  // The expected value of GetDeadThreadsTotal().
98  int32_t dead_total = 0;
99  // Each iteration of the following thread allows one additional
100  // iteration of the threads. Given that the threads perform
101  // different number of iterations from 1 through kNumThreads, one
102  // thread will complete in each of the iterations of the loop below.
103  for (uint32_t i = 0; i < kNumThreads; ++i) {
104  // Allow upto iteration i on all threads.
105  {
106  std::lock_guard<std::mutex> lk(cv_m);
107  g_sync_for_mt = i + 1;
108  }
109  cv.notify_all();
110  total += (kNumThreads - i) * 10;
111  // Loop until the counter reaches its expected value.
112  do {
113  counter_value = g_counter_for_mt_slow.readFull();
114  } while (counter_value < total);
115  // All threads have done what they can until iteration i, now make
116  // sure they don't go further by checking 10 more times in the
117  // following loop.
118  for (uint32_t j = 0; j < 10; ++j) {
119  counter_value = g_counter_for_mt_slow.readFull();
120  EXPECT_EQ(total, counter_value);
121  }
122  dead_total += (i + 1) * 10;
123  EXPECT_GE(dead_total, GetDeadThreadsTotal(g_counter_for_mt_slow));
124  }
125  // All threads are done.
126  for (uint32_t i = 0; i < kNumThreads; ++i) {
127  threads[i]->join();
128  }
129  counter_value = g_counter_for_mt_slow.readFull();
130  EXPECT_EQ(total, counter_value);
131  EXPECT_EQ(total, dead_total);
132  EXPECT_EQ(dead_total, GetDeadThreadsTotal(g_counter_for_mt_slow));
133 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
static size_t const kNumThreads
#define EXPECT_GE(val1, val2)
Definition: gtest.h:1932
std::vector< std::thread::id > threads
TEST_F ( ThreadCachedIntTest  ,
MultithreadedFast   
)

Definition at line 137 of file ThreadCachedIntTest.cpp.

References EXPECT_EQ, i, int32_t, kNumThreads, threads, and uint32_t.

137  {
138  static constexpr uint32_t kNumThreads = 1000;
139  g_sync_for_mt = 0;
140  vector<unique_ptr<std::thread>> threads(kNumThreads);
141  // Creates kNumThreads threads. Each thread performs a different
142  // number of iterations in Runner() - threads[0] performs 1
143  // iteration, threads[1] performs 2 iterations, threads[2] performs
144  // 3 iterations, and so on.
145  for (uint32_t i = 0; i < kNumThreads; ++i) {
146  threads[i] =
147  std::make_unique<std::thread>(Runner, &g_counter_for_mt_fast, i + 1);
148  }
149  // Let the threads run to completion.
150  {
151  std::lock_guard<std::mutex> lk(cv_m);
152  g_sync_for_mt = kNumThreads;
153  }
154  cv.notify_all();
155  // The expected value of the counter.
156  uint32_t total = 0;
157  for (uint32_t i = 0; i < kNumThreads; ++i) {
158  total += (kNumThreads - i) * 10;
159  }
160  // Wait for all threads to complete.
161  for (uint32_t i = 0; i < kNumThreads; ++i) {
162  threads[i]->join();
163  }
164  int32_t counter_value = g_counter_for_mt_fast.readFull();
165  EXPECT_EQ(total, counter_value);
166  EXPECT_EQ(total, GetDeadThreadsTotal(g_counter_for_mt_fast));
167 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
static size_t const kNumThreads
std::vector< std::thread::id > threads

Variable Documentation

FOLLY_TLS int32_t global__thread32

Definition at line 299 of file ThreadCachedIntTest.cpp.

FOLLY_TLS int64_t global__thread64

Definition at line 298 of file ThreadCachedIntTest.cpp.

ThreadCachedInt<int32_t> globalInt32(0, 11)

Referenced by TEST().

ThreadCachedInt<int64_t> globalInt64(0, 11)

Referenced by TEST().

ThreadLocal<int32_t> globalTL32Baseline

Definition at line 295 of file ThreadCachedIntTest.cpp.

ThreadLocal<int64_t> globalTL64Baseline

Definition at line 294 of file ThreadCachedIntTest.cpp.

int kNumInserts = 100000

Definition at line 202 of file ThreadCachedIntTest.cpp.

Referenced by TEST().

ShardedAtomicInt shd_int64

Definition at line 333 of file ThreadCachedIntTest.cpp.