AtomicUnorderedMapTest.cpp
/*
 * Copyright 2013-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/AtomicUnorderedMap.h>

#include <memory>
#include <thread>
#include <unordered_map>

#include <folly/Benchmark.h>
#include <folly/Random.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
#include <folly/test/DeterministicSchedule.h>

using namespace folly;
using namespace folly::test;

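// A drop-in stand-in for std::atomic<T>: it exposes the same member
// functions but performs no synchronization. The fast_map benchmarks below
// use it to measure what the atomic operations themselves cost in the
// uncontended, single-threaded case.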
template <class T>
struct non_atomic {
  T value;

  non_atomic() = default;
  non_atomic(const non_atomic&) = delete;
  constexpr /* implicit */ non_atomic(T desired) : value(desired) {}

  T operator+=(T arg) {
    value += arg;
    return load();
  }

  T load(std::memory_order /* order */ = std::memory_order_seq_cst) const {
    return value;
  }

  /* implicit */
  operator T() const {
    return load();
  }

  void store(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    value = desired;
  }

  T exchange(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    T old = load();
    store(desired);
    return old;
  }

  bool compare_exchange_weak(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }

    expected = value;
    return false;
  }

  bool compare_exchange_strong(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }

    expected = value;
    return false;
  }

  bool is_lock_free() const {
    return true;
  }
};

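// Shorthand for AtomicUnorderedInsertMap with the usual std::hash and
// std::equal_to functors, leaving only the parameters the tests actually
// vary: key, value, index width, atom template, and allocator.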
template <
    typename Key,
    typename Value,
    typename IndexType,
    template <typename> class Atom = std::atomic,
    typename Allocator = std::allocator<char>>
using UIM = AtomicUnorderedInsertMap<
    Key,
    Value,
    std::hash<Key>,
    std::equal_to<Key>,
    (boost::has_trivial_destructor<Key>::value &&
     boost::has_trivial_destructor<Value>::value),
    Atom,
    IndexType,
    Allocator>;


namespace {
template <typename T>
struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
} // namespace

// uint16_t doesn't make sense for most platforms, but we might as well
// test it
using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);

TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
  UIM<std::string,
      std::string,
      TypeParam,
      std::atomic,
      folly::detail::MMapAlloc>
      m(100);

  m.emplace("abc", "ABC");
  EXPECT_TRUE(m.find("abc") != m.cend());
  EXPECT_EQ(m.find("abc")->first, "abc");
  EXPECT_EQ(m.find("abc")->second, "ABC");
  EXPECT_TRUE(m.find("def") == m.cend());
  auto iter = m.cbegin();
  EXPECT_TRUE(iter != m.cend());
  EXPECT_TRUE(iter == m.find("abc"));
  auto a = iter;
  EXPECT_TRUE(a == iter);
  auto b = iter;
  ++iter;
  EXPECT_TRUE(iter == m.cend());
  EXPECT_TRUE(a == b);
  EXPECT_TRUE(a != iter);
  a++;
  EXPECT_TRUE(a == iter);
  EXPECT_TRUE(a != b);
}

TEST(AtomicUnorderedInsertMap, load_factor) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 0.5f);

  // we should be able to put in many more than 5000 entries because of
  // our load factor request
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, true);
  }
}

TEST(AtomicUnorderedInsertMap, capacity_exceeded) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 1.0f);

  EXPECT_THROW(
      {
        for (int i = 0; i < 6000; ++i) {
          m.emplace(i, false);
        }
      },
      std::bad_alloc);
}

TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
  UIM<int, MutableAtom<int>, TypeParam> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
}

TEST(UnorderedInsertMap, value_mutation) {
  UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
  EXPECT_EQ(m.find(1)->second.data, 2);
}

// This test is too expensive to run automatically. On my dev server it
// takes about 10 minutes for a dbg build, 2 for opt.
TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
  size_t capacity = 2000000000;
  AtomicUnorderedInsertMap64<size_t, size_t> big(capacity);
  for (size_t i = 0; i < capacity * 2; i += 2) {
    big.emplace(i, i * 10);
  }
  for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
    auto iter = big.find(i);
    if ((i & 1) == 0 && i < capacity * 2) {
      EXPECT_EQ(iter->second, i * 10);
    } else {
      EXPECT_TRUE(iter == big.cend());
    }
  }
}
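
// Pure lookup-hit benchmark. BENCHMARK_SUSPEND stops the benchmark clock,
// so constructing and populating the map (and destroying it at the end)
// stays out of the measured region.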
BENCHMARK(lookup_int_int_hit, iters) {
  std::unique_ptr<AtomicUnorderedInsertMap<int, size_t>> ptr = {};

  size_t capacity = 100000;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<AtomicUnorderedInsertMap<int, size_t>>(capacity);
    for (size_t i = 0; i < capacity; ++i) {
      auto k = 3 * ((5641 * i) % capacity);
      ptr->emplace(k, k + 1);
      EXPECT_EQ(ptr->find(k)->second, k + 1);
    }
  }

  for (size_t i = 0; i < iters; ++i) {
    size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
    auto iter = ptr->find(k);
    if (iter == ptr->cend() || iter->second != k + 1) {
      auto jter = ptr->find(k);
      EXPECT_TRUE(iter == jter);
    }
    EXPECT_EQ(iter->second, k + 1);
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}

struct PairHash {
  size_t operator()(const std::pair<uint64_t, uint64_t>& pr) const {
    return pr.first ^ pr.second;
  }
};
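
// Mixed-workload benchmark: numThreads workers each perform itersPerThread
// operations against a shared insert-only map, doing roughly readsPerWrite
// finds per emplace. Writes stop once a thread has consumed its share of
// the fixed capacity, since the map can be neither resized nor cleared.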
void contendedRW(
    size_t itersPerThread,
    size_t capacity,
    size_t numThreads,
    size_t readsPerWrite) {
  typedef std::pair<uint64_t, uint64_t> Key;
  typedef AtomicUnorderedInsertMap<Key, MutableAtom<uint32_t>, PairHash> Map;

  std::unique_ptr<Map> ptr = {};
  std::atomic<bool> go{false};
  std::vector<std::thread> threads;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<Map>(capacity);
    while (threads.size() < numThreads) {
      threads.emplace_back([&]() {
        while (!go) {
          std::this_thread::yield();
        }

        size_t reads = 0;
        size_t writes = 0;
        while (reads + writes < itersPerThread) {
          auto r = Random::rand32();
          Key key(reads + writes, r);
          if (reads < writes * readsPerWrite ||
              writes >= capacity / numThreads) {
            // read needed
            ++reads;
            auto iter = ptr->find(key);
            EXPECT_TRUE(
                iter == ptr->cend() ||
                iter->second.data.load(std::memory_order_acquire) >=
                    key.first);
          } else {
            ++writes;
            try {
              auto pr = ptr->emplace(key, key.first);
              if (!pr.second) {
                pr.first->second.data++;
              }
            } catch (std::bad_alloc&) {
              LOG(INFO) << "bad alloc";
            }
          }
        }
      });
    }
  }

  go = true;

  for (auto& thr : threads) {
    thr.join();
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}

// clang-format off
// sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
//
// without MAP_HUGETLB (default)
//
// ============================================================================
// common/concurrency/experimental/AtomicUnorderedMapTest.cpp
//                                             relative  time/iter  iters/s
// ============================================================================
// lookup_int_int_hit                                      20.05ns   49.89M
// contendedRW(small_32thr_99pct)                          70.36ns   14.21M
// contendedRW(large_32thr_99pct)                         164.23ns    6.09M
// contendedRW(large_32thr_99_9pct)                       158.81ns    6.30M
// ============================================================================
//
// with MAP_HUGETLB hacked in
// ============================================================================
// lookup_int_int_hit                                      19.67ns   50.84M
// contendedRW(small_32thr_99pct)                          62.46ns   16.01M
// contendedRW(large_32thr_99pct)                         119.41ns    8.37M
// contendedRW(large_32thr_99_9pct)                       111.23ns    8.99M
// ============================================================================
// clang-format on
BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)

BENCHMARK_DRAW_LINE();

// clang-format off
// sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
//
// Single-threaded benchmarks comparing against std::unordered_map and
// measuring the cost of atomic operations in the uncontended case:
// ============================================================================
// std_map                                                  1.20ms   832.58
// atomic_fast_map                                        511.35us    1.96K
// fast_map                                               196.28us    5.09K
// ============================================================================
// clang-format on

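// Each benchmark below fills a map with 10000 entries and then finds each
// one; doNotOptimizeAway keeps the compiler from discarding the lookups.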
BENCHMARK(std_map) {
  std::unordered_map<long, long> m;
  m.reserve(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map) {
  UIM<long, long, uint32_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map) {
  UIM<long, long, uint32_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map_64) {
  UIM<long, long, uint64_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map_64) {
  UIM<long, long, uint64_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

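// Runs the gtest suite first; folly::runBenchmarksOnFlag() then runs the
// benchmarks when the --benchmark flag is passed.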
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  int rv = RUN_ALL_TESTS();
  folly::runBenchmarksOnFlag();
  return rv;
}