proxygen
FlatCombiningBenchmark.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <folly/Benchmark.h>
19 
21 #include <glog/logging.h>
22 
23 using namespace folly::test;
24 
25 // use option --benchmark to run folly::Benchmark
26 // use option --direct to run direct benchmark measurements
27 DEFINE_bool(direct, false, "run direct measurement");
28 DEFINE_int32(reps, 10, "number of reps");
29 DEFINE_int32(ops, 100000, "number of operations per rep");
30 DEFINE_int32(lines, 5, "number of cache lines accessed per operation");
31 DEFINE_int32(numRecs, 8, "number of records");
32 DEFINE_int32(work, 1000, "amount of unrelated work per operation");
33 
34 static std::vector<int> nthr = {1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64};
35 static int nthreads;
36 static bool fc;
37 static bool simple;
38 static bool dedicated;
39 static bool tc;
40 static bool syncops;
41 
42 // baseline - no combining
43 BENCHMARK(no_combining_base, iters) {
44  fc = false;
45  dedicated = false;
46  tc = false;
47  syncops = false;
48  run_test(
49  nthreads,
50  FLAGS_lines,
51  FLAGS_numRecs,
52  FLAGS_work,
53  iters,
54  fc,
55  simple,
56  dedicated,
57  tc,
58  syncops);
59 }
60 
61 BENCHMARK_RELATIVE(no_combining_dup, iters) {
62  run_test(
63  nthreads,
64  FLAGS_lines,
65  FLAGS_numRecs,
66  FLAGS_work,
67  iters,
68  fc,
69  simple,
70  dedicated,
71  tc,
72  syncops);
73 }
75 
76 // dedicated combiner
77 
79 
80 BENCHMARK_RELATIVE(combining_dedicated_notc_sync, iters) {
81  fc = true;
82  dedicated = true;
83  tc = false;
84  syncops = true;
85  run_test(
86  nthreads,
87  FLAGS_lines,
88  FLAGS_numRecs,
89  FLAGS_work,
90  iters,
91  fc,
92  simple,
93  dedicated,
94  tc,
95  syncops);
96 }
97 
98 BENCHMARK_RELATIVE(combining_dedicated_notc_sync_dup, iters) {
99  run_test(
100  nthreads,
101  FLAGS_lines,
102  FLAGS_numRecs,
103  FLAGS_work,
104  iters,
105  fc,
106  simple,
107  dedicated,
108  tc,
109  syncops);
110 }
111 
113 
114 BENCHMARK_RELATIVE(combining_dedicated_notc_async, iters) {
115  syncops = false;
116  run_test(
117  nthreads,
118  FLAGS_lines,
119  FLAGS_numRecs,
120  FLAGS_work,
121  iters,
122  fc,
123  simple,
124  dedicated,
125  tc,
126  syncops);
127 }
128 
129 BENCHMARK_RELATIVE(combining_dedicated_notc_async_dup, iters) {
130  run_test(
131  nthreads,
132  FLAGS_lines,
133  FLAGS_numRecs,
134  FLAGS_work,
135  iters,
136  fc,
137  simple,
138  dedicated,
139  tc,
140  syncops);
141 }
142 
144 
145 BENCHMARK_RELATIVE(combining_dedicated_tc_sync, iters) {
146  tc = true;
147  syncops = true;
148  run_test(
149  nthreads,
150  FLAGS_lines,
151  FLAGS_numRecs,
152  FLAGS_work,
153  iters,
154  fc,
155  simple,
156  dedicated,
157  tc,
158  syncops);
159 }
160 
161 BENCHMARK_RELATIVE(combining_dedicated_tc_sync_dup, iters) {
162  run_test(
163  nthreads,
164  FLAGS_lines,
165  FLAGS_numRecs,
166  FLAGS_work,
167  iters,
168  fc,
169  simple,
170  dedicated,
171  tc,
172  syncops);
173 }
174 
176 
177 BENCHMARK_RELATIVE(combining_dedicated_tc_async, iters) {
178  tc = true;
179  syncops = false;
180  run_test(
181  nthreads,
182  FLAGS_lines,
183  FLAGS_numRecs,
184  FLAGS_work,
185  iters,
186  fc,
187  simple,
188  dedicated,
189  tc,
190  syncops);
191 }
192 
193 BENCHMARK_RELATIVE(combining_dedicated_tc_async_dup, iters) {
194  run_test(
195  nthreads,
196  FLAGS_lines,
197  FLAGS_numRecs,
198  FLAGS_work,
199  iters,
200  fc,
201  simple,
202  dedicated,
203  tc,
204  syncops);
205 }
206 
208 
209 // no dedicated combiner
210 
212 
213 BENCHMARK_RELATIVE(combining_no_dedicated_notc_sync, iters) {
214  dedicated = false;
215  tc = false;
216  syncops = true;
217  run_test(
218  nthreads,
219  FLAGS_lines,
220  FLAGS_numRecs,
221  FLAGS_work,
222  iters,
223  fc,
224  simple,
225  dedicated,
226  tc,
227  syncops);
228 }
229 
230 BENCHMARK_RELATIVE(combining_no_dedicated_notc_sync_dup, iters) {
231  run_test(
232  nthreads,
233  FLAGS_lines,
234  FLAGS_numRecs,
235  FLAGS_work,
236  iters,
237  fc,
238  simple,
239  dedicated,
240  tc,
241  syncops);
242 }
243 
245 
246 BENCHMARK_RELATIVE(combining_no_dedicated_notc_async, iters) {
247  syncops = false;
248  run_test(
249  nthreads,
250  FLAGS_lines,
251  FLAGS_numRecs,
252  FLAGS_work,
253  iters,
254  fc,
255  simple,
256  dedicated,
257  tc,
258  syncops);
259 }
260 
261 BENCHMARK_RELATIVE(combining_no_dedicated_notc_async_dup, iters) {
262  run_test(
263  nthreads,
264  FLAGS_lines,
265  FLAGS_numRecs,
266  FLAGS_work,
267  iters,
268  fc,
269  simple,
270  dedicated,
271  tc,
272  syncops);
273 }
274 
276 
277 BENCHMARK_RELATIVE(combining_no_dedicated_tc_sync, iters) {
278  tc = true;
279  syncops = true;
280  run_test(
281  nthreads,
282  FLAGS_lines,
283  FLAGS_numRecs,
284  FLAGS_work,
285  iters,
286  fc,
287  simple,
288  dedicated,
289  tc,
290  syncops);
291 }
292 
293 BENCHMARK_RELATIVE(combining_no_dedicated_tc_sync_dup, iters) {
294  run_test(
295  nthreads,
296  FLAGS_lines,
297  FLAGS_numRecs,
298  FLAGS_work,
299  iters,
300  fc,
301  simple,
302  dedicated,
303  tc,
304  syncops);
305 }
306 
308 
309 BENCHMARK_RELATIVE(combining_no_dedicated_tc_async, iters) {
310  tc = true;
311  syncops = false;
312  run_test(
313  nthreads,
314  FLAGS_lines,
315  FLAGS_numRecs,
316  FLAGS_work,
317  iters,
318  fc,
319  simple,
320  dedicated,
321  tc,
322  syncops);
323 }
324 
325 BENCHMARK_RELATIVE(combining_no_dedicated_tc_async_dup, iters) {
326  run_test(
327  nthreads,
328  FLAGS_lines,
329  FLAGS_numRecs,
330  FLAGS_work,
331  iters,
332  fc,
333  simple,
334  dedicated,
335  tc,
336  syncops);
337 }
338 
340 
// Prints a one-line banner with the number of logical cores reported by
// std::thread::hardware_concurrency(). Called at the start of both test
// cases before any measurement output.
void benchmarkSetup() {
  int numCores = std::thread::hardware_concurrency();
  std::cout << "\nRunning benchmarks on machine with " << numCores
            << " logical cores" << std::endl;
}
346 
347 TEST(FlatCombining, folly_benchmark) {
348  if (FLAGS_benchmark) {
349  benchmarkSetup();
350  for (bool b : {true, false}) {
351  simple = b;
352  std::string str = simple ? "simple" : "custom";
353  std::cout << "\n------------------------------------ " << str
354  << " interface" << std::endl;
355  for (int i : nthr) {
356  std::cout << "\n---------------------------------- Number of threads = "
357  << i << std::endl;
358  nthreads = i;
360  }
361  }
362  }
363 }
364 
365 // Direct measurement - not using folly::Benchmark
366 
367 static uint64_t test(
369  bool fc_,
370  bool dedicated_,
371  bool tc_,
372  bool syncops_,
373  uint64_t base) {
374  uint64_t min = UINTMAX_MAX;
375  uint64_t max = 0;
376  uint64_t sum = 0;
377 
378  for (int i = 0; i < FLAGS_reps; ++i) {
379  uint64_t dur = run_test(
380  nthreads,
381  FLAGS_lines,
382  FLAGS_numRecs,
383  FLAGS_work,
384  FLAGS_ops,
385  fc_,
386  simple,
387  dedicated_,
388  tc_,
389  syncops_);
390  sum += dur;
391  min = std::min(min, dur);
392  max = std::max(max, dur);
393  }
394  uint64_t avg = sum / FLAGS_reps;
395 
396  uint64_t res = min;
397  std::cout << name;
398  std::cout << " " << std::setw(4) << max / FLAGS_ops << " ns";
399  std::cout << " " << std::setw(4) << avg / FLAGS_ops << " ns";
400  std::cout << " " << std::setw(4) << res / FLAGS_ops << " ns";
401  if (base) {
402  std::cout << " " << std::setw(3) << 100 * base / res << "%";
403  }
404  std::cout << std::endl;
405  return res;
406 }
407 
408 TEST(FlatCombining, direct_measurement) {
409  if (!FLAGS_direct) {
410  return;
411  }
412  benchmarkSetup();
413  simple = false;
414  std::string str = simple ? "simple" : "custom";
415  std::cout << "\n------------------------------------ " << str << " interface"
416  << std::endl;
417  for (int i : nthr) {
418  nthreads = i;
419  std::cout << "\n------------------------------------ Number of threads = "
420  << i << "\n"
421  << std::endl;
422  std::cout << "Test_name, Max time, Avg time, Min time, % base min / min\n"
423  << std::endl;
424 
425  uint64_t base =
426  test("no_combining - base ", false, false, false, false, 0);
427  test("no_combining - dup ", false, false, false, false, base);
428  std::cout << "---------------------------------------" << std::endl;
429 
430  std::cout << "---- dedicated-------------------------" << std::endl;
431  test("combining_notc_sync ", true, true, false, true, base);
432  test("combining_notc_sync - dup ", true, true, false, true, base);
433  std::cout << "---------------------------------------" << std::endl;
434  test("combining_notc_async ", true, true, false, false, base);
435  test("combining_notc_async - dup ", true, true, false, false, base);
436  std::cout << "---------------------------------------" << std::endl;
437  test("combining_tc_sync ", true, true, true, true, base);
438  test("combining_tc_sync - dup ", true, true, true, true, base);
439  std::cout << "---------------------------------------" << std::endl;
440  test("combining_tc_async ", true, true, true, false, base);
441  test("combining_tc_async - dup ", true, true, true, false, base);
442  std::cout << "---------------------------------------" << std::endl;
443 
444  std::cout << "---- no dedicated----------------------" << std::endl;
445  test("combining_notc_sync ", true, false, false, true, base);
446  test("combining_notc_sync - dup ", true, false, false, true, base);
447  std::cout << "---------------------------------------" << std::endl;
448  test("combining_notc_async ", true, false, false, false, base);
449  test("combining_notc_async - dup ", true, false, false, false, base);
450  std::cout << "---------------------------------------" << std::endl;
451  test("combining_tc_sync ", true, false, true, true, base);
452  test("combining_tc_sync - dup ", true, false, true, true, base);
453  std::cout << "---------------------------------------" << std::endl;
454  test("combining_tc_async ", true, false, true, false, base);
455  test("combining_tc_async - dup ", true, false, true, false, base);
456  std::cout << "---------------------------------------" << std::endl;
457  }
458 }
459 
460 // clang-format off
461 /*
462 See benchmark results in https://phabricator.intern.facebook.com/P57204895
463 
464 The results are from a run using the command
465 $ numactl -N 1 flat_combining_benchmark --benchmark --bm_min_iters=100000 --direct
466 
467 Using the default parameters of the benchmark: In each iteration, the
468 operation on the shared data structure updates 5 cache lines and
469 performs unrelated work (~300ns) after each operation. The benchmark
470 doesn't do any smart combining (i.e., saving or dropping some work
471 based on understanding the details of the combined operations).
472 
473 Direct measurements are used to evaluate the high variance in some cases.
474 Duplicate runs are included in order to assess the relevance of outliers.
475 
476 ----
477 [==========] Running 2 tests from 1 test case.
478 [----------] Global test environment set-up.
479 [----------] 2 tests from FlatCombining
480 [ RUN ] FlatCombining.folly_benchmark
481 
482 Running benchmarks on machine with 32 logical cores
483 
484 ------------------------------------ simple interface
485 
486 ---------------------------------- Number of threads = 1
487 ============================================================================
488 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
489 ============================================================================
490 no_combining_base 330.43ns 3.03M
491 no_combining_dup 100.09% 330.13ns 3.03M
492 ----------------------------------------------------------------------------
493 ----------------------------------------------------------------------------
494 combining_dedicated_notc_sync 93.17% 354.66ns 2.82M
495 combining_dedicated_notc_sync_dup 93.57% 353.15ns 2.83M
496 ----------------------------------------------------------------------------
497 combining_dedicated_notc_async 99.35% 332.60ns 3.01M
498 combining_dedicated_notc_async_dup 99.07% 333.54ns 3.00M
499 ----------------------------------------------------------------------------
500 combining_dedicated_tc_sync 93.05% 355.13ns 2.82M
501 combining_dedicated_tc_sync_dup 92.87% 355.81ns 2.81M
502 ----------------------------------------------------------------------------
503 combining_dedicated_tc_async 99.17% 333.21ns 3.00M
504 combining_dedicated_tc_async_dup 99.28% 332.84ns 3.00M
505 ----------------------------------------------------------------------------
506 ----------------------------------------------------------------------------
507 combining_no_dedicated_notc_sync 93.51% 353.38ns 2.83M
508 combining_no_dedicated_notc_sync_dup 93.27% 354.26ns 2.82M
509 ----------------------------------------------------------------------------
510 combining_no_dedicated_notc_async 99.40% 332.44ns 3.01M
511 combining_no_dedicated_notc_async_dup 99.13% 333.34ns 3.00M
512 ----------------------------------------------------------------------------
513 combining_no_dedicated_tc_sync 93.38% 353.86ns 2.83M
514 combining_no_dedicated_tc_sync_dup 93.52% 353.31ns 2.83M
515 ----------------------------------------------------------------------------
516 combining_no_dedicated_tc_async 99.29% 332.78ns 3.00M
517 combining_no_dedicated_tc_async_dup 99.19% 333.11ns 3.00M
518 ----------------------------------------------------------------------------
519 ============================================================================
520 
521 ---------------------------------- Number of threads = 2
522 ============================================================================
523 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
524 ============================================================================
525 no_combining_base 213.60ns 4.68M
526 no_combining_dup 100.84% 211.82ns 4.72M
527 ----------------------------------------------------------------------------
528 ----------------------------------------------------------------------------
529 combining_dedicated_notc_sync 89.84% 237.76ns 4.21M
530 combining_dedicated_notc_sync_dup 89.85% 237.73ns 4.21M
531 ----------------------------------------------------------------------------
532 combining_dedicated_notc_async 93.80% 227.72ns 4.39M
533 combining_dedicated_notc_async_dup 87.85% 243.15ns 4.11M
534 ----------------------------------------------------------------------------
535 combining_dedicated_tc_sync 86.81% 246.06ns 4.06M
536 combining_dedicated_tc_sync_dup 87.15% 245.09ns 4.08M
537 ----------------------------------------------------------------------------
538 combining_dedicated_tc_async 92.14% 231.82ns 4.31M
539 combining_dedicated_tc_async_dup 92.04% 232.08ns 4.31M
540 ----------------------------------------------------------------------------
541 ----------------------------------------------------------------------------
542 combining_no_dedicated_notc_sync 95.20% 224.36ns 4.46M
543 combining_no_dedicated_notc_sync_dup 95.40% 223.91ns 4.47M
544 ----------------------------------------------------------------------------
545 combining_no_dedicated_notc_async 95.41% 223.89ns 4.47M
546 combining_no_dedicated_notc_async_dup 95.86% 222.82ns 4.49M
547 ----------------------------------------------------------------------------
548 combining_no_dedicated_tc_sync 94.43% 226.21ns 4.42M
549 combining_no_dedicated_tc_sync_dup 94.28% 226.56ns 4.41M
550 ----------------------------------------------------------------------------
551 combining_no_dedicated_tc_async 96.62% 221.07ns 4.52M
552 combining_no_dedicated_tc_async_dup 97.24% 219.66ns 4.55M
553 ----------------------------------------------------------------------------
554 ============================================================================
555 
556 ---------------------------------- Number of threads = 3
557 ============================================================================
558 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
559 ============================================================================
560 no_combining_base 188.20ns 5.31M
561 no_combining_dup 94.07% 200.07ns 5.00M
562 ----------------------------------------------------------------------------
563 ----------------------------------------------------------------------------
564 combining_dedicated_notc_sync 95.39% 197.30ns 5.07M
565 combining_dedicated_notc_sync_dup 94.50% 199.16ns 5.02M
566 ----------------------------------------------------------------------------
567 combining_dedicated_notc_async 75.29% 249.96ns 4.00M
568 combining_dedicated_notc_async_dup 72.97% 257.91ns 3.88M
569 ----------------------------------------------------------------------------
570 combining_dedicated_tc_sync 91.26% 206.22ns 4.85M
571 combining_dedicated_tc_sync_dup 90.68% 207.54ns 4.82M
572 ----------------------------------------------------------------------------
573 combining_dedicated_tc_async 89.64% 209.95ns 4.76M
574 combining_dedicated_tc_async_dup 88.21% 213.36ns 4.69M
575 ----------------------------------------------------------------------------
576 ----------------------------------------------------------------------------
577 combining_no_dedicated_notc_sync 96.19% 195.66ns 5.11M
578 combining_no_dedicated_notc_sync_dup 93.27% 201.78ns 4.96M
579 ----------------------------------------------------------------------------
580 combining_no_dedicated_notc_async 81.12% 231.99ns 4.31M
581 combining_no_dedicated_notc_async_dup 82.48% 228.19ns 4.38M
582 ----------------------------------------------------------------------------
583 combining_no_dedicated_tc_sync 79.48% 236.78ns 4.22M
584 combining_no_dedicated_tc_sync_dup 79.73% 236.04ns 4.24M
585 ----------------------------------------------------------------------------
586 combining_no_dedicated_tc_async 100.70% 186.90ns 5.35M
587 combining_no_dedicated_tc_async_dup 99.43% 189.27ns 5.28M
588 ----------------------------------------------------------------------------
589 ============================================================================
590 
591 ---------------------------------- Number of threads = 4
592 ============================================================================
593 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
594 ============================================================================
595 no_combining_base 242.84ns 4.12M
596 no_combining_dup 100.78% 240.96ns 4.15M
597 ----------------------------------------------------------------------------
598 ----------------------------------------------------------------------------
599 combining_dedicated_notc_sync 100.91% 240.65ns 4.16M
600 combining_dedicated_notc_sync_dup 99.76% 243.42ns 4.11M
601 ----------------------------------------------------------------------------
602 combining_dedicated_notc_async 102.06% 237.95ns 4.20M
603 combining_dedicated_notc_async_dup 101.63% 238.94ns 4.19M
604 ----------------------------------------------------------------------------
605 combining_dedicated_tc_sync 109.79% 221.18ns 4.52M
606 combining_dedicated_tc_sync_dup 108.94% 222.92ns 4.49M
607 ----------------------------------------------------------------------------
608 combining_dedicated_tc_async 133.01% 182.58ns 5.48M
609 combining_dedicated_tc_async_dup 134.91% 180.00ns 5.56M
610 ----------------------------------------------------------------------------
611 ----------------------------------------------------------------------------
612 combining_no_dedicated_notc_sync 108.77% 223.25ns 4.48M
613 combining_no_dedicated_notc_sync_dup 107.64% 225.61ns 4.43M
614 ----------------------------------------------------------------------------
615 combining_no_dedicated_notc_async 115.14% 210.91ns 4.74M
616 combining_no_dedicated_notc_async_dup 115.06% 211.05ns 4.74M
617 ----------------------------------------------------------------------------
618 combining_no_dedicated_tc_sync 116.36% 208.70ns 4.79M
619 combining_no_dedicated_tc_sync_dup 115.70% 209.89ns 4.76M
620 ----------------------------------------------------------------------------
621 combining_no_dedicated_tc_async 159.69% 152.07ns 6.58M
622 combining_no_dedicated_tc_async_dup 158.27% 153.43ns 6.52M
623 ----------------------------------------------------------------------------
624 ============================================================================
625 
626 ---------------------------------- Number of threads = 6
627 ============================================================================
628 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
629 ============================================================================
630 no_combining_base 281.36ns 3.55M
631 no_combining_dup 98.56% 285.46ns 3.50M
632 ----------------------------------------------------------------------------
633 ----------------------------------------------------------------------------
634 combining_dedicated_notc_sync 132.39% 212.51ns 4.71M
635 combining_dedicated_notc_sync_dup 133.10% 211.38ns 4.73M
636 ----------------------------------------------------------------------------
637 combining_dedicated_notc_async 141.35% 199.05ns 5.02M
638 combining_dedicated_notc_async_dup 143.18% 196.51ns 5.09M
639 ----------------------------------------------------------------------------
640 combining_dedicated_tc_sync 138.94% 202.50ns 4.94M
641 combining_dedicated_tc_sync_dup 138.64% 202.93ns 4.93M
642 ----------------------------------------------------------------------------
643 combining_dedicated_tc_async 199.76% 140.85ns 7.10M
644 combining_dedicated_tc_async_dup 200.28% 140.48ns 7.12M
645 ----------------------------------------------------------------------------
646 ----------------------------------------------------------------------------
647 combining_no_dedicated_notc_sync 155.48% 180.96ns 5.53M
648 combining_no_dedicated_notc_sync_dup 150.82% 186.55ns 5.36M
649 ----------------------------------------------------------------------------
650 combining_no_dedicated_notc_async 162.23% 173.43ns 5.77M
651 combining_no_dedicated_notc_async_dup 161.33% 174.39ns 5.73M
652 ----------------------------------------------------------------------------
653 combining_no_dedicated_tc_sync 167.90% 167.57ns 5.97M
654 combining_no_dedicated_tc_sync_dup 164.84% 170.69ns 5.86M
655 ----------------------------------------------------------------------------
656 combining_no_dedicated_tc_async 242.51% 116.02ns 8.62M
657 combining_no_dedicated_tc_async_dup 245.67% 114.53ns 8.73M
658 ----------------------------------------------------------------------------
659 ============================================================================
660 
661 ---------------------------------- Number of threads = 8
662 ============================================================================
663 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
664 ============================================================================
665 no_combining_base 315.57ns 3.17M
666 no_combining_dup 98.83% 319.32ns 3.13M
667 ----------------------------------------------------------------------------
668 ----------------------------------------------------------------------------
669 combining_dedicated_notc_sync 170.48% 185.11ns 5.40M
670 combining_dedicated_notc_sync_dup 174.57% 180.77ns 5.53M
671 ----------------------------------------------------------------------------
672 combining_dedicated_notc_async 178.57% 176.72ns 5.66M
673 combining_dedicated_notc_async_dup 181.30% 174.06ns 5.75M
674 ----------------------------------------------------------------------------
675 combining_dedicated_tc_sync 195.40% 161.50ns 6.19M
676 combining_dedicated_tc_sync_dup 197.18% 160.05ns 6.25M
677 ----------------------------------------------------------------------------
678 combining_dedicated_tc_async 322.03% 97.99ns 10.20M
679 combining_dedicated_tc_async_dup 324.51% 97.24ns 10.28M
680 ----------------------------------------------------------------------------
681 ----------------------------------------------------------------------------
682 combining_no_dedicated_notc_sync 205.61% 153.48ns 6.52M
683 combining_no_dedicated_notc_sync_dup 204.94% 153.98ns 6.49M
684 ----------------------------------------------------------------------------
685 combining_no_dedicated_notc_async 217.81% 144.88ns 6.90M
686 combining_no_dedicated_notc_async_dup 218.58% 144.37ns 6.93M
687 ----------------------------------------------------------------------------
688 combining_no_dedicated_tc_sync 223.96% 140.91ns 7.10M
689 combining_no_dedicated_tc_sync_dup 224.55% 140.53ns 7.12M
690 ----------------------------------------------------------------------------
691 combining_no_dedicated_tc_async 364.58% 86.56ns 11.55M
692 combining_no_dedicated_tc_async_dup 363.33% 86.86ns 11.51M
693 ----------------------------------------------------------------------------
694 ============================================================================
695 
696 ---------------------------------- Number of threads = 12
697 ============================================================================
698 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
699 ============================================================================
700 no_combining_base 353.59ns 2.83M
701 no_combining_dup 99.91% 353.91ns 2.83M
702 ----------------------------------------------------------------------------
703 ----------------------------------------------------------------------------
704 combining_dedicated_notc_sync 276.36% 127.95ns 7.82M
705 combining_dedicated_notc_sync_dup 278.88% 126.79ns 7.89M
706 ----------------------------------------------------------------------------
707 combining_dedicated_notc_async 249.52% 141.71ns 7.06M
708 combining_dedicated_notc_async_dup 247.26% 143.00ns 6.99M
709 ----------------------------------------------------------------------------
710 combining_dedicated_tc_sync 318.57% 110.99ns 9.01M
711 combining_dedicated_tc_sync_dup 326.27% 108.37ns 9.23M
712 ----------------------------------------------------------------------------
713 combining_dedicated_tc_async 428.50% 82.52ns 12.12M
714 combining_dedicated_tc_async_dup 429.19% 82.39ns 12.14M
715 ----------------------------------------------------------------------------
716 ----------------------------------------------------------------------------
717 combining_no_dedicated_notc_sync 276.54% 127.86ns 7.82M
718 combining_no_dedicated_notc_sync_dup 275.59% 128.31ns 7.79M
719 ----------------------------------------------------------------------------
720 combining_no_dedicated_notc_async 298.92% 118.29ns 8.45M
721 combining_no_dedicated_notc_async_dup 298.93% 118.28ns 8.45M
722 ----------------------------------------------------------------------------
723 combining_no_dedicated_tc_sync 300.56% 117.64ns 8.50M
724 combining_no_dedicated_tc_sync_dup 296.95% 119.07ns 8.40M
725 ----------------------------------------------------------------------------
726 combining_no_dedicated_tc_async 431.06% 82.03ns 12.19M
727 combining_no_dedicated_tc_async_dup 430.40% 82.15ns 12.17M
728 ----------------------------------------------------------------------------
729 ============================================================================
730 
731 ---------------------------------- Number of threads = 16
732 ============================================================================
733 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
734 ============================================================================
735 no_combining_base 358.57ns 2.79M
736 no_combining_dup 99.97% 358.70ns 2.79M
737 ----------------------------------------------------------------------------
738 ----------------------------------------------------------------------------
739 combining_dedicated_notc_sync 319.73% 112.15ns 8.92M
740 combining_dedicated_notc_sync_dup 327.86% 109.37ns 9.14M
741 ----------------------------------------------------------------------------
742 combining_dedicated_notc_async 296.17% 121.07ns 8.26M
743 combining_dedicated_notc_async_dup 306.86% 116.85ns 8.56M
744 ----------------------------------------------------------------------------
745 combining_dedicated_tc_sync 337.53% 106.24ns 9.41M
746 combining_dedicated_tc_sync_dup 347.98% 103.04ns 9.70M
747 ----------------------------------------------------------------------------
748 combining_dedicated_tc_async 423.80% 84.61ns 11.82M
749 combining_dedicated_tc_async_dup 421.07% 85.16ns 11.74M
750 ----------------------------------------------------------------------------
751 ----------------------------------------------------------------------------
752 combining_no_dedicated_notc_sync 321.94% 111.38ns 8.98M
753 combining_no_dedicated_notc_sync_dup 318.54% 112.57ns 8.88M
754 ----------------------------------------------------------------------------
755 combining_no_dedicated_notc_async 364.71% 98.32ns 10.17M
756 combining_no_dedicated_notc_async_dup 364.22% 98.45ns 10.16M
757 ----------------------------------------------------------------------------
758 combining_no_dedicated_tc_sync 322.91% 111.04ns 9.01M
759 combining_no_dedicated_tc_sync_dup 322.42% 111.21ns 8.99M
760 ----------------------------------------------------------------------------
761 combining_no_dedicated_tc_async 466.30% 76.90ns 13.00M
762 combining_no_dedicated_tc_async_dup 462.76% 77.49ns 12.91M
763 ----------------------------------------------------------------------------
764 ============================================================================
765 
766 ---------------------------------- Number of threads = 24
767 ============================================================================
768 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
769 ============================================================================
770 no_combining_base 348.54ns 2.87M
771 no_combining_dup 99.96% 348.69ns 2.87M
772 ----------------------------------------------------------------------------
773 ----------------------------------------------------------------------------
774 combining_dedicated_notc_sync 260.21% 133.95ns 7.47M
775 combining_dedicated_notc_sync_dup 257.84% 135.18ns 7.40M
776 ----------------------------------------------------------------------------
777 combining_dedicated_notc_async 242.25% 143.88ns 6.95M
778 combining_dedicated_notc_async_dup 235.88% 147.76ns 6.77M
779 ----------------------------------------------------------------------------
780 combining_dedicated_tc_sync 262.45% 132.80ns 7.53M
781 combining_dedicated_tc_sync_dup 251.14% 138.78ns 7.21M
782 ----------------------------------------------------------------------------
783 combining_dedicated_tc_async 256.89% 135.68ns 7.37M
784 combining_dedicated_tc_async_dup 304.76% 114.37ns 8.74M
785 ----------------------------------------------------------------------------
786 ----------------------------------------------------------------------------
787 combining_no_dedicated_notc_sync 270.20% 129.00ns 7.75M
788 combining_no_dedicated_notc_sync_dup 271.69% 128.29ns 7.80M
789 ----------------------------------------------------------------------------
790 combining_no_dedicated_notc_async 298.35% 116.82ns 8.56M
791 combining_no_dedicated_notc_async_dup 289.04% 120.59ns 8.29M
792 ----------------------------------------------------------------------------
793 combining_no_dedicated_tc_sync 286.59% 121.62ns 8.22M
794 combining_no_dedicated_tc_sync_dup 292.21% 119.28ns 8.38M
795 ----------------------------------------------------------------------------
796 combining_no_dedicated_tc_async 471.86% 73.87ns 13.54M
797 combining_no_dedicated_tc_async_dup 458.16% 76.08ns 13.14M
798 ----------------------------------------------------------------------------
799 ============================================================================
800 
801 ---------------------------------- Number of threads = 32
802 ============================================================================
803 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
804 ============================================================================
805 no_combining_base 337.61ns 2.96M
806 no_combining_dup 99.41% 339.60ns 2.94M
807 ----------------------------------------------------------------------------
808 ----------------------------------------------------------------------------
809 combining_dedicated_notc_sync 204.50% 165.09ns 6.06M
810 combining_dedicated_notc_sync_dup 233.28% 144.72ns 6.91M
811 ----------------------------------------------------------------------------
812 combining_dedicated_notc_async 187.20% 180.35ns 5.54M
813 combining_dedicated_notc_async_dup 192.76% 175.15ns 5.71M
814 ----------------------------------------------------------------------------
815 combining_dedicated_tc_sync 220.56% 153.07ns 6.53M
816 combining_dedicated_tc_sync_dup 207.62% 162.61ns 6.15M
817 ----------------------------------------------------------------------------
818 combining_dedicated_tc_async 317.11% 106.46ns 9.39M
819 combining_dedicated_tc_async_dup 318.92% 105.86ns 9.45M
820 ----------------------------------------------------------------------------
821 ----------------------------------------------------------------------------
822 combining_no_dedicated_notc_sync 259.29% 130.21ns 7.68M
823 combining_no_dedicated_notc_sync_dup 248.33% 135.95ns 7.36M
824 ----------------------------------------------------------------------------
825 combining_no_dedicated_notc_async 290.40% 116.26ns 8.60M
826 combining_no_dedicated_notc_async_dup 299.92% 112.57ns 8.88M
827 ----------------------------------------------------------------------------
828 combining_no_dedicated_tc_sync 281.91% 119.76ns 8.35M
829 combining_no_dedicated_tc_sync_dup 284.19% 118.80ns 8.42M
830 ----------------------------------------------------------------------------
831 combining_no_dedicated_tc_async 435.16% 77.58ns 12.89M
832 combining_no_dedicated_tc_async_dup 389.67% 86.64ns 11.54M
833 ----------------------------------------------------------------------------
834 ============================================================================
835 
836 ---------------------------------- Number of threads = 48
837 ============================================================================
838 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
839 ============================================================================
840 no_combining_base 334.48ns 2.99M
841 no_combining_dup 100.00% 334.46ns 2.99M
842 ----------------------------------------------------------------------------
843 ----------------------------------------------------------------------------
844 combining_dedicated_notc_sync 257.01% 130.14ns 7.68M
845 combining_dedicated_notc_sync_dup 254.13% 131.62ns 7.60M
846 ----------------------------------------------------------------------------
847 combining_dedicated_notc_async 189.56% 176.45ns 5.67M
848 combining_dedicated_notc_async_dup 247.68% 135.05ns 7.40M
849 ----------------------------------------------------------------------------
850 combining_dedicated_tc_sync 259.47% 128.91ns 7.76M
851 combining_dedicated_tc_sync_dup 281.34% 118.89ns 8.41M
852 ----------------------------------------------------------------------------
853 combining_dedicated_tc_async 301.96% 110.77ns 9.03M
854 combining_dedicated_tc_async_dup 347.65% 96.21ns 10.39M
855 ----------------------------------------------------------------------------
856 ----------------------------------------------------------------------------
857 combining_no_dedicated_notc_sync 268.45% 124.60ns 8.03M
858 combining_no_dedicated_notc_sync_dup 272.54% 122.73ns 8.15M
859 ----------------------------------------------------------------------------
860 combining_no_dedicated_notc_async 306.04% 109.29ns 9.15M
861 combining_no_dedicated_notc_async_dup 294.38% 113.62ns 8.80M
862 ----------------------------------------------------------------------------
863 combining_no_dedicated_tc_sync 280.89% 119.08ns 8.40M
864 combining_no_dedicated_tc_sync_dup 276.01% 121.18ns 8.25M
865 ----------------------------------------------------------------------------
866 combining_no_dedicated_tc_async 466.45% 71.71ns 13.95M
867 combining_no_dedicated_tc_async_dup 465.45% 71.86ns 13.92M
868 ----------------------------------------------------------------------------
869 ============================================================================
870 
871 ---------------------------------- Number of threads = 64
872 ============================================================================
873 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
874 ============================================================================
875 no_combining_base 335.68ns 2.98M
876 no_combining_dup 101.03% 332.25ns 3.01M
877 ----------------------------------------------------------------------------
878 ----------------------------------------------------------------------------
879 combining_dedicated_notc_sync 272.91% 123.00ns 8.13M
880 combining_dedicated_notc_sync_dup 270.56% 124.07ns 8.06M
881 ----------------------------------------------------------------------------
882 combining_dedicated_notc_async 200.44% 167.47ns 5.97M
883 combining_dedicated_notc_async_dup 208.36% 161.10ns 6.21M
884 ----------------------------------------------------------------------------
885 combining_dedicated_tc_sync 258.40% 129.91ns 7.70M
886 combining_dedicated_tc_sync_dup 249.16% 134.72ns 7.42M
887 ----------------------------------------------------------------------------
888 combining_dedicated_tc_async 378.86% 88.60ns 11.29M
889 combining_dedicated_tc_async_dup 299.32% 112.15ns 8.92M
890 ----------------------------------------------------------------------------
891 ----------------------------------------------------------------------------
892 combining_no_dedicated_notc_sync 272.18% 123.33ns 8.11M
893 combining_no_dedicated_notc_sync_dup 275.26% 121.95ns 8.20M
894 ----------------------------------------------------------------------------
895 combining_no_dedicated_notc_async 296.23% 113.32ns 8.82M
896 combining_no_dedicated_notc_async_dup 311.17% 107.88ns 9.27M
897 ----------------------------------------------------------------------------
898 combining_no_dedicated_tc_sync 283.30% 118.49ns 8.44M
899 combining_no_dedicated_tc_sync_dup 263.86% 127.22ns 7.86M
900 ----------------------------------------------------------------------------
901 combining_no_dedicated_tc_async 426.62% 78.68ns 12.71M
902 combining_no_dedicated_tc_async_dup 445.17% 75.40ns 13.26M
903 ----------------------------------------------------------------------------
904 ============================================================================
905 
906 ------------------------------------ custom interface
907 
908 ---------------------------------- Number of threads = 1
909 ============================================================================
910 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
911 ============================================================================
912 no_combining_base 329.49ns 3.03M
913 no_combining_dup 99.91% 329.79ns 3.03M
914 ----------------------------------------------------------------------------
915 ----------------------------------------------------------------------------
916 combining_dedicated_notc_sync 98.69% 333.88ns 3.00M
917 combining_dedicated_notc_sync_dup 98.70% 333.83ns 3.00M
918 ----------------------------------------------------------------------------
919 combining_dedicated_notc_async 98.22% 335.47ns 2.98M
920 combining_dedicated_notc_async_dup 98.16% 335.66ns 2.98M
921 ----------------------------------------------------------------------------
922 combining_dedicated_tc_sync 98.70% 333.85ns 3.00M
923 combining_dedicated_tc_sync_dup 98.78% 333.58ns 3.00M
924 ----------------------------------------------------------------------------
925 combining_dedicated_tc_async 98.14% 335.73ns 2.98M
926 combining_dedicated_tc_async_dup 97.92% 336.49ns 2.97M
927 ----------------------------------------------------------------------------
928 ----------------------------------------------------------------------------
929 combining_no_dedicated_notc_sync 98.94% 333.00ns 3.00M
930 combining_no_dedicated_notc_sync_dup 98.86% 333.29ns 3.00M
931 ----------------------------------------------------------------------------
932 combining_no_dedicated_notc_async 98.36% 334.99ns 2.99M
933 combining_no_dedicated_notc_async_dup 98.61% 334.15ns 2.99M
934 ----------------------------------------------------------------------------
935 combining_no_dedicated_tc_sync 99.07% 332.58ns 3.01M
936 combining_no_dedicated_tc_sync_dup 99.12% 332.41ns 3.01M
937 ----------------------------------------------------------------------------
938 combining_no_dedicated_tc_async 97.08% 339.38ns 2.95M
939 combining_no_dedicated_tc_async_dup 97.54% 337.81ns 2.96M
940 ----------------------------------------------------------------------------
941 ============================================================================
942 
943 ---------------------------------- Number of threads = 2
944 ============================================================================
945 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
946 ============================================================================
947 no_combining_base 216.71ns 4.61M
948 no_combining_dup 100.34% 215.97ns 4.63M
949 ----------------------------------------------------------------------------
950 ----------------------------------------------------------------------------
951 combining_dedicated_notc_sync 95.42% 227.11ns 4.40M
952 combining_dedicated_notc_sync_dup 94.16% 230.15ns 4.34M
953 ----------------------------------------------------------------------------
954 combining_dedicated_notc_async 91.84% 235.97ns 4.24M
955 combining_dedicated_notc_async_dup 91.41% 237.08ns 4.22M
956 ----------------------------------------------------------------------------
957 combining_dedicated_tc_sync 96.79% 223.90ns 4.47M
958 combining_dedicated_tc_sync_dup 96.54% 224.47ns 4.45M
959 ----------------------------------------------------------------------------
960 combining_dedicated_tc_async 90.90% 238.41ns 4.19M
961 combining_dedicated_tc_async_dup 95.45% 227.03ns 4.40M
962 ----------------------------------------------------------------------------
963 ----------------------------------------------------------------------------
964 combining_no_dedicated_notc_sync 101.13% 214.28ns 4.67M
965 combining_no_dedicated_notc_sync_dup 100.11% 216.48ns 4.62M
966 ----------------------------------------------------------------------------
967 combining_no_dedicated_notc_async 96.40% 224.80ns 4.45M
968 combining_no_dedicated_notc_async_dup 96.36% 224.90ns 4.45M
969 ----------------------------------------------------------------------------
970 combining_no_dedicated_tc_sync 100.86% 214.85ns 4.65M
971 combining_no_dedicated_tc_sync_dup 101.91% 212.65ns 4.70M
972 ----------------------------------------------------------------------------
973 combining_no_dedicated_tc_async 95.66% 226.54ns 4.41M
974 combining_no_dedicated_tc_async_dup 95.88% 226.03ns 4.42M
975 ----------------------------------------------------------------------------
976 ============================================================================
977 
978 ---------------------------------- Number of threads = 3
979 ============================================================================
980 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
981 ============================================================================
982 no_combining_base 189.61ns 5.27M
983 no_combining_dup 100.22% 189.20ns 5.29M
984 ----------------------------------------------------------------------------
985 ----------------------------------------------------------------------------
986 combining_dedicated_notc_sync 103.18% 183.76ns 5.44M
987 combining_dedicated_notc_sync_dup 103.66% 182.92ns 5.47M
988 ----------------------------------------------------------------------------
989 combining_dedicated_notc_async 77.14% 245.81ns 4.07M
990 combining_dedicated_notc_async_dup 90.25% 210.10ns 4.76M
991 ----------------------------------------------------------------------------
992 combining_dedicated_tc_sync 89.88% 210.95ns 4.74M
993 combining_dedicated_tc_sync_dup 87.83% 215.90ns 4.63M
994 ----------------------------------------------------------------------------
995 combining_dedicated_tc_async 89.33% 212.26ns 4.71M
996 combining_dedicated_tc_async_dup 85.19% 222.56ns 4.49M
997 ----------------------------------------------------------------------------
998 ----------------------------------------------------------------------------
999 combining_no_dedicated_notc_sync 98.43% 192.64ns 5.19M
1000 combining_no_dedicated_notc_sync_dup 101.15% 187.46ns 5.33M
1001 ----------------------------------------------------------------------------
1002 combining_no_dedicated_notc_async 83.77% 226.36ns 4.42M
1003 combining_no_dedicated_notc_async_dup 84.69% 223.89ns 4.47M
1004 ----------------------------------------------------------------------------
1005 combining_no_dedicated_tc_sync 85.47% 221.85ns 4.51M
1006 combining_no_dedicated_tc_sync_dup 86.32% 219.65ns 4.55M
1007 ----------------------------------------------------------------------------
1008 combining_no_dedicated_tc_async 105.62% 179.52ns 5.57M
1009 combining_no_dedicated_tc_async_dup 105.26% 180.14ns 5.55M
1010 ----------------------------------------------------------------------------
1011 ============================================================================
1012 
1013 ---------------------------------- Number of threads = 4
1014 ============================================================================
1015 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1016 ============================================================================
1017 no_combining_base 237.50ns 4.21M
1018 no_combining_dup 99.80% 237.97ns 4.20M
1019 ----------------------------------------------------------------------------
1020 ----------------------------------------------------------------------------
1021 combining_dedicated_notc_sync 112.56% 210.99ns 4.74M
1022 combining_dedicated_notc_sync_dup 104.08% 228.20ns 4.38M
1023 ----------------------------------------------------------------------------
1024 combining_dedicated_notc_async 101.44% 234.12ns 4.27M
1025 combining_dedicated_notc_async_dup 100.73% 235.77ns 4.24M
1026 ----------------------------------------------------------------------------
1027 combining_dedicated_tc_sync 111.70% 212.62ns 4.70M
1028 combining_dedicated_tc_sync_dup 113.00% 210.18ns 4.76M
1029 ----------------------------------------------------------------------------
1030 combining_dedicated_tc_async 131.11% 181.15ns 5.52M
1031 combining_dedicated_tc_async_dup 132.65% 179.04ns 5.59M
1032 ----------------------------------------------------------------------------
1033 ----------------------------------------------------------------------------
1034 combining_no_dedicated_notc_sync 115.76% 205.17ns 4.87M
1035 combining_no_dedicated_notc_sync_dup 114.70% 207.06ns 4.83M
1036 ----------------------------------------------------------------------------
1037 combining_no_dedicated_notc_async 111.63% 212.76ns 4.70M
1038 combining_no_dedicated_notc_async_dup 111.91% 212.22ns 4.71M
1039 ----------------------------------------------------------------------------
1040 combining_no_dedicated_tc_sync 120.07% 197.80ns 5.06M
1041 combining_no_dedicated_tc_sync_dup 118.25% 200.85ns 4.98M
1042 ----------------------------------------------------------------------------
1043 combining_no_dedicated_tc_async 153.73% 154.49ns 6.47M
1044 combining_no_dedicated_tc_async_dup 153.08% 155.15ns 6.45M
1045 ----------------------------------------------------------------------------
1046 ============================================================================
1047 
1048 ---------------------------------- Number of threads = 6
1049 ============================================================================
1050 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1051 ============================================================================
1052 no_combining_base 281.56ns 3.55M
1053 no_combining_dup 99.97% 281.65ns 3.55M
1054 ----------------------------------------------------------------------------
1055 ----------------------------------------------------------------------------
1056 combining_dedicated_notc_sync 144.76% 194.50ns 5.14M
1057 combining_dedicated_notc_sync_dup 149.96% 187.76ns 5.33M
1058 ----------------------------------------------------------------------------
1059 combining_dedicated_notc_async 147.72% 190.61ns 5.25M
1060 combining_dedicated_notc_async_dup 140.86% 199.89ns 5.00M
1061 ----------------------------------------------------------------------------
1062 combining_dedicated_tc_sync 154.17% 182.63ns 5.48M
1063 combining_dedicated_tc_sync_dup 156.60% 179.80ns 5.56M
1064 ----------------------------------------------------------------------------
1065 combining_dedicated_tc_async 202.42% 139.10ns 7.19M
1066 combining_dedicated_tc_async_dup 203.44% 138.40ns 7.23M
1067 ----------------------------------------------------------------------------
1068 ----------------------------------------------------------------------------
1069 combining_no_dedicated_notc_sync 168.33% 167.27ns 5.98M
1070 combining_no_dedicated_notc_sync_dup 166.02% 169.59ns 5.90M
1071 ----------------------------------------------------------------------------
1072 combining_no_dedicated_notc_async 166.44% 169.16ns 5.91M
1073 combining_no_dedicated_notc_async_dup 160.14% 175.82ns 5.69M
1074 ----------------------------------------------------------------------------
1075 combining_no_dedicated_tc_sync 181.79% 154.88ns 6.46M
1076 combining_no_dedicated_tc_sync_dup 180.25% 156.20ns 6.40M
1077 ----------------------------------------------------------------------------
1078 combining_no_dedicated_tc_async 240.56% 117.04ns 8.54M
1079 combining_no_dedicated_tc_async_dup 240.74% 116.96ns 8.55M
1080 ----------------------------------------------------------------------------
1081 ============================================================================
1082 
1083 ---------------------------------- Number of threads = 8
1084 ============================================================================
1085 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1086 ============================================================================
1087 no_combining_base 312.99ns 3.19M
1088 no_combining_dup 98.93% 316.37ns 3.16M
1089 ----------------------------------------------------------------------------
1090 ----------------------------------------------------------------------------
1091 combining_dedicated_notc_sync 182.71% 171.30ns 5.84M
1092 combining_dedicated_notc_sync_dup 183.23% 170.82ns 5.85M
1093 ----------------------------------------------------------------------------
1094 combining_dedicated_notc_async 183.16% 170.88ns 5.85M
1095 combining_dedicated_notc_async_dup 181.29% 172.64ns 5.79M
1096 ----------------------------------------------------------------------------
1097 combining_dedicated_tc_sync 191.49% 163.45ns 6.12M
1098 combining_dedicated_tc_sync_dup 191.04% 163.84ns 6.10M
1099 ----------------------------------------------------------------------------
1100 combining_dedicated_tc_async 302.89% 103.34ns 9.68M
1101 combining_dedicated_tc_async_dup 304.07% 102.94ns 9.71M
1102 ----------------------------------------------------------------------------
1103 ----------------------------------------------------------------------------
1104 combining_no_dedicated_notc_sync 220.41% 142.00ns 7.04M
1105 combining_no_dedicated_notc_sync_dup 219.90% 142.34ns 7.03M
1106 ----------------------------------------------------------------------------
1107 combining_no_dedicated_notc_async 218.66% 143.14ns 6.99M
1108 combining_no_dedicated_notc_async_dup 218.74% 143.09ns 6.99M
1109 ----------------------------------------------------------------------------
1110 combining_no_dedicated_tc_sync 241.82% 129.43ns 7.73M
1111 combining_no_dedicated_tc_sync_dup 241.72% 129.48ns 7.72M
1112 ----------------------------------------------------------------------------
1113 combining_no_dedicated_tc_async 352.39% 88.82ns 11.26M
1114 combining_no_dedicated_tc_async_dup 350.17% 89.38ns 11.19M
1115 ----------------------------------------------------------------------------
1116 ============================================================================
1117 
1118 ---------------------------------- Number of threads = 12
1119 ============================================================================
1120 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1121 ============================================================================
1122 no_combining_base 350.05ns 2.86M
1123 no_combining_dup 99.06% 353.37ns 2.83M
1124 ----------------------------------------------------------------------------
1125 ----------------------------------------------------------------------------
1126 combining_dedicated_notc_sync 266.87% 131.17ns 7.62M
1127 combining_dedicated_notc_sync_dup 245.79% 142.42ns 7.02M
1128 ----------------------------------------------------------------------------
1129 combining_dedicated_notc_async 238.57% 146.73ns 6.82M
1130 combining_dedicated_notc_async_dup 240.02% 145.84ns 6.86M
1131 ----------------------------------------------------------------------------
1132 combining_dedicated_tc_sync 316.70% 110.53ns 9.05M
1133 combining_dedicated_tc_sync_dup 321.05% 109.03ns 9.17M
1134 ----------------------------------------------------------------------------
1135 combining_dedicated_tc_async 403.10% 86.84ns 11.52M
1136 combining_dedicated_tc_async_dup 409.94% 85.39ns 11.71M
1137 ----------------------------------------------------------------------------
1138 ----------------------------------------------------------------------------
1139 combining_no_dedicated_notc_sync 300.23% 116.59ns 8.58M
1140 combining_no_dedicated_notc_sync_dup 299.07% 117.04ns 8.54M
1141 ----------------------------------------------------------------------------
1142 combining_no_dedicated_notc_async 297.79% 117.55ns 8.51M
1143 combining_no_dedicated_notc_async_dup 296.66% 118.00ns 8.47M
1144 ----------------------------------------------------------------------------
1145 combining_no_dedicated_tc_sync 328.07% 106.70ns 9.37M
1146 combining_no_dedicated_tc_sync_dup 331.52% 105.59ns 9.47M
1147 ----------------------------------------------------------------------------
1148 combining_no_dedicated_tc_async 424.57% 82.45ns 12.13M
1149 combining_no_dedicated_tc_async_dup 409.47% 85.49ns 11.70M
1150 ----------------------------------------------------------------------------
1151 ============================================================================
1152 
1153 ---------------------------------- Number of threads = 16
1154 ============================================================================
1155 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1156 ============================================================================
1157 no_combining_base 360.47ns 2.77M
1158 no_combining_dup 100.11% 360.07ns 2.78M
1159 ----------------------------------------------------------------------------
1160 ----------------------------------------------------------------------------
1161 combining_dedicated_notc_sync 320.54% 112.46ns 8.89M
1162 combining_dedicated_notc_sync_dup 313.31% 115.05ns 8.69M
1163 ----------------------------------------------------------------------------
1164 combining_dedicated_notc_async 296.83% 121.44ns 8.23M
1165 combining_dedicated_notc_async_dup 289.91% 124.34ns 8.04M
1166 ----------------------------------------------------------------------------
1167 combining_dedicated_tc_sync 364.27% 98.96ns 10.11M
1168 combining_dedicated_tc_sync_dup 361.10% 99.82ns 10.02M
1169 ----------------------------------------------------------------------------
1170 combining_dedicated_tc_async 424.43% 84.93ns 11.77M
1171 combining_dedicated_tc_async_dup 418.07% 86.22ns 11.60M
1172 ----------------------------------------------------------------------------
1173 ----------------------------------------------------------------------------
1174 combining_no_dedicated_notc_sync 373.13% 96.60ns 10.35M
1175 combining_no_dedicated_notc_sync_dup 364.35% 98.93ns 10.11M
1176 ----------------------------------------------------------------------------
1177 combining_no_dedicated_notc_async 361.40% 99.74ns 10.03M
1178 combining_no_dedicated_notc_async_dup 366.49% 98.36ns 10.17M
1179 ----------------------------------------------------------------------------
1180 combining_no_dedicated_tc_sync 382.22% 94.31ns 10.60M
1181 combining_no_dedicated_tc_sync_dup 380.64% 94.70ns 10.56M
1182 ----------------------------------------------------------------------------
1183 combining_no_dedicated_tc_async 461.14% 78.17ns 12.79M
1184 combining_no_dedicated_tc_async_dup 481.50% 74.86ns 13.36M
1185 ----------------------------------------------------------------------------
1186 ============================================================================
1187 
1188 ---------------------------------- Number of threads = 24
1189 ============================================================================
1190 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1191 ============================================================================
1192 no_combining_base 348.97ns 2.87M
1193 no_combining_dup 100.12% 348.54ns 2.87M
1194 ----------------------------------------------------------------------------
1195 ----------------------------------------------------------------------------
1196 combining_dedicated_notc_sync 234.17% 149.02ns 6.71M
1197 combining_dedicated_notc_sync_dup 205.54% 169.78ns 5.89M
1198 ----------------------------------------------------------------------------
1199 combining_dedicated_notc_async 248.28% 140.55ns 7.11M
1200 combining_dedicated_notc_async_dup 239.71% 145.58ns 6.87M
1201 ----------------------------------------------------------------------------
1202 combining_dedicated_tc_sync 272.87% 127.89ns 7.82M
1203 combining_dedicated_tc_sync_dup 235.76% 148.02ns 6.76M
1204 ----------------------------------------------------------------------------
1205 combining_dedicated_tc_async 295.71% 118.01ns 8.47M
1206 combining_dedicated_tc_async_dup 265.87% 131.25ns 7.62M
1207 ----------------------------------------------------------------------------
1208 ----------------------------------------------------------------------------
1209 combining_no_dedicated_notc_sync 298.96% 116.73ns 8.57M
1210 combining_no_dedicated_notc_sync_dup 297.67% 117.23ns 8.53M
1211 ----------------------------------------------------------------------------
1212 combining_no_dedicated_notc_async 298.44% 116.93ns 8.55M
1213 combining_no_dedicated_notc_async_dup 292.80% 119.18ns 8.39M
1214 ----------------------------------------------------------------------------
1215 combining_no_dedicated_tc_sync 316.44% 110.28ns 9.07M
1216 combining_no_dedicated_tc_sync_dup 317.52% 109.90ns 9.10M
1217 ----------------------------------------------------------------------------
1218 combining_no_dedicated_tc_async 432.64% 80.66ns 12.40M
1219 combining_no_dedicated_tc_async_dup 441.55% 79.03ns 12.65M
1220 ----------------------------------------------------------------------------
1221 ============================================================================
1222 
1223 ---------------------------------- Number of threads = 32
1224 ============================================================================
1225 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1226 ============================================================================
1227 no_combining_base 338.90ns 2.95M
1228 no_combining_dup 100.01% 338.87ns 2.95M
1229 ----------------------------------------------------------------------------
1230 ----------------------------------------------------------------------------
1231 combining_dedicated_notc_sync 204.34% 165.85ns 6.03M
1232 combining_dedicated_notc_sync_dup 202.84% 167.07ns 5.99M
1233 ----------------------------------------------------------------------------
1234 combining_dedicated_notc_async 192.27% 176.26ns 5.67M
1235 combining_dedicated_notc_async_dup 188.61% 179.68ns 5.57M
1236 ----------------------------------------------------------------------------
1237 combining_dedicated_tc_sync 247.57% 136.89ns 7.31M
1238 combining_dedicated_tc_sync_dup 285.53% 118.69ns 8.43M
1239 ----------------------------------------------------------------------------
1240 combining_dedicated_tc_async 277.97% 121.92ns 8.20M
1241 combining_dedicated_tc_async_dup 231.11% 146.64ns 6.82M
1242 ----------------------------------------------------------------------------
1243 ----------------------------------------------------------------------------
1244 combining_no_dedicated_notc_sync 299.20% 113.27ns 8.83M
1245 combining_no_dedicated_notc_sync_dup 289.53% 117.05ns 8.54M
1246 ----------------------------------------------------------------------------
1247 combining_no_dedicated_notc_async 282.29% 120.05ns 8.33M
1248 combining_no_dedicated_notc_async_dup 305.09% 111.08ns 9.00M
1249 ----------------------------------------------------------------------------
1250 combining_no_dedicated_tc_sync 312.52% 108.44ns 9.22M
1251 combining_no_dedicated_tc_sync_dup 324.88% 104.31ns 9.59M
1252 ----------------------------------------------------------------------------
1253 combining_no_dedicated_tc_async 420.99% 80.50ns 12.42M
1254 combining_no_dedicated_tc_async_dup 406.58% 83.35ns 12.00M
1255 ----------------------------------------------------------------------------
1256 ============================================================================
1257 
1258 ---------------------------------- Number of threads = 48
1259 ============================================================================
1260 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1261 ============================================================================
1262 no_combining_base 334.84ns 2.99M
1263 no_combining_dup 99.57% 336.29ns 2.97M
1264 ----------------------------------------------------------------------------
1265 ----------------------------------------------------------------------------
1266 combining_dedicated_notc_sync 212.82% 157.34ns 6.36M
1267 combining_dedicated_notc_sync_dup 198.39% 168.78ns 5.93M
1268 ----------------------------------------------------------------------------
1269 combining_dedicated_notc_async 166.74% 200.82ns 4.98M
1270 combining_dedicated_notc_async_dup 197.07% 169.91ns 5.89M
1271 ----------------------------------------------------------------------------
1272 combining_dedicated_tc_sync 246.35% 135.92ns 7.36M
1273 combining_dedicated_tc_sync_dup 209.52% 159.81ns 6.26M
1274 ----------------------------------------------------------------------------
1275 combining_dedicated_tc_async 293.94% 113.91ns 8.78M
1276 combining_dedicated_tc_async_dup 280.74% 119.27ns 8.38M
1277 ----------------------------------------------------------------------------
1278 ----------------------------------------------------------------------------
1279 combining_no_dedicated_notc_sync 301.60% 111.02ns 9.01M
1280 combining_no_dedicated_notc_sync_dup 296.10% 113.09ns 8.84M
1281 ----------------------------------------------------------------------------
1282 combining_no_dedicated_notc_async 308.91% 108.40ns 9.23M
1283 combining_no_dedicated_notc_async_dup 298.48% 112.18ns 8.91M
1284 ----------------------------------------------------------------------------
1285 combining_no_dedicated_tc_sync 331.11% 101.13ns 9.89M
1286 combining_no_dedicated_tc_sync_dup 329.37% 101.66ns 9.84M
1287 ----------------------------------------------------------------------------
1288 combining_no_dedicated_tc_async 451.58% 74.15ns 13.49M
1289 combining_no_dedicated_tc_async_dup 431.37% 77.62ns 12.88M
1290 ----------------------------------------------------------------------------
1291 ============================================================================
1292 
1293 ---------------------------------- Number of threads = 64
1294 ============================================================================
1295 folly/experimental/flat_combining/test/FlatCombiningBenchmark.cpprelative time/iter iters/s
1296 ============================================================================
1297 no_combining_base 336.22ns 2.97M
1298 no_combining_dup 100.69% 333.92ns 2.99M
1299 ----------------------------------------------------------------------------
1300 ----------------------------------------------------------------------------
1301 combining_dedicated_notc_sync 230.57% 145.82ns 6.86M
1302 combining_dedicated_notc_sync_dup 221.08% 152.08ns 6.58M
1303 ----------------------------------------------------------------------------
1304 combining_dedicated_notc_async 232.38% 144.69ns 6.91M
1305 combining_dedicated_notc_async_dup 192.77% 174.41ns 5.73M
1306 ----------------------------------------------------------------------------
1307 combining_dedicated_tc_sync 284.07% 118.36ns 8.45M
1308 combining_dedicated_tc_sync_dup 298.03% 112.81ns 8.86M
1309 ----------------------------------------------------------------------------
1310 combining_dedicated_tc_async 361.07% 93.12ns 10.74M
1311 combining_dedicated_tc_async_dup 324.11% 103.74ns 9.64M
1312 ----------------------------------------------------------------------------
1313 ----------------------------------------------------------------------------
1314 combining_no_dedicated_notc_sync 284.58% 118.15ns 8.46M
1315 combining_no_dedicated_notc_sync_dup 301.73% 111.43ns 8.97M
1316 ----------------------------------------------------------------------------
1317 combining_no_dedicated_notc_async 294.87% 114.02ns 8.77M
1318 combining_no_dedicated_notc_async_dup 287.51% 116.94ns 8.55M
1319 ----------------------------------------------------------------------------
1320 combining_no_dedicated_tc_sync 317.96% 105.74ns 9.46M
1321 combining_no_dedicated_tc_sync_dup 332.45% 101.13ns 9.89M
1322 ----------------------------------------------------------------------------
1323 combining_no_dedicated_tc_async 441.96% 76.07ns 13.15M
1324 combining_no_dedicated_tc_async_dup 393.82% 85.37ns 11.71M
1325 ----------------------------------------------------------------------------
1326 ============================================================================
1327 [ OK ] FlatCombining.folly_benchmark (455269 ms)
1328 [ RUN ] FlatCombining.direct_measurement
1329 
1330 Running benchmarks on machine with 32 logical cores
1331 
1332 ------------------------------------ custom interface
1333 
1334 ------------------------------------ Number of threads = 1
1335 
1336 Test_name, Max time, Avg time, Min time, % base min / min
1337 
1338 no_combining - base 334 ns 331 ns 329 ns
1339 no_combining - dup 335 ns 332 ns 331 ns 99%
1340 ---------------------------------------
1341 ---- dedicated-------------------------
1342 combining_notc_sync 340 ns 335 ns 332 ns 99%
1343 combining_notc_sync - dup 337 ns 335 ns 333 ns 98%
1344 ---------------------------------------
1345 combining_notc_async 360 ns 343 ns 338 ns 97%
1346 combining_notc_async - dup 339 ns 337 ns 336 ns 98%
1347 ---------------------------------------
1348 combining_tc_sync 337 ns 335 ns 333 ns 98%
1349 combining_tc_sync - dup 346 ns 336 ns 332 ns 99%
1350 ---------------------------------------
1351 combining_tc_async 338 ns 336 ns 335 ns 98%
1352 combining_tc_async - dup 338 ns 336 ns 335 ns 98%
1353 ---------------------------------------
1354 ---- no dedicated----------------------
1355 combining_notc_sync 338 ns 335 ns 333 ns 98%
1356 combining_notc_sync - dup 337 ns 334 ns 333 ns 98%
1357 ---------------------------------------
1358 combining_notc_async 339 ns 336 ns 335 ns 98%
1359 combining_notc_async - dup 347 ns 340 ns 336 ns 98%
1360 ---------------------------------------
1361 combining_tc_sync 337 ns 335 ns 333 ns 98%
1362 combining_tc_sync - dup 436 ns 386 ns 333 ns 98%
1363 ---------------------------------------
1364 combining_tc_async 340 ns 337 ns 335 ns 98%
1365 combining_tc_async - dup 338 ns 336 ns 335 ns 98%
1366 ---------------------------------------
1367 
1368 ------------------------------------ Number of threads = 2
1369 
1370 Test_name, Max time, Avg time, Min time, % base min / min
1371 
1372 no_combining - base 315 ns 226 ns 211 ns
1373 no_combining - dup 217 ns 216 ns 213 ns 98%
1374 ---------------------------------------
1375 ---- dedicated-------------------------
1376 combining_notc_sync 251 ns 237 ns 229 ns 92%
1377 combining_notc_sync - dup 250 ns 241 ns 226 ns 93%
1378 ---------------------------------------
1379 combining_notc_async 278 ns 268 ns 252 ns 83%
1380 combining_notc_async - dup 297 ns 263 ns 245 ns 86%
1381 ---------------------------------------
1382 combining_tc_sync 254 ns 246 ns 234 ns 90%
1383 combining_tc_sync - dup 335 ns 252 ns 230 ns 91%
1384 ---------------------------------------
1385 combining_tc_async 305 ns 282 ns 245 ns 86%
1386 combining_tc_async - dup 284 ns 256 ns 239 ns 88%
1387 ---------------------------------------
1388 ---- no dedicated----------------------
1389 combining_notc_sync 230 ns 222 ns 217 ns 97%
1390 combining_notc_sync - dup 231 ns 225 ns 218 ns 96%
1391 ---------------------------------------
1392 combining_notc_async 244 ns 238 ns 233 ns 90%
1393 combining_notc_async - dup 241 ns 236 ns 231 ns 91%
1394 ---------------------------------------
1395 combining_tc_sync 283 ns 239 ns 221 ns 95%
1396 combining_tc_sync - dup 299 ns 247 ns 225 ns 93%
1397 ---------------------------------------
1398 combining_tc_async 290 ns 270 ns 244 ns 86%
1399 combining_tc_async - dup 290 ns 251 ns 238 ns 88%
1400 ---------------------------------------
1401 
1402 ------------------------------------ Number of threads = 3
1403 
1404 Test_name, Max time, Avg time, Min time, % base min / min
1405 
1406 no_combining - base 211 ns 197 ns 190 ns
1407 no_combining - dup 209 ns 201 ns 195 ns 97%
1408 ---------------------------------------
1409 ---- dedicated-------------------------
1410 combining_notc_sync 258 ns 197 ns 168 ns 112%
1411 combining_notc_sync - dup 274 ns 200 ns 162 ns 117%
1412 ---------------------------------------
1413 combining_notc_async 307 ns 281 ns 260 ns 73%
1414 combining_notc_async - dup 284 ns 258 ns 216 ns 88%
1415 ---------------------------------------
1416 combining_tc_sync 228 ns 215 ns 192 ns 98%
1417 combining_tc_sync - dup 216 ns 203 ns 178 ns 107%
1418 ---------------------------------------
1419 combining_tc_async 246 ns 233 ns 220 ns 86%
1420 combining_tc_async - dup 236 ns 221 ns 208 ns 91%
1421 ---------------------------------------
1422 ---- no dedicated----------------------
1423 combining_notc_sync 204 ns 198 ns 184 ns 103%
1424 combining_notc_sync - dup 203 ns 198 ns 193 ns 98%
1425 ---------------------------------------
1426 combining_notc_async 238 ns 225 ns 218 ns 87%
1427 combining_notc_async - dup 231 ns 227 ns 223 ns 85%
1428 ---------------------------------------
1429 combining_tc_sync 220 ns 216 ns 211 ns 90%
1430 combining_tc_sync - dup 227 ns 223 ns 219 ns 87%
1431 ---------------------------------------
1432 combining_tc_async 182 ns 181 ns 179 ns 106%
1433 combining_tc_async - dup 186 ns 181 ns 180 ns 105%
1434 ---------------------------------------
1435 
1436 ------------------------------------ Number of threads = 4
1437 
1438 Test_name, Max time, Avg time, Min time, % base min / min
1439 
1440 no_combining - base 258 ns 245 ns 238 ns
1441 no_combining - dup 262 ns 249 ns 245 ns 97%
1442 ---------------------------------------
1443 ---- dedicated-------------------------
1444 combining_notc_sync 264 ns 250 ns 220 ns 107%
1445 combining_notc_sync - dup 260 ns 254 ns 231 ns 102%
1446 ---------------------------------------
1447 combining_notc_async 266 ns 255 ns 233 ns 102%
1448 combining_notc_async - dup 268 ns 260 ns 252 ns 94%
1449 ---------------------------------------
1450 combining_tc_sync 250 ns 240 ns 215 ns 110%
1451 combining_tc_sync - dup 252 ns 242 ns 217 ns 109%
1452 ---------------------------------------
1453 combining_tc_async 199 ns 190 ns 183 ns 129%
1454 combining_tc_async - dup 199 ns 189 ns 178 ns 133%
1455 ---------------------------------------
1456 ---- no dedicated----------------------
1457 combining_notc_sync 223 ns 211 ns 203 ns 116%
1458 combining_notc_sync - dup 218 ns 211 ns 202 ns 117%
1459 ---------------------------------------
1460 combining_notc_async 222 ns 213 ns 207 ns 114%
1461 combining_notc_async - dup 236 ns 222 ns 215 ns 110%
1462 ---------------------------------------
1463 combining_tc_sync 202 ns 199 ns 197 ns 120%
1464 combining_tc_sync - dup 207 ns 199 ns 194 ns 122%
1465 ---------------------------------------
1466 combining_tc_async 162 ns 157 ns 152 ns 155%
1467 combining_tc_async - dup 188 ns 161 ns 154 ns 154%
1468 ---------------------------------------
1469 
1470 ------------------------------------ Number of threads = 6
1471 
1472 Test_name, Max time, Avg time, Min time, % base min / min
1473 
1474 no_combining - base 298 ns 292 ns 281 ns
1475 no_combining - dup 296 ns 289 ns 270 ns 104%
1476 ---------------------------------------
1477 ---- dedicated-------------------------
1478 combining_notc_sync 221 ns 211 ns 196 ns 143%
1479 combining_notc_sync - dup 247 ns 211 ns 192 ns 146%
1480 ---------------------------------------
1481 combining_notc_async 216 ns 205 ns 194 ns 144%
1482 combining_notc_async - dup 215 ns 206 ns 197 ns 142%
1483 ---------------------------------------
1484 combining_tc_sync 225 ns 204 ns 185 ns 151%
1485 combining_tc_sync - dup 229 ns 210 ns 186 ns 151%
1486 ---------------------------------------
1487 combining_tc_async 165 ns 152 ns 144 ns 194%
1488 combining_tc_async - dup 166 ns 150 ns 143 ns 195%
1489 ---------------------------------------
1490 ---- no dedicated----------------------
1491 combining_notc_sync 184 ns 182 ns 180 ns 155%
1492 combining_notc_sync - dup 176 ns 174 ns 172 ns 163%
1493 ---------------------------------------
1494 combining_notc_async 179 ns 177 ns 174 ns 161%
1495 combining_notc_async - dup 186 ns 181 ns 177 ns 158%
1496 ---------------------------------------
1497 combining_tc_sync 164 ns 163 ns 160 ns 174%
1498 combining_tc_sync - dup 171 ns 168 ns 161 ns 173%
1499 ---------------------------------------
1500 combining_tc_async 142 ns 139 ns 138 ns 202%
1501 combining_tc_async - dup 141 ns 136 ns 119 ns 235%
1502 ---------------------------------------
1503 
1504 ------------------------------------ Number of threads = 8
1505 
1506 Test_name, Max time, Avg time, Min time, % base min / min
1507 
1508 no_combining - base 333 ns 328 ns 315 ns
1509 no_combining - dup 336 ns 330 ns 327 ns 96%
1510 ---------------------------------------
1511 ---- dedicated-------------------------
1512 combining_notc_sync 203 ns 179 ns 172 ns 183%
1513 combining_notc_sync - dup 190 ns 177 ns 171 ns 183%
1514 ---------------------------------------
1515 combining_notc_async 204 ns 183 ns 170 ns 185%
1516 combining_notc_async - dup 201 ns 187 ns 176 ns 179%
1517 ---------------------------------------
1518 combining_tc_sync 177 ns 170 ns 165 ns 190%
1519 combining_tc_sync - dup 178 ns 167 ns 164 ns 192%
1520 ---------------------------------------
1521 combining_tc_async 134 ns 115 ns 105 ns 300%
1522 combining_tc_async - dup 132 ns 115 ns 103 ns 304%
1523 ---------------------------------------
1524 ---- no dedicated----------------------
1525 combining_notc_sync 154 ns 145 ns 143 ns 220%
1526 combining_notc_sync - dup 153 ns 144 ns 142 ns 222%
1527 ---------------------------------------
1528 combining_notc_async 145 ns 144 ns 143 ns 219%
1529 combining_notc_async - dup 157 ns 148 ns 144 ns 218%
1530 ---------------------------------------
1531 combining_tc_sync 142 ns 134 ns 130 ns 241%
1532 combining_tc_sync - dup 144 ns 136 ns 130 ns 241%
1533 ---------------------------------------
1534 combining_tc_async 118 ns 99 ns 91 ns 344%
1535 combining_tc_async - dup 118 ns 95 ns 91 ns 344%
1536 ---------------------------------------
1537 
1538 ------------------------------------ Number of threads = 12
1539 
1540 Test_name, Max time, Avg time, Min time, % base min / min
1541 
1542 no_combining - base 361 ns 357 ns 353 ns
1543 no_combining - dup 361 ns 357 ns 355 ns 99%
1544 ---------------------------------------
1545 ---- dedicated-------------------------
1546 combining_notc_sync 190 ns 157 ns 138 ns 255%
1547 combining_notc_sync - dup 162 ns 149 ns 138 ns 255%
1548 ---------------------------------------
1549 combining_notc_async 163 ns 153 ns 145 ns 242%
1550 combining_notc_async - dup 194 ns 158 ns 152 ns 231%
1551 ---------------------------------------
1552 combining_tc_sync 181 ns 128 ns 111 ns 316%
1553 combining_tc_sync - dup 183 ns 148 ns 121 ns 289%
1554 ---------------------------------------
1555 combining_tc_async 92 ns 89 ns 87 ns 402%
1556 combining_tc_async - dup 152 ns 105 ns 87 ns 405%
1557 ---------------------------------------
1558 ---- no dedicated----------------------
1559 combining_notc_sync 120 ns 119 ns 118 ns 298%
1560 combining_notc_sync - dup 120 ns 119 ns 118 ns 298%
1561 ---------------------------------------
1562 combining_notc_async 122 ns 120 ns 120 ns 294%
1563 combining_notc_async - dup 121 ns 120 ns 118 ns 297%
1564 ---------------------------------------
1565 combining_tc_sync 110 ns 108 ns 106 ns 331%
1566 combining_tc_sync - dup 110 ns 109 ns 107 ns 327%
1567 ---------------------------------------
1568 combining_tc_async 88 ns 87 ns 85 ns 411%
1569 combining_tc_async - dup 90 ns 88 ns 85 ns 411%
1570 ---------------------------------------
1571 
1572 ------------------------------------ Number of threads = 16
1573 
1574 Test_name, Max time, Avg time, Min time, % base min / min
1575 
1576 no_combining - base 363 ns 361 ns 360 ns
1577 no_combining - dup 362 ns 361 ns 358 ns 100%
1578 ---------------------------------------
1579 ---- dedicated-------------------------
1580 combining_notc_sync 177 ns 136 ns 111 ns 323%
1581 combining_notc_sync - dup 185 ns 148 ns 112 ns 320%
1582 ---------------------------------------
1583 combining_notc_async 191 ns 151 ns 122 ns 294%
1584 combining_notc_async - dup 179 ns 157 ns 118 ns 305%
1585 ---------------------------------------
1586 combining_tc_sync 154 ns 125 ns 100 ns 360%
1587 combining_tc_sync - dup 166 ns 130 ns 98 ns 367%
1588 ---------------------------------------
1589 combining_tc_async 143 ns 107 ns 86 ns 418%
1590 combining_tc_async - dup 132 ns 112 ns 88 ns 407%
1591 ---------------------------------------
1592 ---- no dedicated----------------------
1593 combining_notc_sync 121 ns 103 ns 98 ns 367%
1594 combining_notc_sync - dup 117 ns 104 ns 99 ns 362%
1595 ---------------------------------------
1596 combining_notc_async 116 ns 105 ns 99 ns 363%
1597 combining_notc_async - dup 112 ns 104 ns 100 ns 359%
1598 ---------------------------------------
1599 combining_tc_sync 111 ns 101 ns 94 ns 381%
1600 combining_tc_sync - dup 113 ns 98 ns 93 ns 387%
1601 ---------------------------------------
1602 combining_tc_async 97 ns 85 ns 74 ns 484%
1603 combining_tc_async - dup 98 ns 86 ns 78 ns 457%
1604 ---------------------------------------
1605 
1606 ------------------------------------ Number of threads = 24
1607 
1608 Test_name, Max time, Avg time, Min time, % base min / min
1609 
1610 no_combining - base 352 ns 351 ns 349 ns
1611 no_combining - dup 352 ns 351 ns 348 ns 100%
1612 ---------------------------------------
1613 ---- dedicated-------------------------
1614 combining_notc_sync 214 ns 173 ns 149 ns 234%
1615 combining_notc_sync - dup 212 ns 166 ns 137 ns 254%
1616 ---------------------------------------
1617 combining_notc_async 232 ns 198 ns 161 ns 216%
1618 combining_notc_async - dup 225 ns 191 ns 149 ns 234%
1619 ---------------------------------------
1620 combining_tc_sync 192 ns 152 ns 129 ns 270%
1621 combining_tc_sync - dup 176 ns 156 ns 121 ns 286%
1622 ---------------------------------------
1623 combining_tc_async 202 ns 147 ns 118 ns 296%
1624 combining_tc_async - dup 200 ns 158 ns 120 ns 291%
1625 ---------------------------------------
1626 ---- no dedicated----------------------
1627 combining_notc_sync 161 ns 125 ns 115 ns 303%
1628 combining_notc_sync - dup 144 ns 127 ns 116 ns 299%
1629 ---------------------------------------
1630 combining_notc_async 135 ns 122 ns 116 ns 298%
1631 combining_notc_async - dup 341 ns 148 ns 117 ns 298%
1632 ---------------------------------------
1633 combining_tc_sync 130 ns 118 ns 109 ns 319%
1634 combining_tc_sync - dup 116 ns 110 ns 105 ns 332%
1635 ---------------------------------------
1636 combining_tc_async 97 ns 86 ns 79 ns 442%
1637 combining_tc_async - dup 95 ns 86 ns 79 ns 440%
1638 ---------------------------------------
1639 
1640 ------------------------------------ Number of threads = 32
1641 
1642 Test_name, Max time, Avg time, Min time, % base min / min
1643 
1644 no_combining - base 337 ns 336 ns 333 ns
1645 no_combining - dup 338 ns 336 ns 333 ns 99%
1646 ---------------------------------------
1647 ---- dedicated-------------------------
1648 combining_notc_sync 193 ns 177 ns 162 ns 204%
1649 combining_notc_sync - dup 211 ns 181 ns 156 ns 213%
1650 ---------------------------------------
1651 combining_notc_async 245 ns 200 ns 162 ns 205%
1652 combining_notc_async - dup 216 ns 197 ns 149 ns 223%
1653 ---------------------------------------
1654 combining_tc_sync 195 ns 167 ns 121 ns 274%
1655 combining_tc_sync - dup 179 ns 164 ns 143 ns 231%
1656 ---------------------------------------
1657 combining_tc_async 187 ns 152 ns 108 ns 307%
1658 combining_tc_async - dup 182 ns 151 ns 125 ns 266%
1659 ---------------------------------------
1660 ---- no dedicated----------------------
1661 combining_notc_sync 189 ns 127 ns 114 ns 290%
1662 combining_notc_sync - dup 126 ns 118 ns 110 ns 302%
1663 ---------------------------------------
1664 combining_notc_async 233 ns 129 ns 112 ns 297%
1665 combining_notc_async - dup 170 ns 126 ns 113 ns 293%
1666 ---------------------------------------
1667 combining_tc_sync 948 ns 212 ns 107 ns 309%
1668 combining_tc_sync - dup 137 ns 112 ns 104 ns 318%
1669 ---------------------------------------
1670 combining_tc_async 90 ns 86 ns 79 ns 421%
1671 combining_tc_async - dup 94 ns 87 ns 80 ns 414%
1672 ---------------------------------------
1673 
1674 ------------------------------------ Number of threads = 48
1675 
1676 Test_name, Max time, Avg time, Min time, % base min / min
1677 
1678 no_combining - base 340 ns 336 ns 334 ns
1679 no_combining - dup 336 ns 335 ns 334 ns 100%
1680 ---------------------------------------
1681 ---- dedicated-------------------------
1682 combining_notc_sync 214 ns 176 ns 137 ns 243%
1683 combining_notc_sync - dup 210 ns 173 ns 128 ns 260%
1684 ---------------------------------------
1685 combining_notc_async 217 ns 186 ns 162 ns 205%
1686 combining_notc_async - dup 215 ns 186 ns 149 ns 224%
1687 ---------------------------------------
1688 combining_tc_sync 206 ns 171 ns 145 ns 230%
1689 combining_tc_sync - dup 179 ns 149 ns 126 ns 265%
1690 ---------------------------------------
1691 combining_tc_async 175 ns 138 ns 108 ns 309%
1692 combining_tc_async - dup 169 ns 134 ns 110 ns 301%
1693 ---------------------------------------
1694 ---- no dedicated----------------------
1695 combining_notc_sync 1798 ns 293 ns 118 ns 282%
1696 combining_notc_sync - dup 171 ns 122 ns 105 ns 318%
1697 ---------------------------------------
1698 combining_notc_async 227 ns 132 ns 110 ns 302%
1699 combining_notc_async - dup 226 ns 137 ns 111 ns 301%
1700 ---------------------------------------
1701 combining_tc_sync 111 ns 106 ns 102 ns 327%
1702 combining_tc_sync - dup 127 ns 110 ns 104 ns 321%
1703 ---------------------------------------
1704 combining_tc_async 297 ns 117 ns 77 ns 433%
1705 combining_tc_async - dup 742 ns 149 ns 77 ns 432%
1706 ---------------------------------------
1707 
1708 ------------------------------------ Number of threads = 64
1709 
1710 Test_name, Max time, Avg time, Min time, % base min / min
1711 
1712 no_combining - base 338 ns 333 ns 331 ns
1713 no_combining - dup 335 ns 333 ns 331 ns 99%
1714 ---------------------------------------
1715 ---- dedicated-------------------------
1716 combining_notc_sync 198 ns 163 ns 148 ns 223%
1717 combining_notc_sync - dup 172 ns 154 ns 124 ns 266%
1718 ---------------------------------------
1719 combining_notc_async 211 ns 177 ns 158 ns 209%
1720 combining_notc_async - dup 182 ns 166 ns 152 ns 216%
1721 ---------------------------------------
1722 combining_tc_sync 195 ns 133 ns 112 ns 294%
1723 combining_tc_sync - dup 158 ns 135 ns 108 ns 305%
1724 ---------------------------------------
1725 combining_tc_async 145 ns 119 ns 95 ns 347%
1726 combining_tc_async - dup 159 ns 130 ns 95 ns 346%
1727 ---------------------------------------
1728 ---- no dedicated----------------------
1729 combining_notc_sync 188 ns 123 ns 107 ns 308%
1730 combining_notc_sync - dup 546 ns 159 ns 107 ns 307%
1731 ---------------------------------------
1732 combining_notc_async 558 ns 160 ns 108 ns 304%
1733 combining_notc_async - dup 192 ns 127 ns 107 ns 308%
1734 ---------------------------------------
1735 combining_tc_sync 325 ns 130 ns 101 ns 325%
1736 combining_tc_sync - dup 1766 ns 273 ns 101 ns 325%
1737 ---------------------------------------
1738 combining_tc_async 417 ns 118 ns 74 ns 446%
1739 combining_tc_async - dup 838 ns 212 ns 72 ns 455%
1740 ---------------------------------------
1741 [ OK ] FlatCombining.direct_measurement (178622 ms)
1742 [----------] 2 tests from FlatCombining (633891 ms total)
1743 
1744 [----------] Global test environment tear-down
1745 [==========] 2 tests from 1 test case ran. (633891 ms total)
1746 [ PASSED ] 2 tests.
1747 
1748 ---
1749 
1750 $ lscpu
1751 
1752 Architecture: x86_64
1753 CPU op-mode(s): 32-bit, 64-bit
1754 Byte Order: Little Endian
1755 CPU(s): 32
1756 On-line CPU(s) list: 0-31
1757 Thread(s) per core: 2
1758 Core(s) per socket: 8
1759 Socket(s): 2
1760 NUMA node(s): 2
1761 Vendor ID: GenuineIntel
1762 CPU family: 6
1763 Model: 45
1764 Model name: Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
1765 Stepping: 6
1766 CPU MHz: 2200.000
1767 CPU max MHz: 2200.0000
1768 CPU min MHz: 1200.0000
1769 BogoMIPS: 4399.87
1770 Virtualization: VT-x
1771 L1d cache: 32K
1772 L1i cache: 32K
1773 L2 cache: 256K
1774 L3 cache: 20480K
1775 NUMA node0 CPU(s): 0-7,16-23
1776 NUMA node1 CPU(s): 8-15,24-31
1777 
1778 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep
1779 mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
1780 tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts
1781 rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq
1782 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca
1783 sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx lahf_lm
1784 epb tpr_shadow vnmi flexpriority ept vpid xsaveopt dtherm arat pln pts
1785 
1786 ---
1787 
1788  */
1789 // clang-format on
DEFINE_int32(reps, 10,"number of reps")
std::atomic< int64_t > sum(0)
char b
LogLevel max
Definition: LogLevel.cpp:31
BENCHMARK_RELATIVE(no_combining_dup, iters)
DEFINE_bool(direct, false,"run direct measurement")
uint64_t run_test(int nthreads, int lines, int numRecs, int work, int ops, bool combining, bool simple, bool dedicated, bool tc, bool syncops, bool excl=false, bool allocAll=false)
static uint64_t test(std::string name, bool fc_, bool dedicated_, bool tc_, bool syncops_, uint64_t base)
static bool simple
void runBenchmarks()
Definition: Benchmark.cpp:456
static bool dedicated
static int nthreads
const char * name
Definition: http_parser.c:437
LogLevel min
Definition: LogLevel.cpp:30
static bool tc
const int ops
S lines(StringPiece source)
Definition: String.h:80
static std::vector< int > nthr
TEST(ProgramOptionsTest, Errors)
static bool syncops
BENCHMARK(fbFollyGlobalBenchmarkBaseline)
Definition: Benchmark.cpp:84
const char * string
Definition: Conv.cpp:212
BENCHMARK_DRAW_LINE()
void benchmarkSetup()
static bool fc