proxygen
ParallelBenchmark.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2014-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <array>
18 #include <future>
19 #include <iostream>
20 #include <vector>
21 
22 #include <glog/logging.h>
23 
24 #include <folly/gen/Base.h>
25 #include <folly/gen/Parallel.h>
26 #include <folly/gen/test/Bench.h>
27 
29  threads,
30  std::max(1, (int32_t)sysconf(_SC_NPROCESSORS_CONF) / 2),
31  "Num threads.");
32 
33 using namespace folly::gen;
34 using std::vector;
35 
36 constexpr int kFib = 28; // unit of work
37 size_t fib(int n) {
38  return n <= 1 ? 1 : fib(n - 1) + fib(n - 2);
39 }
40 
41 static auto isPrimeSlow = [](int n) {
42  if (n < 2) {
43  return false;
44  } else if (n > 2) {
45  for (int d = 3; d * d <= n; d += 2) {
46  if (0 == n % d) {
47  return false;
48  }
49  }
50  }
51  return true;
52 };
53 
54 static auto primes = seq(1, 1 << 20) | filter(isPrimeSlow) | as<vector>();
55 
56 static auto stopc(int n) {
57  return [=](int d) { return d * d > n; };
58 }
59 static auto divides(int n) {
60  return [=](int d) { return 0 == n % d; };
61 }
62 
63 static auto isPrime = [](int n) {
64  return from(primes) | until(stopc(n)) | filter(divides(n)) | isEmpty;
65 };
66 
67 static auto factors = [](int n) {
68  return from(primes) | until(stopc(n)) | filter(divides(n)) | count;
69 };
70 
71 static auto factorsSlow = [](int n) {
72  return from(primes) | filter(divides(n)) | count;
73 };
74 
75 static auto sleepyWork = [](int i) {
76  const auto sleepyTime = std::chrono::microseconds(100);
77  std::this_thread::sleep_for(sleepyTime);
78  return i;
79 };
80 
81 static auto sleepAndWork = [](int i) { return factorsSlow(i) + sleepyWork(i); };
82 
83 auto start = 1 << 20;
84 auto v = seq(start) | take(1 << 20) | as<vector>();
85 auto small = from(v) | take(1 << 12);
86 auto medium = from(v) | take(1 << 14);
87 auto large = from(v) | take(1 << 18);
88 auto huge = from(v);
89 auto chunks = chunked(v);
90 
94 
98 
102 
103 auto ch = chunks;
104 auto cat = concat;
110 
114 
115 const int fibs = 1000;
116 BENCH_GEN(seq(1, fibs) | map([](int) { return fib(kFib); }) | sum);
117 // clang-format off
119  seq(1, fibs)
120  | parallel(map([](int) { return fib(kFib); }) | sub(sum))
121  | sum);
122 // clang-format on
123 BENCH_GEN_REL([] {
124  // clang-format off
125  auto threads = seq(1, int(FLAGS_threads))
126  | map([](int i) {
127  return std::thread([=] {
128  return range(
129  (i + 0) * fibs / FLAGS_threads, (i + 1) * fibs / FLAGS_threads)
130  | map([](int) { return fib(kFib); })
131  | sum;
132  });
133  })
134  | as<vector>();
135  from(threads) | [](std::thread &thread) { thread.join(); };
136  // clang-format on
137  return 1;
138 }());
140 
141 #if 0
142 ============================================================================
143 folly/gen/test/ParallelBenchmark.cpp relative time/iter iters/s
144 ============================================================================
145 small | map(factorsSlow) | sum 4.59s 217.87m
146 small | parallel(map(factorsSlow)) | sum 1588.86% 288.88ms 3.46
147 ----------------------------------------------------------------------------
148 small | map(factors) | sum 9.62ms 103.94
149 small | parallel(map(factors)) | sum 89.15% 10.79ms 92.66
150 ----------------------------------------------------------------------------
151 large | map(factors) | sum 650.52ms 1.54
152 large | parallel(map(factors)) | sum 53.82% 1.21s 827.41m
153 ----------------------------------------------------------------------------
154 huge | filter(isPrime) | count 295.93ms 3.38
155 ch | cat | filter(isPrime) | count 99.76% 296.64ms 3.37
156 ch | parallel(cat | filter(isPrime)) | count 142.75% 207.31ms 4.82
157 ch | parallel(cat | filter(isPrime) | sub(count 1538.50% 19.24ms 51.99
158 ----------------------------------------------------------------------------
159 small | map(sleepAndWork) | sum 5.37s 186.18m
160 small | parallel(map(sleepAndWork)) | sum 1840.38% 291.85ms 3.43
161 ----------------------------------------------------------------------------
162 seq(1, fibs) | map([](int) { return fib(kFib); 1.49s 669.53m
163 seq(1, fibs) | parallel(map([](int) { return fi 1698.07% 87.96ms 11.37
164 [] { auto threads = seq(1, int(FLAGS_threads)) 1571.16% 95.06ms 10.52
165 ----------------------------------------------------------------------------
166 ============================================================================
167 #endif
168 int main(int argc, char* argv[]) {
169  gflags::ParseCommandLineFlags(&argc, &argv, true);
171  return 0;
172 }
static auto sleepAndWork
static auto isPrimeSlow
const int fibs
auto chunks
Sub sub(Sink sink)
Definition: Parallel.h:104
static auto factorsSlow
BENCH_GEN_REL(small|parallel(map(factorsSlow))|sum)
auto cat
int main(int argc, char *argv[])
LogLevel max
Definition: LogLevel.cpp:31
constexpr detail::Count count
Definition: Base-inl.h:2551
static auto sleepyWork
From from(Container &source)
Definition: Base.h:438
auto large
DEFINE_int32(threads, std::max(1,(int32_t) sysconf(_SC_NPROCESSORS_CONF)/2),"Num threads.")
Gen seq(Value first, Value last)
Definition: Base.h:484
static auto primes
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
void runBenchmarks()
Definition: Benchmark.cpp:456
static auto isPrime
auto ch
constexpr detail::Sum sum
Definition: Base-inl.h:2549
std::vector< std::thread::id > threads
Gen range(Value begin, Value end)
Definition: Base.h:467
auto small
char ** argv
static auto stopc(int n)
constexpr detail::IsEmpty< true > isEmpty
Definition: Base-inl.h:2555
Map map(Predicate pred=Predicate())
Definition: Base.h:545
static map< string, int > m
Parallel parallel(Ops ops, size_t threads=0)
Definition: Parallel.h:88
BENCH_GEN(small|map(factorsSlow)|sum)
auto huge
auto start
size_t fib(int n)
Filter filter(Predicate pred=Predicate())
Definition: Base.h:646
constexpr int kFib
constexpr detail::Concat concat
Definition: Base-inl.h:2569
Until until(Predicate pred=Predicate())
Definition: Base.h:656
static auto divides(int n)
BENCHMARK_DRAW_LINE()
static set< string > s
detail::Take take(Number count)
Definition: Base-inl.h:2582
static auto factors
auto medium
Chunked chunked(const Container &container, int chunkSize=256)
Definition: Parallel.h:49
std::chrono::nanoseconds time()