proxygen
Benchmark.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2012-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
18 
#include <folly/Benchmark.h>

#include <algorithm>
#include <cmath>
#include <cstring>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <utility>
#include <vector>

#include <boost/regex.hpp>

#include <folly/MapUtil.h>
#include <folly/String.h>
#include <folly/container/Foreach.h>
#include <folly/json.h>
37 
38 using namespace std;
39 
40 DEFINE_bool(benchmark, false, "Run benchmarks.");
41 DEFINE_bool(json, false, "Output in JSON format.");
42 DEFINE_bool(json_verbose, false, "Output in verbose JSON format.");
43 
45  bm_regex,
46  "",
47  "Only benchmarks whose names match this regex will be run.");
48 
50  bm_min_usec,
51  100,
52  "Minimum # of microseconds we'll accept for each benchmark.");
53 
55  bm_min_iters,
56  1,
57  "Minimum # of iterations we'll try for each benchmark.");
58 
60  bm_max_iters,
61  1 << 30,
62  "Maximum # of iterations we'll try for each benchmark.");
63 
65  bm_max_secs,
66  1,
67  "Maximum # of seconds we'll spend on each benchmark.");
68 
69 namespace folly {
70 
// Out-of-line definition of the static member declared in folly/Benchmark.h.
// Accumulates the wall time spent while a BenchmarkSuspender is active.
std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;

// A benchmark body: called with an iteration count, returns the measured
// (elapsed-time, iterations-actually-run) pair.
typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;
74 
75 vector<detail::BenchmarkRegistration>& benchmarks() {
76  static vector<detail::BenchmarkRegistration> _benchmarks;
77  return _benchmarks;
78 }
79 
80 #define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE fbFollyGlobalBenchmarkBaseline
81 #define FB_STRINGIZE_X2(x) FB_STRINGIZE(x)
82 
83 // Add the global baseline
85 #ifdef _MSC_VER
86  _ReadWriteBarrier();
87 #else
88  asm volatile("");
89 #endif
90 }
91 
94  auto it = std::find_if(
95  benchmarks().begin(),
96  benchmarks().end(),
97  [global](const detail::BenchmarkRegistration& v) {
98  return v.name == global;
99  });
100  CHECK(it != benchmarks().end());
101  return size_t(std::distance(benchmarks().begin(), it));
102 }
103 
104 #undef FB_STRINGIZE_X2
105 #undef FB_FOLLY_GLOBAL_BENCHMARK_BASELINE
106 
108  const char* file,
109  const char* name,
110  BenchmarkFun fun) {
111  benchmarks().push_back({file, name, std::move(fun)});
112 }
113 
// Collapses per-epoch timings in [begin, end) into a single estimate.
// Current state of the art: the minimum — after experimentation it proved
// the most robust choice against scheduler and timer jitter.
static double estimateTime(double* begin, double* end) {
  assert(begin < end);
  double best = *begin;
  for (double* cur = begin + 1; cur != end; ++cur) {
    best = std::min(best, *cur);
  }
  return best;
}
124 
126  const BenchmarkFun& fun,
127  const double globalBaseline) {
128  using std::chrono::duration_cast;
129  using std::chrono::high_resolution_clock;
130  using std::chrono::microseconds;
131  using std::chrono::nanoseconds;
132  using std::chrono::seconds;
133 
134  // They key here is accuracy; too low numbers means the accuracy was
135  // coarse. We up the ante until we get to at least minNanoseconds
136  // timings.
137  static_assert(
139  "High resolution clock must be nanosecond resolution.");
140  // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
141  // the clock resolution is worse than that, it will be larger. In
142  // essence we're aiming at making the quantization noise 0.01%.
143  static const auto minNanoseconds = std::max<nanoseconds>(
144  nanoseconds(100000), microseconds(FLAGS_bm_min_usec));
145 
146  // We do measurements in several epochs and take the minimum, to
147  // account for jitter.
148  static const unsigned int epochs = 1000;
149  // We establish a total time budget as we don't want a measurement
150  // to take too long. This will curtail the number of actual epochs.
151  const auto timeBudget = seconds(FLAGS_bm_max_secs);
152  auto global = high_resolution_clock::now();
153 
154  double epochResults[epochs] = {0};
155  size_t actualEpochs = 0;
156 
157  for (; actualEpochs < epochs; ++actualEpochs) {
158  const auto maxIters = uint32_t(FLAGS_bm_max_iters);
159  for (auto n = uint32_t(FLAGS_bm_min_iters); n < maxIters; n *= 2) {
160  auto const nsecsAndIter = fun(static_cast<unsigned int>(n));
161  if (nsecsAndIter.first < minNanoseconds) {
162  continue;
163  }
164  // We got an accurate enough timing, done. But only save if
165  // smaller than the current result.
166  auto nsecs = duration_cast<nanoseconds>(nsecsAndIter.first).count();
167  epochResults[actualEpochs] =
168  max(0.0, double(nsecs) / nsecsAndIter.second - globalBaseline);
169  // Done with the current epoch, we got a meaningful timing.
170  break;
171  }
173  if (now - global >= timeBudget) {
174  // No more time budget available.
175  ++actualEpochs;
176  break;
177  }
178  }
179 
180  // If the benchmark was basically drowned in baseline noise, it's
181  // possible it became negative.
182  return max(0.0, estimateTime(epochResults, epochResults + actualEpochs));
183 }
184 
// One row of a humanReadable() scale table: magnitudes at or above
// `boundary` are divided by it and printed with `suffix` appended.
struct ScaleInfo {
  double boundary;
  const char* suffix;
};
189 
// Scale table for readableTime(): seconds mapped onto conventional time
// units, ordered largest to smallest. The {0, nullptr} entry is a sentinel
// terminating the scan in humanReadable().
static const ScaleInfo kTimeSuffixes[]{
    {365.25 * 24 * 3600, "years"},
    {24 * 3600, "days"},
    {3600, "hr"},
    {60, "min"},
    {1, "s"},
    {1E-3, "ms"},
    {1E-6, "us"},
    {1E-9, "ns"},
    {1E-12, "ps"},
    {1E-15, "fs"},
    {0, nullptr}, // sentinel
};
203 
// Scale table for metricReadable(): SI metric prefixes, ordered largest to
// smallest. The {0, nullptr} entry is a sentinel terminating the scan in
// humanReadable().
static const ScaleInfo kMetricSuffixes[]{
    {1E24, "Y"}, // yotta
    {1E21, "Z"}, // zetta
    {1E18, "X"}, // "exa" written with suffix 'X' so as to not create
                 // confusion with scientific notation
    {1E15, "P"}, // peta
    {1E12, "T"}, // tera
    {1E9, "G"}, // giga
    {1E6, "M"}, // mega
    {1E3, "K"}, // kilo
    {1, ""},
    {1E-3, "m"}, // milli
    {1E-6, "u"}, // micro
    {1E-9, "n"}, // nano
    {1E-12, "p"}, // pico
    {1E-15, "f"}, // femto
    {1E-18, "a"}, // atto
    {1E-21, "z"}, // zepto
    {1E-24, "y"}, // yocto
    {0, nullptr}, // sentinel
};
225 
226 static string
227 humanReadable(double n, unsigned int decimals, const ScaleInfo* scales) {
228  if (std::isinf(n) || std::isnan(n)) {
229  return folly::to<string>(n);
230  }
231 
232  const double absValue = fabs(n);
233  const ScaleInfo* scale = scales;
234  while (absValue < scale[0].boundary && scale[1].suffix != nullptr) {
235  ++scale;
236  }
237 
238  const double scaledValue = n / scale->boundary;
239  return stringPrintf("%.*f%s", decimals, scaledValue, scale->suffix);
240 }
241 
// Formats a duration given in seconds using time-unit suffixes
// (years/days/hr/min/s/ms/us/ns/...); see kTimeSuffixes.
static string readableTime(double n, unsigned int decimals) {
  return humanReadable(n, decimals, kTimeSuffixes);
}
245 
// Formats a quantity using SI metric prefixes (K/M/G/..., m/u/n/...);
// see kMetricSuffixes.
static string metricReadable(double n, unsigned int decimals) {
  return humanReadable(n, decimals, kMetricSuffixes);
}
249 
250 namespace {
251 class BenchmarkResultsPrinter {
252  public:
253  static constexpr unsigned int columns{76};
255  string lastFile;
256 
257  void separator(char pad) {
258  puts(string(columns, pad).c_str());
259  }
260 
261  void header(const string& file) {
262  separator('=');
263  printf("%-*srelative time/iter iters/s\n", columns - 28, file.c_str());
264  separator('=');
265  }
266 
267  void print(const vector<detail::BenchmarkResult>& data) {
268  for (auto& datum : data) {
269  auto file = datum.file;
270  if (file != lastFile) {
271  // New file starting
272  header(file);
273  lastFile = file;
274  }
275 
276  string s = datum.name;
277  if (s == "-") {
278  separator('-');
279  continue;
280  }
281  bool useBaseline /* = void */;
282  if (s[0] == '%') {
283  s.erase(0, 1);
284  useBaseline = true;
285  } else {
286  baselineNsPerIter = datum.timeInNs;
287  useBaseline = false;
288  }
289  s.resize(columns - 29, ' ');
290  auto nsPerIter = datum.timeInNs;
291  auto secPerIter = nsPerIter / 1E9;
292  auto itersPerSec = (secPerIter == 0)
293  ? std::numeric_limits<double>::infinity()
294  : (1 / secPerIter);
295  if (!useBaseline) {
296  // Print without baseline
297  printf(
298  "%*s %9s %7s\n",
299  static_cast<int>(s.size()),
300  s.c_str(),
301  readableTime(secPerIter, 2).c_str(),
302  metricReadable(itersPerSec, 2).c_str());
303  } else {
304  // Print with baseline
305  auto rel = baselineNsPerIter / nsPerIter * 100.0;
306  printf(
307  "%*s %7.2f%% %9s %7s\n",
308  static_cast<int>(s.size()),
309  s.c_str(),
310  rel,
311  readableTime(secPerIter, 2).c_str(),
312  metricReadable(itersPerSec, 2).c_str());
313  }
314  }
315  }
316 };
317 } // namespace
318 
320  const vector<detail::BenchmarkResult>& data) {
322  for (auto& datum : data) {
323  d[datum.name] = datum.timeInNs * 1000.;
324  }
325 
326  printf("%s\n", toPrettyJson(d).c_str());
327 }
328 
330  const vector<detail::BenchmarkResult>& data) {
331  dynamic d;
332  benchmarkResultsToDynamic(data, d);
333  printf("%s\n", toPrettyJson(d).c_str());
334 }
335 
336 static void printBenchmarkResults(const vector<detail::BenchmarkResult>& data) {
337  if (FLAGS_json_verbose) {
339  return;
340  } else if (FLAGS_json) {
342  return;
343  }
344 
345  CHECK(FLAGS_json_verbose || FLAGS_json) << "Cannot print benchmark results";
346 }
347 
349  const vector<detail::BenchmarkResult>& data,
350  dynamic& out) {
351  out = dynamic::array;
352  for (auto& datum : data) {
353  out.push_back(dynamic::array(datum.file, datum.name, datum.timeInNs));
354  }
355 }
356 
358  const dynamic& d,
359  vector<detail::BenchmarkResult>& results) {
360  for (auto& datum : d) {
361  results.push_back(
362  {datum[0].asString(), datum[1].asString(), datum[2].asDouble()});
363  }
364 }
365 
366 static pair<StringPiece, StringPiece> resultKey(
367  const detail::BenchmarkResult& result) {
368  return pair<StringPiece, StringPiece>(result.file, result.name);
369 }
370 
372  const vector<detail::BenchmarkResult>& base,
373  const vector<detail::BenchmarkResult>& test) {
374  map<pair<StringPiece, StringPiece>, double> baselines;
375 
376  for (auto& baseResult : base) {
377  baselines[resultKey(baseResult)] = baseResult.timeInNs;
378  }
379  //
380  // Width available
381  static const unsigned int columns = 76;
382 
383  // Compute the longest benchmark name
384  size_t longestName = 0;
385  for (auto& datum : test) {
386  longestName = max(longestName, datum.name.size());
387  }
388 
389  // Print a horizontal rule
390  auto separator = [&](char pad) { puts(string(columns, pad).c_str()); };
391 
392  // Print header for a file
393  auto header = [&](const string& file) {
394  separator('=');
395  printf("%-*srelative time/iter iters/s\n", columns - 28, file.c_str());
396  separator('=');
397  };
398 
399  string lastFile;
400 
401  for (auto& datum : test) {
402  folly::Optional<double> baseline =
403  folly::get_optional(baselines, resultKey(datum));
404  auto file = datum.file;
405  if (file != lastFile) {
406  // New file starting
407  header(file);
408  lastFile = file;
409  }
410 
411  string s = datum.name;
412  if (s == "-") {
413  separator('-');
414  continue;
415  }
416  if (s[0] == '%') {
417  s.erase(0, 1);
418  }
419  s.resize(columns - 29, ' ');
420  auto nsPerIter = datum.timeInNs;
421  auto secPerIter = nsPerIter / 1E9;
422  auto itersPerSec = (secPerIter == 0)
423  ? std::numeric_limits<double>::infinity()
424  : (1 / secPerIter);
425  if (!baseline) {
426  // Print without baseline
427  printf(
428  "%*s %9s %7s\n",
429  static_cast<int>(s.size()),
430  s.c_str(),
431  readableTime(secPerIter, 2).c_str(),
432  metricReadable(itersPerSec, 2).c_str());
433  } else {
434  // Print with baseline
435  auto rel = *baseline / nsPerIter * 100.0;
436  printf(
437  "%*s %7.2f%% %9s %7s\n",
438  static_cast<int>(s.size()),
439  s.c_str(),
440  rel,
441  readableTime(secPerIter, 2).c_str(),
442  metricReadable(itersPerSec, 2).c_str());
443  }
444  }
445  separator('=');
446 }
447 
448 void checkRunMode() {
450  std::cerr << "WARNING: Benchmark running "
451  << (folly::kIsDebug ? "in DEBUG mode" : "with SANITIZERS")
452  << std::endl;
453  }
454 }
455 
457  CHECK(!benchmarks().empty());
458 
459  checkRunMode();
460 
461  vector<detail::BenchmarkResult> results;
462  results.reserve(benchmarks().size() - 1);
463 
464  std::unique_ptr<boost::regex> bmRegex;
465  if (!FLAGS_bm_regex.empty()) {
466  bmRegex = std::make_unique<boost::regex>(FLAGS_bm_regex);
467  }
468 
469  // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
470 
471  size_t baselineIndex = getGlobalBenchmarkBaselineIndex();
472 
473  auto const globalBaseline =
474  runBenchmarkGetNSPerIteration(benchmarks()[baselineIndex].func, 0);
475  auto printer = BenchmarkResultsPrinter{};
476  FOR_EACH_RANGE (i, 0, benchmarks().size()) {
477  if (i == baselineIndex) {
478  continue;
479  }
480  double elapsed = 0.0;
481  auto& bm = benchmarks()[i];
482  if (bm.name != "-") { // skip separators
483  if (bmRegex && !boost::regex_search(bm.name, *bmRegex)) {
484  continue;
485  }
486  elapsed = runBenchmarkGetNSPerIteration(bm.func, globalBaseline);
487  }
488 
489  if (!FLAGS_json_verbose && !FLAGS_json) {
490  printer.print({{bm.file, bm.name, elapsed}});
491  } else {
492  results.push_back({bm.file, bm.name, elapsed});
493  }
494  }
495 
496  // PLEASE MAKE NOISE. MEASUREMENTS DONE.
497 
498  if (FLAGS_json_verbose || FLAGS_json) {
499  printBenchmarkResults(results);
500  } else {
501  printer.separator('=');
502  }
503 
504  checkRunMode();
505 }
506 
507 } // namespace folly
static const ScaleInfo kTimeSuffixes[]
Definition: Benchmark.cpp:190
size_t getGlobalBenchmarkBaselineIndex()
Definition: Benchmark.cpp:92
BENCHMARK(constantFuture)
Definition: Benchmark.cpp:45
void * object
Definition: AtFork.cpp:32
static void printBenchmarkResults(const vector< detail::BenchmarkResult > &data)
Definition: Benchmark.cpp:336
DEFINE_bool(benchmark, false,"Run benchmarks.")
constexpr auto kIsDebug
Definition: Portability.h:264
LogLevel max
Definition: LogLevel.cpp:31
static string humanReadable(double n, unsigned int decimals, const ScaleInfo *scales)
Definition: Benchmark.cpp:227
static void printBenchmarkResultsAsVerboseJson(const vector< detail::BenchmarkResult > &data)
Definition: Benchmark.cpp:329
vector< detail::BenchmarkRegistration > & benchmarks()
Definition: Benchmark.cpp:75
std::chrono::steady_clock::time_point now()
constexpr detail::Map< Move > move
Definition: Base-inl.h:2567
#define FB_STRINGIZE_X2(x)
Definition: Benchmark.cpp:81
void addBenchmarkImpl(const char *file, const char *name, std::function< TimeIterPair(unsigned int)>)
STL namespace.
auto begin(TestAdlIterable &instance)
Definition: ForeachTest.cpp:56
function< detail::TimeIterPair(unsigned int)> BenchmarkFun
Definition: Benchmark.cpp:73
std::string stringPrintf(const char *format,...)
Definition: String.cpp:223
void benchmarkResultsFromDynamic(const dynamic &d, vector< detail::BenchmarkResult > &results)
Definition: Benchmark.cpp:357
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
void runBenchmarks()
Definition: Benchmark.cpp:456
double baselineNsPerIter
Definition: Benchmark.cpp:254
DEFINE_int32(bm_min_iters, 1,"Minimum # of iterations we'll try for each benchmark.")
DEFINE_string(bm_regex,"","Only benchmarks whose names match this regex will be run.")
static string readableTime(double n, unsigned int decimals)
Definition: Benchmark.cpp:242
#define FOR_EACH_RANGE(i, begin, end)
Definition: Foreach.h:313
string lastFile
Definition: Benchmark.cpp:255
void benchmarkResultsToDynamic(const vector< detail::BenchmarkResult > &data, dynamic &out)
Definition: Benchmark.cpp:348
const char * name
Definition: http_parser.c:437
constexpr auto size(C const &c) -> decltype(c.size())
Definition: Access.h:45
constexpr auto empty(C const &c) -> decltype(c.empty())
Definition: Access.h:55
static string metricReadable(double n, unsigned int decimals)
Definition: Benchmark.cpp:246
constexpr bool kIsSanitize
Definition: Portability.h:130
const char * suffix
Definition: String.cpp:272
auto end(TestAdlIterable &instance)
Definition: ForeachTest.cpp:62
std::string toPrettyJson(dynamic const &dyn)
Definition: json.cpp:919
folly::Optional< typename Map::mapped_type > get_optional(const Map &map, const Key &key)
Definition: MapUtil.h:102
static double estimateTime(double *begin, double *end)
Definition: Benchmark.cpp:117
const char * suffix
Definition: Benchmark.cpp:187
void push_back(dynamic const &)
Definition: dynamic-inl.h:969
static const char *const value
Definition: Conv.cpp:50
static constexpr unsigned int columns
Definition: Benchmark.cpp:253
int * count
DEFINE_int64(bm_min_usec, 100,"Minimum # of microseconds we'll accept for each benchmark.")
void fun()
static set< string > s
static const ScaleInfo kMetricSuffixes[]
Definition: Benchmark.cpp:204
#define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE
Definition: Benchmark.cpp:80
void checkRunMode()
Definition: Benchmark.cpp:448
void printResultComparison(const vector< detail::BenchmarkResult > &base, const vector< detail::BenchmarkResult > &test)
Definition: Benchmark.cpp:371
void benchmark(size_t n)
static void printBenchmarkResultsAsJson(const vector< detail::BenchmarkResult > &data)
Definition: Benchmark.cpp:319
static constexpr uint64_t data[1]
Definition: Fingerprint.cpp:43
static double runBenchmarkGetNSPerIteration(const BenchmarkFun &fun, const double globalBaseline)
Definition: Benchmark.cpp:125
static pair< StringPiece, StringPiece > resultKey(const detail::BenchmarkResult &result)
Definition: Benchmark.cpp:366