proxygen
MathBenchmark.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2016-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include <folly/Math.h>

#include <algorithm>
#include <limits>
#include <random>
#include <type_traits>

#include <folly/Benchmark.h>
23 
24 namespace {
/**
 * The commonly seen "add denom - 1, then divide" idiom for a rounded-up
 * quotient. Kept only as a comparison point for the benchmarks: as the name
 * says it is not a correct ceiling division in general (for example it is
 * wrong for negative denominators, and the intermediate sum can overflow).
 */
template <typename T>
T brokenButWidespreadDivCeil(T num, T denom) {
  // Bias the numerator so that the truncating division below rounds upwards.
  const auto biased = num + denom - 1;
  return biased / denom;
}
29 
/**
 * Rounded-up quotient computed in single-precision floating point: divide as
 * float, apply ceilf, convert back to T.
 */
template <typename T>
T viaFloatDivCeil(T num, T denom) {
  // denom is converted to float by the usual arithmetic conversions.
  const float quotient = static_cast<float>(num) / denom;
  return static_cast<T>(ceilf(quotient));
}
34 
/**
 * Rounded-up quotient computed in double precision: divide as double, apply
 * ceil, convert back to T.
 */
template <typename T>
T viaDoubleDivCeil(T num, T denom) {
  // denom is converted to double by the usual arithmetic conversions.
  const double quotient = static_cast<double>(num) / denom;
  return static_cast<T>(ceil(quotient));
}
39 
/**
 * Rounded-up quotient computed in long double: divide as long double, apply
 * ceill, convert back to T.
 */
template <typename T>
T viaLongDoubleDivCeil(T num, T denom) {
  // denom is converted to long double by the usual arithmetic conversions.
  const long double quotient = static_cast<long double>(num) / denom;
  return static_cast<T>(ceill(quotient));
}
44 
/**
 * Builds the pool of operands used by the division benchmarks.
 *
 * For every i in [1, min(1000, max(T) - 1)] the result receives, in order:
 * i, -i (converted to T), max(T) / i, and min(T) / i when that quotient is
 * non-zero (so it is skipped for unsigned T, whose minimum is 0).
 */
template <typename T>
std::vector<T> divValues() {
  using Limits = std::numeric_limits<T>;

  std::vector<T> values;
  T i = 1;
  // The first condition stops narrow types before ++i could pass max(T).
  while (i < Limits::max() && i <= 1000) {
    values.push_back(i);
    values.push_back(-i);
    values.push_back(Limits::max() / i);
    auto fromMin = Limits::min() / i;
    if (fromMin != 0) {
      values.push_back(fromMin);
    }
    ++i;
  }
  return values;
}
59 
60 template <typename T, typename F>
61 void runDivTests(const F& func, size_t iters) {
62  std::vector<T> denoms;
63  std::vector<T> numers;
65  denoms = divValues<T>();
66  numers = denoms;
67  numers.push_back(0);
68  std::mt19937 rnd(1234);
69  std::shuffle(denoms.begin(), denoms.end(), rnd);
70  std::shuffle(numers.begin(), numers.end(), rnd);
71  }
72  T dep = 0;
73  while (true) {
74  for (T d : denoms) {
75  for (T n : numers) {
76  n ^= dep;
78  d == -1) {
79  // min / -1 overflows in two's complement
80  d = -2;
81  }
82  dep = func(n, d);
83 
84  if (--iters == 0) {
86  return;
87  }
88  }
89  }
90  }
91 }
92 } // namespace
93 
95 BENCHMARK(divTruncInt8, iters) {
96  runDivTests<int8_t>(&folly::divTrunc<int8_t, int8_t>, iters);
97 }
98 BENCHMARK(divFloorInt8, iters) {
99  runDivTests<int8_t>(&folly::divFloor<int8_t, int8_t>, iters);
100 }
101 BENCHMARK(divCeilInt8, iters) {
102  runDivTests<int8_t>(&folly::divCeil<int8_t, int8_t>, iters);
103 }
104 BENCHMARK_RELATIVE(branchlessDivCeilInt8, iters) {
105  runDivTests<int8_t>(&folly::detail::divCeilBranchless<int8_t>, iters);
106 }
107 BENCHMARK_RELATIVE(branchfulDivCeilInt8, iters) {
108  runDivTests<int8_t>(&folly::detail::divCeilBranchful<int8_t>, iters);
109 }
110 BENCHMARK_RELATIVE(brokenButWidespreadDivCeilInt8, iters) {
111  runDivTests<int8_t>(&brokenButWidespreadDivCeil<int8_t>, iters);
112 }
113 BENCHMARK_RELATIVE(viaFloatDivCeilInt8, iters) {
114  runDivTests<int8_t>(&viaFloatDivCeil<int8_t>, iters);
115 }
116 BENCHMARK_RELATIVE(viaDoubleDivCeilInt8, iters) {
117  runDivTests<int8_t>(&viaDoubleDivCeil<int8_t>, iters);
118 }
119 BENCHMARK_RELATIVE(viaLongDoubleDivCeilInt8, iters) {
120  runDivTests<int8_t>(&viaLongDoubleDivCeil<int8_t>, iters);
121 }
122 BENCHMARK(divRoundAwayInt8, iters) {
123  runDivTests<int8_t>(&folly::divRoundAway<int8_t, int8_t>, iters);
124 }
125 
127 BENCHMARK(divTruncInt16, iters) {
128  runDivTests<int16_t>(&folly::divTrunc<int16_t, int16_t>, iters);
129 }
130 BENCHMARK(divFloorInt16, iters) {
131  runDivTests<int16_t>(&folly::divFloor<int16_t, int16_t>, iters);
132 }
133 BENCHMARK(divCeilInt16, iters) {
134  runDivTests<int16_t>(&folly::divCeil<int16_t, int16_t>, iters);
135 }
136 BENCHMARK_RELATIVE(branchlessDivCeilInt16, iters) {
137  runDivTests<int16_t>(&folly::detail::divCeilBranchless<int16_t>, iters);
138 }
139 BENCHMARK_RELATIVE(branchfulDivCeilInt16, iters) {
140  runDivTests<int16_t>(&folly::detail::divCeilBranchful<int16_t>, iters);
141 }
142 BENCHMARK_RELATIVE(brokenButWidespreadDivCeilInt16, iters) {
143  runDivTests<int16_t>(&brokenButWidespreadDivCeil<int16_t>, iters);
144 }
145 BENCHMARK_RELATIVE(viaFloatDivCeilInt16, iters) {
146  runDivTests<int16_t>(&viaFloatDivCeil<int16_t>, iters);
147 }
148 BENCHMARK_RELATIVE(viaDoubleDivCeilInt16, iters) {
149  runDivTests<int16_t>(&viaDoubleDivCeil<int16_t>, iters);
150 }
151 BENCHMARK_RELATIVE(viaLongDoubleDivCeilInt16, iters) {
152  runDivTests<int16_t>(&viaLongDoubleDivCeil<int16_t>, iters);
153 }
154 BENCHMARK(divRoundAwayInt16, iters) {
155  runDivTests<int16_t>(&folly::divRoundAway<int16_t, int16_t>, iters);
156 }
157 
159 BENCHMARK(divTruncInt32, iters) {
160  runDivTests<int32_t>(&folly::divTrunc<int32_t, int32_t>, iters);
161 }
162 BENCHMARK(divFloorInt32, iters) {
163  runDivTests<int32_t>(&folly::divFloor<int32_t, int32_t>, iters);
164 }
165 BENCHMARK(divCeilInt32, iters) {
166  runDivTests<int32_t>(&folly::divCeil<int32_t, int32_t>, iters);
167 }
168 BENCHMARK_RELATIVE(branchlessDivCeilInt32, iters) {
169  runDivTests<int32_t>(&folly::detail::divCeilBranchless<int32_t>, iters);
170 }
171 BENCHMARK_RELATIVE(branchfulDivCeilInt32, iters) {
172  runDivTests<int32_t>(&folly::detail::divCeilBranchful<int32_t>, iters);
173 }
174 BENCHMARK_RELATIVE(brokenButWidespreadDivCeilInt32, iters) {
175  runDivTests<int32_t>(&brokenButWidespreadDivCeil<int32_t>, iters);
176 }
177 BENCHMARK_RELATIVE(approxViaFloatDivCeilInt32, iters) {
178  runDivTests<int32_t>(&viaFloatDivCeil<int32_t>, iters);
179 }
180 BENCHMARK_RELATIVE(viaDoubleDivCeilInt32, iters) {
181  runDivTests<int32_t>(&viaDoubleDivCeil<int32_t>, iters);
182 }
183 BENCHMARK_RELATIVE(viaLongDoubleDivCeilInt32, iters) {
184  runDivTests<int32_t>(&viaLongDoubleDivCeil<int32_t>, iters);
185 }
186 BENCHMARK(divRoundAwayInt32, iters) {
187  runDivTests<int32_t>(&folly::divRoundAway<int32_t, int32_t>, iters);
188 }
189 
191 BENCHMARK(divTruncInt64, iters) {
192  runDivTests<int64_t>(&folly::divTrunc<int64_t, int64_t>, iters);
193 }
194 BENCHMARK(divFloorInt64, iters) {
195  runDivTests<int64_t>(&folly::divFloor<int64_t, int64_t>, iters);
196 }
197 BENCHMARK(divCeilInt64, iters) {
198  runDivTests<int64_t>(&folly::divCeil<int64_t, int64_t>, iters);
199 }
200 BENCHMARK_RELATIVE(branchlessDivCeilInt64, iters) {
201  runDivTests<int64_t>(&folly::detail::divCeilBranchless<int64_t>, iters);
202 }
203 BENCHMARK_RELATIVE(branchfulDivCeilInt64, iters) {
204  runDivTests<int64_t>(&folly::detail::divCeilBranchful<int64_t>, iters);
205 }
206 BENCHMARK_RELATIVE(brokenButWidespreadDivCeilInt64, iters) {
207  runDivTests<int64_t>(&brokenButWidespreadDivCeil<int64_t>, iters);
208 }
209 BENCHMARK_RELATIVE(approxViaFloatDivCeilInt64, iters) {
210  runDivTests<int64_t>(&viaFloatDivCeil<int64_t>, iters);
211 }
212 BENCHMARK_RELATIVE(approxViaDoubleDivCeilInt64, iters) {
213  runDivTests<int64_t>(&viaDoubleDivCeil<int64_t>, iters);
214 }
215 BENCHMARK_RELATIVE(viaLongDoubleDivCeilInt64, iters) {
216  runDivTests<int64_t>(&viaLongDoubleDivCeil<int64_t>, iters);
217 }
218 BENCHMARK(divRoundAwayInt64, iters) {
219  runDivTests<int64_t>(&folly::divRoundAway<int64_t, int64_t>, iters);
220 }
221 
223 BENCHMARK(divTruncUint8, iters) {
224  runDivTests<uint8_t>(&folly::divTrunc<uint8_t, uint8_t>, iters);
225 }
226 BENCHMARK(divFloorUint8, iters) {
227  runDivTests<uint8_t>(&folly::divFloor<uint8_t, uint8_t>, iters);
228 }
229 BENCHMARK(divCeilUint8, iters) {
230  runDivTests<uint8_t>(&folly::divCeil<uint8_t, uint8_t>, iters);
231 }
232 BENCHMARK_RELATIVE(branchlessDivCeilUint8, iters) {
233  runDivTests<uint8_t>(&folly::detail::divCeilBranchless<uint8_t>, iters);
234 }
235 BENCHMARK_RELATIVE(branchfulDivCeilUint8, iters) {
236  runDivTests<uint8_t>(&folly::detail::divCeilBranchful<uint8_t>, iters);
237 }
238 BENCHMARK_RELATIVE(brokenButWidespreadDivCeilUint8, iters) {
239  runDivTests<uint8_t>(&brokenButWidespreadDivCeil<uint8_t>, iters);
240 }
241 BENCHMARK_RELATIVE(viaFloatDivCeilUint8, iters) {
242  runDivTests<uint8_t>(&viaFloatDivCeil<uint8_t>, iters);
243 }
244 BENCHMARK_RELATIVE(viaDoubleDivCeilUint8, iters) {
245  runDivTests<uint8_t>(&viaDoubleDivCeil<uint8_t>, iters);
246 }
247 BENCHMARK_RELATIVE(viaLongDoubleDivCeilUint8, iters) {
248  runDivTests<uint8_t>(&viaLongDoubleDivCeil<uint8_t>, iters);
249 }
250 BENCHMARK(divRoundAwayUint8, iters) {
251  runDivTests<uint8_t>(&folly::divRoundAway<uint8_t, uint8_t>, iters);
252 }
253 
255 BENCHMARK(divTruncUint16, iters) {
256  runDivTests<uint16_t>(&folly::divTrunc<uint16_t, uint16_t>, iters);
257 }
258 BENCHMARK(divFloorUint16, iters) {
259  runDivTests<uint16_t>(&folly::divFloor<uint16_t, uint16_t>, iters);
260 }
261 BENCHMARK(divCeilUint16, iters) {
262  runDivTests<uint16_t>(&folly::divCeil<uint16_t, uint16_t>, iters);
263 }
264 BENCHMARK_RELATIVE(branchlessDivCeilUint16, iters) {
265  runDivTests<uint16_t>(&folly::detail::divCeilBranchless<uint16_t>, iters);
266 }
267 BENCHMARK_RELATIVE(branchfulDivCeilUint16, iters) {
268  runDivTests<uint16_t>(&folly::detail::divCeilBranchful<uint16_t>, iters);
269 }
270 BENCHMARK_RELATIVE(brokenButWidespreadDivCeilUint16, iters) {
271  runDivTests<uint16_t>(&brokenButWidespreadDivCeil<uint16_t>, iters);
272 }
273 BENCHMARK_RELATIVE(viaFloatDivCeilUint16, iters) {
274  runDivTests<uint16_t>(&viaFloatDivCeil<uint16_t>, iters);
275 }
276 BENCHMARK_RELATIVE(viaDoubleDivCeilUint16, iters) {
277  runDivTests<uint16_t>(&viaDoubleDivCeil<uint16_t>, iters);
278 }
279 BENCHMARK_RELATIVE(viaLongDoubleDivCeilUint16, iters) {
280  runDivTests<uint16_t>(&viaLongDoubleDivCeil<uint16_t>, iters);
281 }
282 BENCHMARK(divRoundAwayUint16, iters) {
283  runDivTests<uint16_t>(&folly::divRoundAway<uint16_t, uint16_t>, iters);
284 }
285 
287 BENCHMARK(divTruncUint32, iters) {
288  runDivTests<uint32_t>(&folly::divTrunc<uint32_t, uint32_t>, iters);
289 }
290 BENCHMARK(divFloorUint32, iters) {
291  runDivTests<uint32_t>(&folly::divFloor<uint32_t, uint32_t>, iters);
292 }
293 BENCHMARK(divCeilUint32, iters) {
294  runDivTests<uint32_t>(&folly::divCeil<uint32_t, uint32_t>, iters);
295 }
296 BENCHMARK_RELATIVE(branchlessDivCeilUint32, iters) {
297  runDivTests<uint32_t>(&folly::detail::divCeilBranchless<uint32_t>, iters);
298 }
299 BENCHMARK_RELATIVE(branchfulDivCeilUint32, iters) {
300  runDivTests<uint32_t>(&folly::detail::divCeilBranchful<uint32_t>, iters);
301 }
302 BENCHMARK_RELATIVE(brokenButWidespreadDivCeilUint32, iters) {
303  runDivTests<uint32_t>(&brokenButWidespreadDivCeil<uint32_t>, iters);
304 }
305 BENCHMARK_RELATIVE(approxViaFloatDivCeilUint32, iters) {
306  runDivTests<uint32_t>(&viaFloatDivCeil<uint32_t>, iters);
307 }
308 BENCHMARK_RELATIVE(viaDoubleDivCeilUint32, iters) {
309  runDivTests<uint32_t>(&viaDoubleDivCeil<uint32_t>, iters);
310 }
311 BENCHMARK_RELATIVE(viaLongDoubleDivCeilUint32, iters) {
312  runDivTests<uint32_t>(&viaLongDoubleDivCeil<uint32_t>, iters);
313 }
314 BENCHMARK(divRoundAwayUint32, iters) {
315  runDivTests<uint32_t>(&folly::divRoundAway<uint32_t, uint32_t>, iters);
316 }
317 
319 BENCHMARK(divTruncUint64, iters) {
320  runDivTests<uint64_t>(&folly::divTrunc<uint64_t, uint64_t>, iters);
321 }
322 BENCHMARK(divFloorUint64, iters) {
323  runDivTests<uint64_t>(&folly::divFloor<uint64_t, uint64_t>, iters);
324 }
325 BENCHMARK(divCeilUint64, iters) {
326  runDivTests<uint64_t>(&folly::divCeil<uint64_t, uint64_t>, iters);
327 }
328 BENCHMARK_RELATIVE(branchlessDivCeilUint64, iters) {
329  runDivTests<uint64_t>(&folly::detail::divCeilBranchless<uint64_t>, iters);
330 }
331 BENCHMARK_RELATIVE(branchfulDivCeilUint64, iters) {
332  runDivTests<uint64_t>(&folly::detail::divCeilBranchful<uint64_t>, iters);
333 }
334 BENCHMARK_RELATIVE(brokenButWidespreadDivCeilUint64, iters) {
335  runDivTests<uint64_t>(&brokenButWidespreadDivCeil<uint64_t>, iters);
336 }
337 BENCHMARK_RELATIVE(approxViaFloatDivCeilUint64, iters) {
338  runDivTests<uint64_t>(&viaFloatDivCeil<uint64_t>, iters);
339 }
340 BENCHMARK_RELATIVE(approxViaDoubleDivCeilUint64, iters) {
341  runDivTests<uint64_t>(&viaDoubleDivCeil<uint64_t>, iters);
342 }
343 BENCHMARK_RELATIVE(viaLongDoubleDivCeilUint64, iters) {
344  runDivTests<uint64_t>(&viaLongDoubleDivCeil<uint64_t>, iters);
345 }
346 BENCHMARK(divRoundAwayUint64, iters) {
347  runDivTests<uint64_t>(&folly::divRoundAway<uint64_t, uint64_t>, iters);
348 }
349 
350 int main(int argc, char** argv) {
351  gflags::ParseCommandLineFlags(&argc, &argv, true);
353  return 0;
354 }
355 
356 /*
357 Benchmarks run single-threaded on a dual Xeon E5-2660 @ 2.2 GHz with
358 hyperthreading (16 physical cores, 20 MB cache per socket, 256 GB RAM)
359 
360 Benchmarks used --bm_min_iters=10000000.
361 
362 divTrunc is just a native integral division. viaDoubleDivCeil doesn't
363 have full accuracy for Int64 or Uint64. There is a loop-carried
364 dependency for all of the div* tests, but there is a bit of extra slack
365 (a predictable call, a load that should be from the L1, and a predictable
366 not-taken branch in addition to the loop's branch) in the driving loop,
367 so the benchmark driver's attempt to subtract the overhead of the loop
368 might mean that the latency numbers here are slightly too low or too high.
369 
370 The branchful implementation's branch is very predictable in this
371 microbenchmark for unsigned types, since it only needs to predict a
372 zero numerator. That's likely to be true in real life as well, so we
373 make this the default.
374 
375 I was surprised at the speed of float and double division, but
376 the only case where it actually wins by much and is correct is for
377 int16_t. (float + ceil is faster for the 32-bit case, but is only
378 an approximation.) I ran a similar benchmark setup for ARM and ARM64.
379 On ARM the conditional versions win by quite a bit. 32-bit ARM doesn't
380 have a native integer divide, so getting the remainder after a division
381 (to see if truncation occurred) is more work than preconditioning the
382 numerator to make truncation go in the correct direction. 64-bit ARM
383 had the same winners and losers as x86_64, at least on the two physical
384 instances I tested.
385 
386 ============================================================================
387 folly/test/MathBenchmark.cpp relative time/iter iters/s
388 ============================================================================
389 ----------------------------------------------------------------------------
390 divTruncInt8 8.89ns 112.44M
391 divFloorInt8 10.99ns 91.00M
392 divCeilInt8 10.95ns 91.33M
393 branchlessDivCeilInt8 100.40% 10.91ns 91.69M
394 branchfulDivCeilInt8 88.87% 12.32ns 81.16M
395 brokenButWidespreadDivCeilInt8 109.20% 10.03ns 99.73M
396 viaFloatDivCeilInt8 109.68% 9.98ns 100.17M
397 viaDoubleDivCeilInt8 95.47% 11.47ns 87.19M
398 viaLongDoubleDivCeilInt8 31.65% 34.59ns 28.91M
399 divRoundAwayInt8 10.42ns 95.97M
400 ----------------------------------------------------------------------------
401 divTruncInt16 8.68ns 115.17M
402 divFloorInt16 10.94ns 91.38M
403 divCeilInt16 10.91ns 91.70M
404 branchlessDivCeilInt16 99.44% 10.97ns 91.18M
405 branchfulDivCeilInt16 81.68% 13.35ns 74.90M
406 brokenButWidespreadDivCeilInt16 109.50% 9.96ns 100.40M
407 viaFloatDivCeilInt16 108.04% 10.09ns 99.07M
408 viaDoubleDivCeilInt16 85.38% 12.77ns 78.29M
409 viaLongDoubleDivCeilInt16 29.99% 36.36ns 27.50M
410 divRoundAwayInt16 10.59ns 94.46M
411 ----------------------------------------------------------------------------
412 divTruncInt32 8.38ns 119.29M
413 divFloorInt32 11.01ns 90.84M
414 divCeilInt32 11.12ns 89.91M
415 branchlessDivCeilInt32 101.94% 10.91ns 91.66M
416 branchfulDivCeilInt32 84.67% 13.14ns 76.12M
417 brokenButWidespreadDivCeilInt32 117.61% 9.46ns 105.75M
418 approxViaFloatDivCeilInt32 115.98% 9.59ns 104.28M
419 viaDoubleDivCeilInt32 89.86% 12.38ns 80.79M
420 viaLongDoubleDivCeilInt32 30.84% 36.06ns 27.73M
421 divRoundAwayInt32 11.30ns 88.50M
422 ----------------------------------------------------------------------------
423 divTruncInt64 16.07ns 62.21M
424 divFloorInt64 18.37ns 54.45M
425 divCeilInt64 18.61ns 53.74M
426 branchlessDivCeilInt64 100.43% 18.53ns 53.97M
427 branchfulDivCeilInt64 84.65% 21.98ns 45.49M
428 brokenButWidespreadDivCeilInt64 108.47% 17.16ns 58.29M
429 approxViaFloatDivCeilInt64 190.99% 9.74ns 102.64M
430 approxViaDoubleDivCeilInt64 148.64% 12.52ns 79.88M
431 viaLongDoubleDivCeilInt64 52.01% 35.77ns 27.95M
432 divRoundAwayInt64 18.79ns 53.21M
433 ----------------------------------------------------------------------------
434 divTruncUint8 7.76ns 128.89M
435 divFloorUint8 8.29ns 120.61M
436 divCeilUint8 9.61ns 104.09M
437 branchlessDivCeilUint8 112.00% 8.58ns 116.58M
438 branchfulDivCeilUint8 114.01% 8.43ns 118.67M
439 brokenButWidespreadDivCeilUint8 100.48% 9.56ns 104.58M
440 viaFloatDivCeilUint8 103.53% 9.28ns 107.76M
441 viaDoubleDivCeilUint8 85.75% 11.20ns 89.26M
442 viaLongDoubleDivCeilUint8 27.72% 34.65ns 28.86M
443 divRoundAwayUint8 9.60ns 104.11M
444 ----------------------------------------------------------------------------
445 divTruncUint16 8.39ns 119.19M
446 divFloorUint16 8.28ns 120.82M
447 divCeilUint16 9.90ns 100.96M
448 branchlessDivCeilUint16 100.23% 9.88ns 101.19M
449 branchfulDivCeilUint16 107.83% 9.19ns 108.87M
450 brokenButWidespreadDivCeilUint16 99.89% 9.92ns 100.85M
451 viaFloatDivCeilUint16 100.54% 9.85ns 101.50M
452 viaDoubleDivCeilUint16 77.38% 12.80ns 78.13M
453 viaLongDoubleDivCeilUint16 27.30% 36.28ns 27.56M
454 divRoundAwayUint16 9.82ns 101.85M
455 ----------------------------------------------------------------------------
456 divTruncUint32 8.12ns 123.20M
457 divFloorUint32 8.09ns 123.58M
458 divCeilUint32 8.44ns 118.55M
459 branchlessDivCeilUint32 88.27% 9.56ns 104.64M
460 branchfulDivCeilUint32 98.91% 8.53ns 117.25M
461 brokenButWidespreadDivCeilUint32 93.48% 9.02ns 110.82M
462 approxViaFloatDivCeilUint32 86.29% 9.78ns 102.30M
463 viaDoubleDivCeilUint32 66.76% 12.63ns 79.15M
464 viaLongDoubleDivCeilUint32 23.35% 36.13ns 27.68M
465 divRoundAwayUint32 8.47ns 118.03M
466 ----------------------------------------------------------------------------
467 divTruncUint64 12.38ns 80.79M
468 divFloorUint64 12.27ns 81.47M
469 divCeilUint64 12.66ns 78.99M
470 branchlessDivCeilUint64 93.46% 13.55ns 73.83M
471 branchfulDivCeilUint64 100.30% 12.62ns 79.23M
472 brokenButWidespreadDivCeilUint64 99.41% 12.73ns 78.53M
473 approxViaFloatDivCeilUint64 106.59% 11.88ns 84.19M
474 approxViaDoubleDivCeilUint64 92.14% 13.74ns 72.78M
475 viaLongDoubleDivCeilUint64 33.51% 37.78ns 26.47M
476 divRoundAwayUint64 12.34ns 81.02M
477 ============================================================================
478 */
Definition: InvokeTest.cpp:58
#define T(v)
Definition: http_parser.c:233
constexpr To ceil(std::chrono::duration< Rep, Period > const &d)
Definition: Chrono.h:91
LogLevel max
Definition: LogLevel.cpp:31
#define BENCHMARK_SUSPEND
Definition: Benchmark.h:576
BENCHMARK_RELATIVE(branchlessDivCeilInt8, iters)
void runBenchmarks()
Definition: Benchmark.cpp:456
BENCHMARK_DRAW_LINE()
char ** argv
BENCHMARK(divTruncInt8, iters)
LogLevel min
Definition: LogLevel.cpp:30
static const char *const value
Definition: Conv.cpp:50
int main(int argc, char **argv)
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258