proxygen
BitsBenchmark.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2016-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // @author Tudor Bosman (tudorb@fb.com)
18 
19 #include <algorithm>
20 #include <vector>
21 
22 #include <folly/CppAttributes.h>
23 #include <folly/Random.h>
24 #include <folly/lang/Assume.h>
25 #include <folly/lang/Bits.h>
26 
27 #include <folly/Benchmark.h>
28 
29 using namespace folly;
30 
31 BENCHMARK(nextPowTwoClz, iters) {
32  for (unsigned long i = 0; i < iters; ++i) {
33  auto x = folly::nextPowTwo(i);
35  }
36 }
37 
40  bool b;
41  for (unsigned long i = 0; i < iters; ++i) {
42  b = folly::isPowTwo(i);
44  }
45 }
46 
48 BENCHMARK(reverse, iters) {
49  uint64_t b = 0;
50  for (unsigned long i = 0; i < iters; ++i) {
51  b = folly::bitReverse(i + b);
53  }
54 }
55 
56 namespace {
57 
58 template <class F>
59 void testPartialLoadUnaligned(F f, size_t iters) {
60  constexpr size_t kBufSize = 32;
61 
62  std::vector<char> buf;
64  buf.resize(kBufSize + 7); // Allow unguarded tail reads.
65  std::generate(
66  buf.begin(), buf.end(), [] { return folly::Random::rand32(255); });
67  }
68 
69  uint64_t ret = 0;
70  for (size_t i = 0; i < iters; ++i) {
71  // Make the position depend on the previous result to break loop pipelining.
72  auto pos = ret % kBufSize;
73  ret = f(buf.data() + pos, i % 8);
75  }
76 }
77 
84 uint64_t partialLoadUnalignedSwitch(const char* p, size_t l) {
85  folly::assume(l < 8);
86 
87  uint64_t r = 0;
88  switch (l) {
89  case 7:
90  r = static_cast<uint64_t>(folly::loadUnaligned<uint32_t>(p + 3)) << 24;
92  case 3:
93  r |= static_cast<uint64_t>(folly::loadUnaligned<uint16_t>(p + 1)) << 8;
95  case 1:
96  r |= *p;
97  break;
98 
99  case 6:
100  r = static_cast<uint64_t>(folly::loadUnaligned<uint16_t>(p + 4)) << 32;
102  case 4:
103  r |= folly::loadUnaligned<uint32_t>(p);
104  break;
105 
106  case 5:
107  r = static_cast<uint64_t>(folly::loadUnaligned<uint32_t>(p + 4)) << 32;
108  r |= *p;
109  break;
110 
111  case 2:
112  r = folly::loadUnaligned<uint16_t>(p);
113  break;
114 
115  case 0:
116  break;
117  }
118 
119  return r;
120 }
121 
122 } // namespace
123 
125 
126 BENCHMARK(PartialLoadUnaligned, iters) {
127  testPartialLoadUnaligned(folly::partialLoadUnaligned<uint64_t>, iters);
128 }
129 
130 BENCHMARK(PartialLoadUnalignedMemcpy, iters) {
131  testPartialLoadUnaligned(
132  [](const char* p, size_t l) {
133  folly::assume(l < 8);
134 
135  uint64_t ret;
136  memcpy(&ret, p, l);
137  return ret;
138  },
139  iters);
140 }
141 
142 BENCHMARK(PartialLoadUnalignedSwitch, iters) {
143  testPartialLoadUnaligned(partialLoadUnalignedSwitch, iters);
144 }
145 
146 int main(int argc, char** argv) {
147  gflags::ParseCommandLineFlags(&argc, &argv, true);
149  return 0;
150 }
151 
152 /*
153 Benchmarks run on Intel Xeon CPU E5-2678 v3 @ 2.50GHz with --bm_min_usec=500000
154 
155 ============================================================================
156 folly/lang/test/BitsBenchmark.cpp relative time/iter iters/s
157 ============================================================================
158 nextPowTwoClz 0.00fs Infinity
159 ----------------------------------------------------------------------------
160 isPowTwo 0.00fs Infinity
161 ----------------------------------------------------------------------------
162 reverse 4.18ns 239.14M
163 ----------------------------------------------------------------------------
164 PartialLoadUnaligned 2.22ns 449.80M
165 PartialLoadUnalignedMemcpy 7.53ns 132.78M
166 PartialLoadUnalignedSwitch 2.04ns 491.30M
167 ============================================================================
168 */
Definition: InvokeTest.cpp:58
auto f
char b
constexpr T nextPowTwo(T const v)
Definition: Bits.h:149
T bitReverse(T n)
Definition: Bits.h:369
#define BENCHMARK_SUSPEND
Definition: Benchmark.h:576
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
void runBenchmarks()
Definition: Benchmark.cpp:456
constexpr bool isPowTwo(T const v)
Definition: Bits.h:161
char ** argv
int main(int argc, char *argv[])
BENCHMARK(fbFollyGlobalBenchmarkBaseline)
Definition: Benchmark.cpp:84
BENCHMARK_DRAW_LINE()
static uint32_t rand32()
Definition: Random.h:213
FOLLY_ALWAYS_INLINE void assume(bool cond)
Definition: Assume.h:41
#define FOLLY_FALLTHROUGH
Definition: CppAttributes.h:63
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258