proxygen
StringBenchmark.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2014-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <folly/String.h>
18 
19 #include <boost/algorithm/string.hpp>
20 #include <folly/Benchmark.h>
21 #include <folly/Random.h>
22 #include <random>
23 
24 using namespace folly;
25 using namespace std;
26 
27 BENCHMARK(libc_tolower, iters) {
28  static const size_t kSize = 256;
29  // This array is static to keep the compiler from optimizing the
30  // entire function down to a no-op if it has an inlined impl of
31  // tolower and thus is able to tell that there are no side-effects.
32  // No side-effects + no writes to anything other than local variables
33  // + no return value = no need to run any of the code in the function.
34  // gcc, for example, makes that optimization with -O2.
35  static char input[kSize];
36  for (size_t i = 0; i < kSize; i++) {
37  input[i] = (char)(i & 0xff);
38  }
39  for (auto i = iters; i > 0; i--) {
40  for (size_t offset = 0; offset < kSize; offset++) {
41  input[offset] = tolower(input[offset]);
42  }
43  }
44 }
45 
46 BENCHMARK(folly_toLowerAscii, iters) {
47  static const size_t kSize = 256;
48  static char input[kSize];
49  for (size_t i = 0; i < kSize; i++) {
50  input[i] = (char)(i & 0xff);
51  }
52  for (auto i = iters; i > 0; i--) {
53  folly::toLowerAscii(input, kSize);
54  }
55 }
56 
57 // A simple benchmark that tests various output sizes for a simple
58 // input; the goal is to measure the output buffer resize code cost.
59 void stringPrintfOutputSize(int iters, int param) {
60  string buffer;
62  buffer.resize(param, 'x');
63  }
64 
65  for (int64_t i = 0; i < iters; ++i) {
66  string s = stringPrintf("msg: %d, %d, %s", 10, 20, buffer.c_str());
67  }
68 }
69 
70 // The first few of these tend to fit in the inline buffer, while the
71 // subsequent ones cross that limit, trigger a second vsnprintf, and
72 // exercise a different codepath.
79 
80 // Benchmark simple stringAppendf behavior to show a pathology Lovro
81 // reported (t5735468).
82 BENCHMARK(stringPrintfAppendfBenchmark, iters) {
83  for (unsigned int i = 0; i < iters; ++i) {
84  string s;
86  s.reserve(300000);
87  }
88  for (int j = 0; j < 300000; ++j) {
89  stringAppendf(&s, "%d", 1);
90  }
91  }
92 }
93 
94 namespace {
95 fbstring cbmString;
96 fbstring cbmEscapedString;
97 fbstring cEscapedString;
98 fbstring cUnescapedString;
99 const size_t kCBmStringLength = 64 << 10;
100 const uint32_t kCPrintablePercentage = 90;
101 
102 fbstring uribmString;
103 fbstring uribmEscapedString;
104 fbstring uriEscapedString;
105 fbstring uriUnescapedString;
106 const size_t kURIBmStringLength = 256;
107 const uint32_t kURIPassThroughPercentage = 50;
108 
109 fbstring hexlifyInput;
110 fbstring hexlifyOutput;
111 const size_t kHexlifyLength = 1024;
112 
113 void initBenchmark() {
114  std::mt19937 rnd;
115 
116  // C escape
117  std::uniform_int_distribution<uint32_t> printable(32, 126);
118  std::uniform_int_distribution<uint32_t> nonPrintable(0, 160);
119  std::uniform_int_distribution<uint32_t> percentage(0, 99);
120 
121  cbmString.reserve(kCBmStringLength);
122  for (size_t i = 0; i < kCBmStringLength; ++i) {
123  unsigned char c;
124  if (percentage(rnd) < kCPrintablePercentage) {
125  c = printable(rnd);
126  } else {
127  c = nonPrintable(rnd);
128  // Generate characters in both non-printable ranges:
129  // 0..31 and 127..255
130  if (c >= 32) {
131  c += (126 - 32) + 1;
132  }
133  }
134  cbmString.push_back(c);
135  }
136 
137  cbmEscapedString = cEscape<fbstring>(cbmString);
138 
139  // URI escape
140  std::uniform_int_distribution<uint32_t> passthrough('a', 'z');
141  std::string encodeChars = " ?!\"',+[]";
142  std::uniform_int_distribution<uint32_t> encode(0, encodeChars.size() - 1);
143 
144  uribmString.reserve(kURIBmStringLength);
145  for (size_t i = 0; i < kURIBmStringLength; ++i) {
146  unsigned char c;
147  if (percentage(rnd) < kURIPassThroughPercentage) {
148  c = passthrough(rnd);
149  } else {
150  c = encodeChars[encode(rnd)];
151  }
152  uribmString.push_back(c);
153  }
154 
155  uribmEscapedString = uriEscape<fbstring>(uribmString);
156 
157  // hexlify
158  hexlifyInput.resize(kHexlifyLength);
159  Random::secureRandom(&hexlifyInput[0], kHexlifyLength);
160  folly::hexlify(hexlifyInput, hexlifyOutput);
161 }
162 
163 BENCHMARK(BM_cEscape, iters) {
164  while (iters--) {
165  cEscapedString = cEscape<fbstring>(cbmString);
166  doNotOptimizeAway(cEscapedString.size());
167  }
168 }
169 
170 BENCHMARK(BM_cUnescape, iters) {
171  while (iters--) {
172  cUnescapedString = cUnescape<fbstring>(cbmEscapedString);
173  doNotOptimizeAway(cUnescapedString.size());
174  }
175 }
176 
177 BENCHMARK(BM_uriEscape, iters) {
178  while (iters--) {
179  uriEscapedString = uriEscape<fbstring>(uribmString);
180  doNotOptimizeAway(uriEscapedString.size());
181  }
182 }
183 
184 BENCHMARK(BM_uriUnescape, iters) {
185  while (iters--) {
186  uriUnescapedString = uriUnescape<fbstring>(uribmEscapedString);
187  doNotOptimizeAway(uriUnescapedString.size());
188  }
189 }
190 
191 BENCHMARK(BM_unhexlify, iters) {
192  // iters/sec = bytes output per sec
193  std::string unhexed;
194  folly::StringPiece hex = hexlifyOutput;
195  for (; iters >= hex.size(); iters -= hex.size()) {
196  folly::unhexlify(hex, unhexed);
197  }
198  iters -= iters % 2; // round down to an even number of chars
199  hex = hex.subpiece(0, iters);
200  folly::unhexlify(hex, unhexed);
201 }
202 
203 } // namespace
204 
206 
207 BENCHMARK(splitOnSingleChar, iters) {
208  static const std::string line = "one:two:three:four";
209  for (size_t i = 0; i < iters << 4; ++i) {
210  std::vector<StringPiece> pieces;
211  folly::split(':', line, pieces);
212  }
213 }
214 
215 BENCHMARK(splitOnSingleCharFixed, iters) {
216  static const std::string line = "one:two:three:four";
217  for (size_t i = 0; i < iters << 4; ++i) {
218  StringPiece a, b, c, d;
219  folly::split(':', line, a, b, c, d);
220  }
221 }
222 
223 BENCHMARK(splitOnSingleCharFixedAllowExtra, iters) {
224  static const std::string line = "one:two:three:four";
225  for (size_t i = 0; i < iters << 4; ++i) {
226  StringPiece a, b, c, d;
227  folly::split<false>(':', line, a, b, c, d);
228  }
229 }
230 
231 BENCHMARK(splitStr, iters) {
232  static const std::string line = "one-*-two-*-three-*-four";
233  for (size_t i = 0; i < iters << 4; ++i) {
234  std::vector<StringPiece> pieces;
235  folly::split("-*-", line, pieces);
236  }
237 }
238 
239 BENCHMARK(splitStrFixed, iters) {
240  static const std::string line = "one-*-two-*-three-*-four";
241  for (size_t i = 0; i < iters << 4; ++i) {
242  StringPiece a, b, c, d;
243  folly::split("-*-", line, a, b, c, d);
244  }
245 }
246 
247 BENCHMARK(boost_splitOnSingleChar, iters) {
248  static const std::string line = "one:two:three:four";
249  bool (*pred)(char) = [](char c) -> bool { return c == ':'; };
250  for (size_t i = 0; i < iters << 4; ++i) {
251  std::vector<boost::iterator_range<std::string::const_iterator>> pieces;
252  boost::split(pieces, line, pred);
253  }
254 }
255 
256 BENCHMARK(joinCharStr, iters) {
257  static const std::vector<std::string> input = {
258  "one", "two", "three", "four", "five", "six", "seven"};
259  for (size_t i = 0; i < iters << 4; ++i) {
261  folly::join(':', input, output);
262  }
263 }
264 
265 BENCHMARK(joinStrStr, iters) {
266  static const std::vector<std::string> input = {
267  "one", "two", "three", "four", "five", "six", "seven"};
268  for (size_t i = 0; i < iters << 4; ++i) {
270  folly::join(":", input, output);
271  }
272 }
273 
274 BENCHMARK(joinInt, iters) {
275  static const auto input = {123, 456, 78910, 1112, 1314, 151, 61718};
276  for (size_t i = 0; i < iters << 4; ++i) {
278  folly::join(":", input, output);
279  }
280 }
281 
282 int main(int argc, char** argv) {
283  gflags::ParseCommandLineFlags(&argc, &argv, true);
284  initBenchmark();
286  return 0;
287 }
288 
289 /*
290 Results on x86_64:
291 ============================================================================
292 folly/test/StringBenchmark.cpp relative time/iter iters/s
293 ============================================================================
294 libc_tolower 773.30ns 1.29M
295 folly_toLowerAscii 65.04ns 15.38M
296 stringPrintfOutputSize(1) 224.67ns 4.45M
297 stringPrintfOutputSize(4) 231.53ns 4.32M
298 stringPrintfOutputSize(16) 286.54ns 3.49M
299 stringPrintfOutputSize(64) 305.47ns 3.27M
300 stringPrintfOutputSize(256) 1.48us 674.45K
301 stringPrintfOutputSize(1024) 5.89us 169.72K
302 stringPrintfAppendfBenchmark 34.43ms 29.04
303 BM_cEscape 461.51us 2.17K
304 BM_cUnescape 328.19us 3.05K
305 BM_uriEscape 4.36us 229.25K
306 BM_uriUnescape 2.22us 450.64K
307 splitOnSingleChar 1.46us 687.21K
308 splitOnSingleCharFixed 133.02ns 7.52M
309 splitOnSingleCharFixedAllowExtra 74.35ns 13.45M
310 splitStr 2.36us 424.00K
311 splitStrFixed 282.38ns 3.54M
312 boost_splitOnSingleChar 2.83us 353.12K
313 joinCharStr 2.65us 376.93K
314 joinStrStr 2.64us 378.09K
315 joinInt 3.89us 257.37K
316 ============================================================================
317 */
size_type size() const
Definition: FBString.h:1337
std::vector< uint8_t > buffer(kBufferSize+16)
bool unhexlify(const InputString &input, OutputString &output)
Definition: String-inl.h:616
unique_ptr< IOBuf > encode(vector< HPACKHeader > &headers, HPACKEncoder &encoder)
char b
static std::enable_if< std::is_integral< T >::value &&!std::is_same< T, bool >::value, T >::type secureRandom()
Definition: Random.h:112
#define BENCHMARK_SUSPEND
Definition: Benchmark.h:576
STL namespace.
constexpr size_type size() const
Definition: Range.h:431
std::string stringPrintf(const char *format,...)
Definition: String.cpp:223
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
void runBenchmarks()
Definition: Benchmark.cpp:456
void split(const Delim &delimiter, const String &input, std::vector< OutputType > &out, bool ignoreEmpty)
Definition: String-inl.h:382
void reserve(size_type res_arg=0)
Definition: FBString.h:1355
char ** argv
void stringPrintfOutputSize(int iters, int param)
Range subpiece(size_type first, size_type length=npos) const
Definition: Range.h:686
S split(const StringPiece source, char delimiter)
Definition: String.h:61
Target passthrough(Target target)
Definition: String-inl.h:356
char a
void push_back(const value_type c)
Definition: FBString.h:1437
std::string & stringAppendf(std::string *output, const char *format,...)
Definition: String.cpp:240
#define BENCHMARK_PARAM(name, param)
Definition: Benchmark.h:417
void toLowerAscii(char *str, size_t length)
Definition: String.cpp:601
BENCHMARK(fbFollyGlobalBenchmarkBaseline)
Definition: Benchmark.cpp:84
const char * string
Definition: Conv.cpp:212
static set< string > s
void join(const Delim &delimiter, Iterator begin, Iterator end, String &output)
Definition: String-inl.h:498
bool hexlify(const InputString &input, OutputString &output, bool append_output)
Definition: String-inl.h:596
char c
int main(int argc, char *argv[])
void resize(size_type n, value_type c=value_type())
Definition: FBString.h:1936
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258