proxygen
StringBenchmark.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2014-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <atomic>
18 
19 #include <glog/logging.h>
20 
21 #include <folly/Benchmark.h>
22 #include <folly/String.h>
24 #include <folly/gen/Base.h>
25 #include <folly/gen/String.h>
26 
27 using namespace folly;
28 using namespace folly::gen;
29 using std::pair;
30 using std::set;
31 using std::tuple;
32 using std::vector;
33 
34 namespace {
35 
36 static std::atomic<int> testSize(1000);
37 static vector<fbstring> testStrVector =
38  seq(1, testSize.load()) | eachTo<fbstring>() | as<vector>();
39 static auto testFileContent = from(testStrVector) | unsplit('\n');
40 
41 const char* const kLine = "The quick brown fox jumped over the lazy dog.\n";
42 const size_t kLineCount = 10000;
43 std::string bigLines;
44 const size_t kSmallLineSize = 17;
45 std::vector<std::string> smallLines;
46 
47 void initStringResplitterBenchmark() {
48  bigLines.reserve(kLineCount * strlen(kLine));
49  for (size_t i = 0; i < kLineCount; ++i) {
50  bigLines += kLine;
51  }
52  size_t remaining = bigLines.size();
53  size_t pos = 0;
54  while (remaining) {
55  size_t n = std::min(kSmallLineSize, remaining);
56  smallLines.push_back(bigLines.substr(pos, n));
57  pos += n;
58  remaining -= n;
59  }
60 }
61 
62 size_t len(folly::StringPiece s) {
63  return s.size();
64 }
65 
66 } // namespace
67 
68 BENCHMARK(StringResplitter_Big, iters) {
69  size_t s = 0;
70  while (iters--) {
71  s += from({bigLines}) | resplit('\n') | map(&len) | sum;
72  }
74 }
75 
76 BENCHMARK_RELATIVE(StringResplitter_Small, iters) {
77  size_t s = 0;
78  while (iters--) {
79  s += from(smallLines) | resplit('\n') | map(&len) | sum;
80  }
82 }
83 
85 
86 BENCHMARK(StringSplit_Old, iters) {
87  size_t s = 0;
88  std::string line(kLine);
89  while (iters--) {
90  std::vector<StringPiece> parts;
91  split(' ', line, parts);
92  s += parts.size();
93  }
95 }
96 
97 BENCHMARK_RELATIVE(StringSplit_Gen_Vector, iters) {
98  size_t s = 0;
99  StringPiece line(kLine);
100  while (iters--) {
101  s += (split(line, ' ') | as<vector>()).size();
102  }
104 }
105 
107 
108 BENCHMARK(StringSplit_Old_ReuseVector, iters) {
109  size_t s = 0;
110  std::string line(kLine);
111  std::vector<StringPiece> parts;
112  while (iters--) {
113  parts.clear();
114  split(' ', line, parts);
115  s += parts.size();
116  }
118 }
119 
120 BENCHMARK_RELATIVE(StringSplit_Gen_ReuseVector, iters) {
121  size_t s = 0;
122  StringPiece line(kLine);
123  std::vector<StringPiece> parts;
124  while (iters--) {
125  parts.clear();
126  split(line, ' ') | appendTo(parts);
127  s += parts.size();
128  }
130 }
131 
132 BENCHMARK_RELATIVE(StringSplit_Gen, iters) {
133  size_t s = 0;
134  StringPiece line(kLine);
135  while (iters--) {
136  s += split(line, ' ') | count;
137  }
139 }
140 
141 BENCHMARK_RELATIVE(StringSplit_Gen_Take, iters) {
142  size_t s = 0;
143  StringPiece line(kLine);
144  while (iters--) {
145  s += split(line, ' ') | take(10) | count;
146  }
148 }
149 
151 
152 BENCHMARK(StringUnsplit_Old, iters) {
153  size_t s = 0;
154  while (iters--) {
155  fbstring joined;
156  join(',', testStrVector, joined);
157  s += joined.size();
158  }
160 }
161 
162 BENCHMARK_RELATIVE(StringUnsplit_Old_ReusedBuffer, iters) {
163  size_t s = 0;
164  fbstring joined;
165  while (iters--) {
166  joined.clear();
167  join(',', testStrVector, joined);
168  s += joined.size();
169  }
171 }
172 
174  size_t s = 0;
175  while (iters--) {
176  fbstring joined = from(testStrVector) | unsplit(',');
177  s += joined.size();
178  }
180 }
181 
182 BENCHMARK_RELATIVE(StringUnsplit_Gen_ReusedBuffer, iters) {
183  size_t s = 0;
185  while (iters--) {
186  buffer.clear();
187  from(testStrVector) | unsplit(',', &buffer);
188  s += buffer.size();
189  }
191 }
192 
194 
195 void StringUnsplit_Gen(size_t iters, size_t joinSize) {
196  std::vector<fbstring> v;
198  FOR_EACH_RANGE (i, 0, joinSize) { v.push_back(to<fbstring>(rand())); }
199  }
200  size_t s = 0;
202  while (iters--) {
203  buffer.clear();
204  from(v) | unsplit(',', &buffer);
205  s += buffer.size();
206  }
208 }
209 
214 
216 void Lines_Gen(size_t iters, int joinSize) {
217  size_t s = 0;
218  StringPiece content = testFileContent;
219  for (size_t i = 0; i < iters; ++i) {
220  s += lines(content.subpiece(0, joinSize)) | take(100) | count;
221  }
223 }
224 
228 
230 
231 // clang-format off
232 fbstring records = seq<size_t>(1, 1000)
233  | mapped([](size_t i) {
234  return folly::to<fbstring>(i, ' ', i * i, ' ', i * i * i);
235  })
236  | unsplit('\n');
237 // clang-format o
238 
239 BENCHMARK(Records_EachToTuple, iters) {
240  size_t s = 0;
241  for (size_t i = 0; i < iters; i += 1000) {
242  // clang-format off
243  s += split(records, '\n')
244  | eachToTuple<int, size_t, StringPiece>(' ')
245  | get<1>()
246  | sum;
247  // clang-format on
248  }
250 }
251 
252 BENCHMARK_RELATIVE(Records_VectorStringPieceReused, iters) {
253  size_t s = 0;
254  std::vector<StringPiece> fields;
255  for (size_t i = 0; i < iters; i += 1000) {
256  // clang-format off
257  s += split(records, '\n')
258  | mapped([&](StringPiece line) {
259  fields.clear();
260  folly::split(' ', line, fields);
261  CHECK(fields.size() == 3);
262  return std::make_tuple(
263  folly::to<int>(fields[0]),
264  folly::to<size_t>(fields[1]),
265  StringPiece(fields[2]));
266  })
267  | get<1>()
268  | sum;
269  // clang-format on
270  }
272 }
273 
274 BENCHMARK_RELATIVE(Records_VectorStringPiece, iters) {
275  size_t s = 0;
276  for (size_t i = 0; i < iters; i += 1000) {
277  // clang-format off
278  s += split(records, '\n')
279  | mapped([](StringPiece line) {
280  std::vector<StringPiece> fields;
281  folly::split(' ', line, fields);
282  CHECK(fields.size() == 3);
283  return std::make_tuple(
284  folly::to<int>(fields[0]),
285  folly::to<size_t>(fields[1]),
286  StringPiece(fields[2]));
287  })
288  | get<1>()
289  | sum;
290  // clang-format on
291  }
293 }
294 
295 BENCHMARK_RELATIVE(Records_VectorString, iters) {
296  size_t s = 0;
297  for (size_t i = 0; i < iters; i += 1000) {
298  // clang-format off
299  s += split(records, '\n')
300  | mapped([](StringPiece line) {
301  std::vector<std::string> fields;
302  folly::split(' ', line, fields);
303  CHECK(fields.size() == 3);
304  return std::make_tuple(
305  folly::to<int>(fields[0]),
306  folly::to<size_t>(fields[1]),
307  StringPiece(fields[2]));
308  })
309  | get<1>()
310  | sum;
311  // clang-format on
312  }
314 }
315 
316 // Results from an Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
317 // ============================================================================
318 // folly/gen/test/StringBenchmark.cpp relative time/iter iters/s
319 // ============================================================================
320 // StringResplitter_Big 108.58us 9.21K
321 // StringResplitter_Small 10.60% 1.02ms 976.48
322 // ----------------------------------------------------------------------------
323 // StringSplit_Old 357.82ns 2.79M
324 // StringSplit_Gen_Vector 105.10% 340.46ns 2.94M
325 // ----------------------------------------------------------------------------
326 // StringSplit_Old_ReuseVector 96.45ns 10.37M
327 // StringSplit_Gen_ReuseVector 124.01% 77.78ns 12.86M
328 // StringSplit_Gen 140.10% 68.85ns 14.52M
329 // StringSplit_Gen_Take 122.97% 78.44ns 12.75M
330 // ----------------------------------------------------------------------------
331 // StringUnsplit_Old 42.99us 23.26K
332 // StringUnsplit_Old_ReusedBuffer 100.48% 42.79us 23.37K
333 // StringUnsplit_Gen 96.37% 44.61us 22.42K
334 // StringUnsplit_Gen_ReusedBuffer 116.96% 36.76us 27.20K
335 // ----------------------------------------------------------------------------
336 // StringUnsplit_Gen(1000) 44.71us 22.37K
337 // StringUnsplit_Gen(2000) 49.28% 90.72us 11.02K
338 // StringUnsplit_Gen(4000) 24.05% 185.91us 5.38K
339 // StringUnsplit_Gen(8000) 12.23% 365.42us 2.74K
340 // ----------------------------------------------------------------------------
341 // Records_EachToTuple 101.43us 9.86K
342 // Records_VectorStringPieceReused 93.72% 108.22us 9.24K
343 // Records_VectorStringPiece 37.14% 273.11us 3.66K
344 // Records_VectorString 16.70% 607.47us 1.65K
345 // ============================================================================
346 
347 int main(int argc, char* argv[]) {
348  gflags::ParseCommandLineFlags(&argc, &argv, true);
349  initStringResplitterBenchmark();
350  runBenchmarks();
351  return 0;
352 }
size_type size() const
Definition: FBString.h:1337
std::vector< uint8_t > buffer(kBufferSize+16)
S resplit(char delimiter, bool keepDelimiter=false)
Definition: String.h:56
auto v
void Lines_Gen(size_t iters, int joinSize)
constexpr detail::Count count
Definition: Base-inl.h:2551
static std::atomic< int > testSize(1000)
From from(Container &source)
Definition: Base.h:438
#define BENCHMARK_SUSPEND
Definition: Benchmark.h:576
constexpr size_type size() const
Definition: Range.h:431
Gen seq(Value first, Value last)
Definition: Base.h:484
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
void runBenchmarks()
Definition: Benchmark.cpp:456
tuple make_tuple()
Definition: gtest-tuple.h:675
void split(const Delim &delimiter, const String &input, std::vector< OutputType > &out, bool ignoreEmpty)
Definition: String-inl.h:382
constexpr detail::Sum sum
Definition: Base-inl.h:2549
BENCHMARK_RELATIVE(StringResplitter_Small, iters)
#define FOR_EACH_RANGE(i, begin, end)
Definition: Foreach.h:313
char ** argv
LogLevel min
Definition: LogLevel.cpp:30
void StringUnsplit_Gen(size_t iters, size_t joinSize)
Range subpiece(size_type first, size_type length=npos) const
Definition: Range.h:686
S split(const StringPiece source, char delimiter)
Definition: String.h:61
Map map(Predicate pred=Predicate())
Definition: Base.h:545
Append appendTo(Collection &collection)
Definition: Base.h:824
S lines(StringPiece source)
Definition: String.h:80
#define BENCHMARK_PARAM(name, param)
Definition: Benchmark.h:417
BENCHMARK(fbFollyGlobalBenchmarkBaseline)
Definition: Benchmark.cpp:84
const char * string
Definition: Conv.cpp:212
BENCHMARK_DRAW_LINE()
static set< string > s
#define BENCHMARK_RELATIVE_PARAM(name, param)
Definition: Benchmark.h:518
detail::Take take(Number count)
Definition: Base-inl.h:2582
Map mapped(Predicate pred=Predicate())
Definition: Base.h:540
void join(const Delim &delimiter, Iterator begin, Iterator end, String &output)
Definition: String-inl.h:498
Unsplit unsplit(const Delimiter &delimiter)
Definition: String.h:101
Range< const char * > StringPiece
fbstring records
int main(int argc, char *argv[])
auto doNotOptimizeAway(const T &datum) -> typename std::enable_if< !detail::DoNotOptimizeAwayNeedsIndirect< T >::value >::type
Definition: Benchmark.h:258