proxygen
StringTest.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2014-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <iosfwd>
18 #include <map>
19 #include <vector>
20 
22 #include <folly/gen/String.h>
24 
25 using namespace folly::gen;
26 using namespace folly;
27 using std::make_tuple;
28 using std::ostream;
29 using std::pair;
30 using std::string;
31 using std::tuple;
32 using std::unique_ptr;
33 using std::vector;
34 
35 using vec = vector<string>;
36 
37 static auto collect = eachTo<std::string>() | as<vector>();
38 
39 TEST(StringGen, EmptySplit) {
40  {
41  auto input = "";
42  auto expected = vec{};
43  EXPECT_EQ(expected, split(input, ',') | collect);
44  }
45 
46  // The last delimiter is eaten, just like std::getline
47  {
48  auto input = ",";
49  auto expected = vec{""};
50  EXPECT_EQ(expected, split(input, ',') | collect);
51  }
52 
53  {
54  auto input = ",,";
55  auto expected = vec{"", ""};
56  EXPECT_EQ(expected, split(input, ',') | collect);
57  }
58 
59  {
60  auto input = ",,";
61  auto expected = vec{""};
62  EXPECT_EQ(expected, split(input, ',') | take(1) | collect);
63  }
64 }
65 
66 TEST(StringGen, Split) {
67  {
68  auto input = "hello,, world, goodbye, meow";
69  auto expected = vec{"hello", "", " world", " goodbye", " meow"};
70  EXPECT_EQ(expected, split(input, ',') | collect);
71  }
72 
73  {
74  auto input = "hello,, world, goodbye, meow";
75  auto expected = vec{"hello", "", " world"};
76  EXPECT_EQ(expected, split(input, ',') | take(3) | collect);
77  }
78 
79  {
80  auto input = "hello,, world, goodbye, meow";
81  auto expected = vec{"hello", "", " world", " goodbye", " meow"};
82  EXPECT_EQ(expected, split(input, ",") | take(5) | collect);
83  }
84 
85  {
86  auto input = "hello,, world, goodbye, meow";
87  auto expected = vec{"hello,", "world", "goodbye", "meow"};
88  EXPECT_EQ(expected, split(input, ", ") | collect);
89  }
90 }
91 
92 TEST(StringGen, SplitByNewLine) {
93  {
94  auto input = "hello\n\n world\r\n goodbye\r me\n\row";
95  auto expected = vec{"hello", "", " world", " goodbye", " me", "", "ow"};
96  EXPECT_EQ(expected, lines(input) | collect);
97  }
98 }
99 
100 TEST(StringGen, EmptyResplit) {
101  {
102  auto input = vec{""};
103  auto expected = vec{};
104  EXPECT_EQ(expected, from(input) | resplit(',') | collect);
105  }
106 
107  // The last delimiter is eaten, just like std::getline
108  {
109  auto input = vec{","};
110  auto expected = vec{""};
111  EXPECT_EQ(expected, from(input) | resplit(',') | collect);
112  }
113 
114  {
115  auto input = vec{",,"};
116  auto expected = vec{"", ""};
117  EXPECT_EQ(expected, from(input) | resplit(',') | collect);
118  }
119 }
120 
121 TEST(StringGen, Resplit) {
122  {
123  auto input = vec{"hello,, world, goodbye, meow"};
124  auto expected = vec{"hello", "", " world", " goodbye", " meow"};
125  EXPECT_EQ(expected, from(input) | resplit(',') | collect);
126  }
127 
128  {
129  auto input = vec{"hel", "lo,", ", world", ", goodbye, m", "eow"};
130  auto expected = vec{"hello", "", " world", " goodbye", " meow"};
131  EXPECT_EQ(expected, from(input) | resplit(',') | collect);
132  }
133 }
134 
135 TEST(StringGen, ResplitKeepDelimiter) {
136  {
137  auto input = vec{"hello,, world, goodbye, meow"};
138  auto expected = vec{"hello,", ",", " world,", " goodbye,", " meow"};
139  EXPECT_EQ(expected, from(input) | resplit(',', true) | collect);
140  }
141 
142  {
143  auto input = vec{"hel", "lo,", ", world", ", goodbye, m", "eow"};
144  auto expected = vec{"hello,", ",", " world,", " goodbye,", " meow"};
145  EXPECT_EQ(expected, from(input) | resplit(',', true) | collect);
146  }
147 }
148 
149 TEST(StringGen, EachToTuple) {
150  {
151  auto lines = "2:1.414:yo 3:1.732:hi";
152  // clang-format off
153  auto actual
154  = split(lines, ' ')
155  | eachToTuple<int, double, std::string>(':')
156  | as<vector>();
157  // clang-format on
158  vector<tuple<int, double, std::string>> expected{
159  make_tuple(2, 1.414, "yo"),
160  make_tuple(3, 1.732, "hi"),
161  };
162  EXPECT_EQ(expected, actual);
163  }
164  {
165  auto lines = "2 3";
166  // clang-format off
167  auto actual
168  = split(lines, ' ')
169  | eachToTuple<int>(',')
170  | as<vector>();
171  // clang-format on
172  vector<tuple<int>> expected{
173  make_tuple(2),
174  make_tuple(3),
175  };
176  EXPECT_EQ(expected, actual);
177  }
178  {
179  // StringPiece target
180  auto lines = "1:cat 2:dog";
181  // clang-format off
182  auto actual
183  = split(lines, ' ')
184  | eachToTuple<int, StringPiece>(':')
185  | as<vector>();
186  // clang-format on
187  vector<tuple<int, StringPiece>> expected{
188  make_tuple(1, "cat"),
189  make_tuple(2, "dog"),
190  };
191  EXPECT_EQ(expected, actual);
192  }
193  {
194  // Empty field
195  auto lines = "2:tjackson:4 3::5";
196  // clang-format off
197  auto actual
198  = split(lines, ' ')
199  | eachToTuple<int, fbstring, int>(':')
200  | as<vector>();
201  // clang-format on
202  vector<tuple<int, fbstring, int>> expected{
203  make_tuple(2, "tjackson", 4),
204  make_tuple(3, "", 5),
205  };
206  EXPECT_EQ(expected, actual);
207  }
208  {
209  // Excess fields
210  auto lines = "1:2 3:4:5";
211  // clang-format off
212  EXPECT_THROW(
213  (split(lines, ' ')
214  | eachToTuple<int, int>(':')
215  | as<vector>()),
216  std::runtime_error);
217  // clang-format on
218  }
219  {
220  // Missing fields
221  auto lines = "1:2:3 4:5";
222  // clang-format off
223  EXPECT_THROW(
224  (split(lines, ' ')
225  | eachToTuple<int, int, int>(':')
226  | as<vector>()),
227  std::runtime_error);
228  // clang-format on
229  }
230 }
231 
232 TEST(StringGen, EachToPair) {
233  {
234  // char delimiters
235  auto lines = "2:1.414 3:1.732";
236  // clang-format off
237  auto actual
238  = split(lines, ' ')
239  | eachToPair<int, double>(':')
240  | as<std::map<int, double>>();
241  // clang-format on
242  std::map<int, double> expected{
243  {3, 1.732},
244  {2, 1.414},
245  };
246  EXPECT_EQ(expected, actual);
247  }
248  {
249  // string delimiters
250  auto lines = "ab=>cd ef=>gh";
251  // clang-format off
252  auto actual
253  = split(lines, ' ')
254  | eachToPair<string, string>("=>")
255  | as<std::map<string, string>>();
256  // clang-format on
257  std::map<string, string> expected{
258  {"ab", "cd"},
259  {"ef", "gh"},
260  };
261  EXPECT_EQ(expected, actual);
262  }
263 }
264 
266  vector<string> ins,
267  char delim,
268  uint64_t maxLength,
269  vector<string> outs) {
270  vector<std::string> pieces;
271  auto splitter = streamSplitter(
272  delim,
273  [&pieces](StringPiece s) {
274  pieces.push_back(string(s.begin(), s.end()));
275  return true;
276  },
277  maxLength);
278  for (const auto& in : ins) {
279  splitter(in);
280  }
281  splitter.flush();
282 
283  EXPECT_EQ(outs.size(), pieces.size());
284  for (size_t i = 0; i < outs.size(); ++i) {
285  EXPECT_EQ(outs[i], pieces[i]);
286  }
287 
288  // Also check the concatenated input against the same output
289  if (ins.size() > 1) {
290  checkResplitMaxLength({folly::join("", ins)}, delim, maxLength, outs);
291  }
292 }
293 
294 TEST(StringGen, ResplitMaxLength) {
295  // clang-format off
297  {"hel", "lo,", ", world", ", goodbye, m", "ew"}, ',', 5,
298  {"hello", ",", ",", " worl", "d,", " good", "bye,", " mew"});
299  // " meow" cannot be "end of stream", since it's maxLength long
301  {"hel", "lo,", ", world", ", goodbye, m", "eow"}, ',', 5,
302  {"hello", ",", ",", " worl", "d,", " good", "bye,", " meow", ""});
304  {"||", "", "", "", "|a|b", "cdefghijklmn", "|opqrst",
305  "uvwx|y|||", "z", "0123456789", "|", ""}, '|', 2,
306  {"|", "|", "|", "a|", "bc", "de", "fg", "hi", "jk", "lm", "n|", "op",
307  "qr", "st", "uv", "wx", "|", "y|", "|", "|", "z0", "12", "34", "56",
308  "78", "9|", ""});
309  // clang-format on
310 }
311 
// Invokes `fn` once for each sample string of the unsplit test suite, in a
// fixed order. The samples cover ordinary lists, a lone space, the empty
// string, and inputs that begin with the delimiter.
template <typename F>
void runUnsplitSuite(F fn) {
  const char* const samples[] = {
      "hello, world",
      "hello,world,goodbye",
      " ",
      "",
      ", ",
      ", a, b,c",
  };
  for (const char* sample : samples) {
    fn(sample);
  }
}
321 
322 TEST(StringGen, Unsplit) {
323  auto basicFn = [](StringPiece s) {
324  EXPECT_EQ(split(s, ',') | unsplit(','), s);
325  };
326 
327  auto existingBuffer = [](StringPiece s) {
328  folly::fbstring buffer("asdf");
329  split(s, ',') | unsplit(',', &buffer);
330  auto expected = folly::to<folly::fbstring>("asdf", s.empty() ? "" : ",", s);
331  EXPECT_EQ(expected, buffer);
332  };
333 
334  auto emptyBuffer = [](StringPiece s) {
336  split(s, ',') | unsplit(',', &buffer);
337  EXPECT_EQ(s, buffer);
338  };
339 
340  auto stringDelim = [](StringPiece s) {
341  EXPECT_EQ(s, split(s, ',') | unsplit(","));
343  split(s, ',') | unsplit(",", &buffer);
344  EXPECT_EQ(buffer, s);
345  };
346 
347  runUnsplitSuite(basicFn);
348  runUnsplitSuite(existingBuffer);
349  runUnsplitSuite(emptyBuffer);
350  runUnsplitSuite(stringDelim);
351  EXPECT_EQ("1, 2, 3", seq(1, 3) | unsplit(", "));
352 }
353 
354 TEST(StringGen, Batch) {
355  std::vector<std::string> chunks{
356  "on", "e\nt", "w", "o", "\nthr", "ee\nfo", "ur\n"};
357  std::vector<std::string> lines{"one", "two", "three", "four"};
358  EXPECT_EQ(4, from(chunks) | resplit('\n') | count);
359  EXPECT_EQ(4, from(chunks) | resplit('\n') | batch(2) | rconcat | count);
360  EXPECT_EQ(4, from(chunks) | resplit('\n') | batch(3) | rconcat | count);
361  // clang-format off
362  EXPECT_EQ(
363  lines,
364  from(chunks)
365  | resplit('\n')
366  | eachTo<std::string>()
367  | batch(3)
368  | rconcat
369  | as<vector>());
370  // clang-format on
371 }
372 
373 TEST(StringGen, UncurryTuple) {
374  folly::StringPiece file = "1\t2\t3\n1\t4\t9";
375  auto rows = split(file, '\n') | eachToTuple<int, int, int>('\t');
376  auto productSum =
377  rows | map(uncurry([](int x, int y, int z) { return x * y * z; })) | sum;
378  EXPECT_EQ(42, productSum);
379 }
380 
381 TEST(StringGen, UncurryPair) {
382  folly::StringPiece file = "2\t3\n4\t9";
383  auto rows = split(file, '\n') | eachToPair<int, int>('\t');
384  auto productSum =
385  rows | map(uncurry([](int x, int y) { return x * y; })) | sum;
386  EXPECT_EQ(42, productSum);
387 }
Definition: InvokeTest.cpp:58
auto chunks
std::vector< uint8_t > buffer(kBufferSize+16)
S resplit(char delimiter, bool keepDelimiter=false)
Definition: String.h:56
void runUnsplitSuite(F fn)
Definition: StringTest.cpp:313
#define EXPECT_THROW(statement, expected_exception)
Definition: gtest.h:1843
auto uncurry(F &&f) -> detail::apply_tuple::Uncurry< typename std::decay< F >::type >
Definition: ApplyTuple.h:178
constexpr detail::Count count
Definition: Base-inl.h:2551
From from(Container &source)
Definition: Base.h:438
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
Gen seq(Value first, Value last)
Definition: Base.h:484
detail::Batch batch(size_t batchSize)
Definition: Base-inl.h:2602
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
tuple make_tuple()
Definition: gtest-tuple.h:675
constexpr detail::Sum sum
Definition: Base-inl.h:2549
void checkResplitMaxLength(vector< string > ins, char delim, uint64_t maxLength, vector< string > outs)
Definition: StringTest.cpp:265
constexpr detail::RangeConcat rconcat
Definition: Base-inl.h:2571
S split(const StringPiece source, char delimiter)
Definition: String.h:61
Map map(Predicate pred=Predicate())
Definition: Base.h:545
Definition: InvokeTest.cpp:72
Definition: Traits.h:588
S lines(StringPiece source)
Definition: String.h:80
constexpr Iter end() const
Definition: Range.h:455
constexpr Iter begin() const
Definition: Range.h:452
Future< std::vector< typename std::iterator_traits< InputIterator >::value_type::value_type > > collect(InputIterator first, InputIterator last)
Definition: Future-inl.h:1536
const char * string
Definition: Conv.cpp:212
StreamSplitter< Callback > streamSplitter(char delimiter, Callback &&pieceCb, uint64_t capacity=0)
Definition: String.h:239
static set< string > s
Definition: InvokeTest.cpp:65
detail::Take take(Number count)
Definition: Base-inl.h:2582
void join(const Delim &delimiter, Iterator begin, Iterator end, String &output)
Definition: String-inl.h:498
Unsplit unsplit(const Delimiter &delimiter)
Definition: String.h:101
Collect as()
Definition: Base.h:811
TEST(IStream, ByLine)
Definition: IStreamTest.cpp:32