proxygen
String-inl.h
Go to the documentation of this file.
1 /*
2  * Copyright 2014-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef FOLLY_GEN_STRING_H_
18 #error This file may only be included from folly/gen/String.h
19 #endif
20 
21 #include <folly/Conv.h>
22 #include <folly/Portability.h>
23 #include <folly/String.h>
24 
25 namespace folly {
26 namespace gen {
27 namespace detail {
28 
37 inline size_t
38 splitPrefix(StringPiece& in, StringPiece& prefix, char delimiter) {
39  size_t found = in.find(delimiter);
40  if (found != StringPiece::npos) {
41  ++found;
42  prefix.assign(in.data(), in.data() + found);
43  in.advance(found);
44  return 1;
45  }
46  prefix.clear();
47  return 0;
48 }
49 
// Splits off the leading piece of `in` that ends with the first occurrence of
// `delimiter`, where the delimiter is a sequence with its own size().
// On success `prefix` covers the bytes up to and including the delimiter,
// `in` is advanced past them, and delimiter.size() is returned. When the
// delimiter is not found, `prefix` is cleared and 0 is returned.
// NOTE(review): the line carrying this function's name and parameter list is
// not visible in this listing (it jumps from 53 to 55). The body uses `in`,
// `prefix` and `delimiter` — confirm the exact signature in the header.
53 inline size_t
55  auto found = in.find(delimiter);
56  if (found != StringPiece::npos) {
// Extend the match position so it points just past the delimiter.
57  found += delimiter.size();
58  prefix.assign(in.data(), in.data() + found);
59  in.advance(found);
60  return delimiter.size();
61  }
// No delimiter: leave `in` as it is and hand back an empty prefix.
62  prefix.clear();
63  return 0;
64 }
65 
// Body of a prefix splitter that accepts "\r", "\n" or "\r\n" as the line
// terminator. On success `prefix` covers the line including its terminator,
// `in` is advanced past it, and the terminator length (1 or 2) is returned.
// When neither character occurs, `prefix` is cleared and 0 is returned.
// NOTE(review): the declaration for this body is not visible in this listing
// (it jumps from 65 to 70); presumably another splitPrefix overload taking
// `in` and `prefix` by reference — confirm against the header.
70  const auto kCRLF = "\r\n";
71  const size_t kLenCRLF = 2;
72 
// kCRLF is used here as a set of characters to search for: the first '\r'
// or '\n' in `in`.
73  auto p = in.find_first_of(kCRLF);
74  if (p != std::string::npos) {
// Remember where the line starts, because `in` is advanced below.
75  const auto in_start = in.data();
76  size_t delim_len = 1;
77  in.advance(p);
78  // Either remove an MS-DOS CR-LF 2-byte newline, or eat 1 byte at a time.
79  if (in.removePrefix(kCRLF)) {
80  delim_len = kLenCRLF;
81  } else {
82  in.advance(delim_len);
83  }
// `in` now starts just past the terminator, so [in_start, in.data()) is the
// line together with its terminator.
84  prefix.assign(in_start, in.data());
85  return delim_len;
86  }
87  prefix.clear();
88  return 0;
89 }
90 
/**
 * Reinterprets a pointer to unsigned bytes as a pointer to `char`, so that raw
 * byte storage can be wrapped in character-based views. The address is
 * unchanged; only the pointee type differs.
 */
inline const char* ch(const unsigned char* p) {
  const char* const asChars = reinterpret_cast<const char*>(p);
  return asChars;
}
94 
95 // Chop s into pieces of at most maxLength, feed them to cb
96 template <class Callback>
97 bool consumeFixedSizeChunks(Callback& cb, StringPiece& s, uint64_t maxLength) {
98  while (!s.empty()) {
99  auto num_to_add = s.size();
100  if (maxLength) {
101  num_to_add = std::min<uint64_t>(num_to_add, maxLength);
102  }
103  if (!cb(StringPiece(s.begin(), num_to_add))) {
104  return false;
105  }
106  s.advance(num_to_add);
107  }
108  return true;
109 }
110 
111 // Consumes all of buffer, plus n chars from s.
112 template <class Callback>
113 bool consumeBufferPlus(Callback& cb, IOBuf& buf, StringPiece& s, uint64_t n) {
114  buf.reserve(0, n);
115  memcpy(buf.writableTail(), s.data(), n);
116  buf.append(n);
117  s.advance(n);
118  if (!cb(StringPiece(detail::ch(buf.data()), buf.length()))) {
119  return false;
120  }
121  buf.clear();
122  return true;
123 }
124 
125 } // namespace detail
126 
// Hands whatever is still held in buffer_ to pieceCb_ as one final piece
// (the piece may be empty), then clears the buffer. Returns false, without
// clearing, if the callback returns a false value; true otherwise.
// NOTE(review): the signature line is not visible in this listing (it jumps
// from 127 to 129); presumably this is the splitter's flush() member that
// StringResplitter's Generator::apply calls — confirm against the header.
127 template <class Callback>
// A positive maxLength_ means buffer_ must always hold fewer bytes than that.
129  CHECK(maxLength_ == 0 || buffer_.length() < maxLength_);
130  if (!pieceCb_(StringPiece(detail::ch(buffer_.data()), buffer_.length()))) {
131  return false;
132  }
133  // We are ready to handle another stream now.
134  buffer_.clear();
135  return true;
136 }
137 
// Consumes one more chunk `in` of the input stream. Every complete,
// delimiter-terminated piece is passed to pieceCb_, and any trailing
// incomplete piece is kept in buffer_ for the next call. When maxLength_ is
// nonzero, pieces longer than maxLength_ are delivered in chunks of at most
// maxLength_ bytes. Returns false as soon as the callback returns a false
// value, true otherwise.
// NOTE(review): several lines of this definition are not visible in this
// listing: the signature and the declaration of `prefix` (139-140), and the
// start of the call whose arguments appear on line 170 (169). The tooltip
// index names this `bool operator()(StringPiece in)` — confirm the rest
// against the header.
138 template <class Callback>
141  // NB This code assumes a 1-byte delimiter. It's not too hard to support
142  // multibyte delimiters, just remember that maxLength_ chunks can end up
143  // falling in the middle of a delimiter.
// The single-char splitPrefix shown earlier returns 1 or 0, so `found` is true
// exactly when a delimiter was located in `in`.
144  bool found = detail::splitPrefix(in, prefix, delimiter_);
// Leftover bytes from an earlier call must be joined with the start of `in`.
145  if (buffer_.length() != 0) {
146  if (found) {
147  uint64_t num_to_add = prefix.size();
148  if (maxLength_) {
149  CHECK(buffer_.length() < maxLength_);
150  // Consume as much of prefix as possible without exceeding maxLength_
151  num_to_add = std::min(maxLength_ - buffer_.length(), num_to_add);
152  }
153 
154  // Append part of the prefix to the buffer, and send it to the callback
155  if (!detail::consumeBufferPlus(pieceCb_, buffer_, prefix, num_to_add)) {
156  return false;
157  }
158 
// Whatever part of the first prefix did not fit is sent straight from the
// input.
159  if (!detail::consumeFixedSizeChunks(pieceCb_, prefix, maxLength_)) {
160  return false;
161  }
162 
163  found = detail::splitPrefix(in, prefix, delimiter_);
164  // Post-conditions:
165  // - we consumed all of buffer_ and all of the first prefix.
166  // - found, in, and prefix reflect the second delimiter_ search
167  } else if (maxLength_ && buffer_.length() + in.size() >= maxLength_) {
168  // Send all of buffer_, plus a bit of in, to the callback
// NOTE(review): the opening of this call (listing line 169) is missing; the
// argument list matches detail::consumeBufferPlus — confirm.
170  pieceCb_, buffer_, in, maxLength_ - buffer_.length())) {
171  return false;
172  }
173  // Post-conditions:
174  // - we consumed all of buffer, and the minimal # of bytes from in
175  // - found is false
176  } // Otherwise: found is false & we cannot invoke the callback this turn
177  }
178  // Post-condition: buffer_ is nonempty only if found is false **and**
179  // len(buffer + in) < maxLength_.
180 
181  // Send lines to callback directly from input (no buffer)
182  while (found) { // Buffer guaranteed to be empty
183  if (!detail::consumeFixedSizeChunks(pieceCb_, prefix, maxLength_)) {
184  return false;
185  }
186  found = detail::splitPrefix(in, prefix, delimiter_);
187  }
188 
189  // No more delimiters left; consume 'in' until it is shorter than maxLength_
190  if (maxLength_) {
191  while (in.size() >= maxLength_) { // Buffer is guaranteed to be empty
192  if (!pieceCb_(StringPiece(in.begin(), maxLength_))) {
193  return false;
194  }
195  in.advance(maxLength_);
196  }
197  }
198 
199  if (!in.empty()) { // Buffer may be nonempty
200  // Incomplete line left, append to buffer
201  buffer_.reserve(0, in.size());
202  memcpy(buffer_.writableTail(), in.data(), in.size());
203  buffer_.append(in.size());
204  }
// Same invariant that the flush path checks before it emits the buffer.
205  CHECK(maxLength_ == 0 || buffer_.length() < maxLength_);
206  return true;
207 }
208 
209 namespace detail {
210 
// Operator that takes a source of string pieces and re-splits the combined
// stream on a single-character delimiter, yielding one StringPiece per
// delimited piece. With keepDelimiter set, each yielded piece keeps its
// trailing delimiter; otherwise the delimiter is removed.
// NOTE(review): the member declarations are not visible in this listing
// (212-213 here and 222-223 inside Generator); the constructors initialise
// delimiter_ and keepDelimiter_ — confirm their types against the header.
211 class StringResplitter : public Operator<StringResplitter> {
214 
215  public:
216  explicit StringResplitter(char delimiter, bool keepDelimiter = false)
217  : delimiter_(delimiter), keepDelimiter_(keepDelimiter) {}
218 
// Generator produced by composing this operator with a source. It owns a
// copy of the source plus the splitting settings.
219  template <class Source>
220  class Generator : public GenImpl<StringPiece, Generator<Source>> {
221  Source source_;
224 
225  public:
226  Generator(Source source, char delimiter, bool keepDelimiter)
227  : source_(std::move(source)),
228  delimiter_(delimiter),
229  keepDelimiter_(keepDelimiter) {}
230 
// Runs the source through a stream splitter and forwards each piece to
// `body`. Returns false if the source or the final flush stopped early.
231  template <class Body>
232  bool apply(Body&& body) const {
233  auto splitter =
234  streamSplitter(this->delimiter_, [this, &body](StringPiece s) {
235  // The stream ended with a delimiter; our contract is to swallow
236  // the final empty piece.
237  if (s.empty()) {
238  return true;
239  }
// A piece that does not end with the delimiter is forwarded unchanged.
240  if (s.back() != this->delimiter_) {
241  return body(s);
242  }
243  if (!keepDelimiter_) {
244  s.pop_back(); // Remove the 1-character delimiter
245  }
246  return body(s);
247  });
248  if (!source_.apply(splitter)) {
249  return false;
250  }
// Emit whatever the splitter still holds once the source is exhausted.
251  return splitter.flush();
252  }
253 
// The generator is infinite exactly when its source is.
254  static constexpr bool infinite = Source::infinite;
255  };
256 
// Compose with an rvalue source: the source is moved into the generator.
257  template <class Source, class Value, class Gen = Generator<Source>>
258  Gen compose(GenImpl<Value, Source>&& source) const {
259  return Gen(std::move(source.self()), delimiter_, keepDelimiter_);
260  }
261 
// Compose with a const source: the source is copied into the generator.
262  template <class Source, class Value, class Gen = Generator<Source>>
263  Gen compose(const GenImpl<Value, Source>& source) const {
264  return Gen(source.self(), delimiter_, keepDelimiter_);
265  }
266 };
267 
// Generator that yields the pieces of a fixed StringPiece separated by a
// delimiter of type DelimiterType. Yielded pieces do not include the
// delimiter.
// NOTE(review): the class header line (269), a member line (271) and a local
// declaration in apply() (281) are not visible in this listing. The base
// class names the type SplitStringSource and the constructor initialises
// source_ — confirm the missing declarations against the header.
268 template <class DelimiterType = char>
270  : public GenImpl<StringPiece, SplitStringSource<DelimiterType>> {
272  DelimiterType delimiter_;
273 
274  public:
275  SplitStringSource(const StringPiece source, DelimiterType delimiter)
276  : source_(source), delimiter_(std::move(delimiter)) {}
277 
// Calls `body` once per piece, in order. Returns false as soon as `body`
// returns a false value, true after all pieces have been delivered.
278  template <class Body>
279  bool apply(Body&& body) const {
// Work on a copy of the source view so this const method can advance it.
280  StringPiece rest(source_);
// splitPrefix returns the matched delimiter length, or 0 when no delimiter
// remains, which ends the loop.
282  while (size_t delim_len = splitPrefix(rest, prefix, this->delimiter_)) {
283  prefix.subtract(delim_len); // Remove the delimiter
284  if (!body(prefix)) {
285  return false;
286  }
287  }
// Text after the last delimiter is delivered only when it is non-empty.
288  if (!rest.empty()) {
289  if (!body(rest)) {
290  return false;
291  }
292  }
293  return true;
294  }
295 };
296 
// Operator that joins all values of a source into a freshly created Output,
// with the delimiter inserted between consecutive values.
// NOTE(review): the member declaration (listing line 305) is not visible
// here; the constructor initialises delimiter_ from a Delimiter — confirm.
303 template <class Delimiter, class Output>
304 class Unsplit : public Operator<Unsplit<Delimiter, Output>> {
306 
307  public:
308  explicit Unsplit(const Delimiter& delimiter) : delimiter_(delimiter) {}
309 
// Builds the result by delegating to UnsplitBuffer, which appends into a
// local default-constructed Output; that Output is then returned by value.
310  template <class Source, class Value>
311  Output compose(const GenImpl<Value, Source>& source) const {
312  Output outputBuffer;
313  UnsplitBuffer<Delimiter, Output> unsplitter(delimiter_, &outputBuffer);
314  unsplitter.compose(source);
315  return outputBuffer;
316  }
317 };
318 
// Operator that appends all values of a source to a caller-provided output
// buffer, with the delimiter inserted between values. The buffer is held by
// raw pointer, which the constructor CHECKs is non-null.
// NOTE(review): a member declaration (listing line 327) is not visible here;
// the constructor initialises delimiter_ from a Delimiter — confirm.
325 template <class Delimiter, class OutputBuffer>
326 class UnsplitBuffer : public Operator<UnsplitBuffer<Delimiter, OutputBuffer>> {
328  OutputBuffer* outputBuffer_;
329 
330  public:
331  UnsplitBuffer(const Delimiter& delimiter, OutputBuffer* outputBuffer)
332  : delimiter_(delimiter), outputBuffer_(outputBuffer) {
333  CHECK(outputBuffer);
334  }
335 
// Appends every value of `source` to *outputBuffer_ via toAppend.
336  template <class Source, class Value>
337  void compose(const GenImpl<Value, Source>& source) const {
338  // If the output buffer is empty, we skip inserting the delimiter for the
339  // first element.
340  bool skipDelim = outputBuffer_->empty();
341  source | [&](Value v) {
342  if (skipDelim) {
// Only the first value appended to an initially empty buffer skips the
// delimiter.
343  skipDelim = false;
344  toAppend(std::forward<Value>(v), outputBuffer_);
345  } else {
346  toAppend(delimiter_, std::forward<Value>(v), outputBuffer_);
347  }
348  };
349  }
350 };
351 
/**
 * Identity helper: returns exactly what it is given, as type `Target`.
 *
 * The second, unnamed template parameter is ignored by the body. It gives
 * callers a slot for a parameter-pack element, so the call can be repeated
 * once per pack element while always producing a `Target` (see its use with
 * `Targets` in SplitTo).
 */
template <class Target, class = void>
inline auto passthrough(Target target) -> Target {
  return target;
}
359 
361 #ifdef __clang__
362 // Clang isn't happy with eatField() hack below.
363 #pragma GCC diagnostic ignored "-Wreturn-stack-address"
364 #endif // __clang__
365 
// Callable that splits one line on a delimiter into exactly
// sizeof...(Targets) fields, converts each field to the corresponding type
// in Targets with To<>, and constructs a TargetContainer from the converted
// values.
376 template <class TargetContainer, class Delimiter, class... Targets>
377 class SplitTo {
378  Delimiter delimiter_;
379 
380  public:
381  explicit SplitTo(Delimiter delimiter) : delimiter_(delimiter) {}
382 
// Throws std::runtime_error("field count mismatch") when split() reports
// failure for `line`.
383  TargetContainer operator()(StringPiece line) const {
384  int i = 0;
385  StringPiece fields[sizeof...(Targets)];
386  // HACK(tjackson): Used for referencing fields[] corresponding to variadic
387  // template parameters.
// Each call returns the next slot of fields[] and advances the index.
388  auto eatField = [&]() -> StringPiece& { return fields[i++]; };
// NOTE(review): this pack expansion and the one in the return statement
// each call eatField() once per element of Targets. The result is only
// consistent if both expansions evaluate their arguments in the same
// order, and C++ leaves function-argument evaluation order unspecified —
// confirm on the supported compilers.
389  if (!split(
390  delimiter_,
391  line,
392  detail::passthrough<StringPiece&, Targets>(eatField())...)) {
393  throw std::runtime_error("field count mismatch");
394  }
// Rewind so the conversion pass reads the fields from the first slot again.
395  i = 0;
396  return TargetContainer(To<Targets>()(eatField())...);
397  }
398 };
399 
401 
402 } // namespace detail
403 
404 } // namespace gen
405 } // namespace folly
SplitTo(Delimiter delimiter)
Definition: String-inl.h:381
Output compose(const GenImpl< Value, Source > &source) const
Definition: String-inl.h:311
#define FOLLY_POP_WARNING
Definition: Portability.h:179
const char * ch(const unsigned char *p)
Definition: String-inl.h:91
auto v
SplitStringSource(const StringPiece source, DelimiterType delimiter)
Definition: String-inl.h:275
bool consumeBufferPlus(Callback &cb, IOBuf &buf, StringPiece &s, uint64_t n)
Definition: String-inl.h:113
#define FOLLY_PUSH_WARNING
Definition: Portability.h:178
StringResplitter(char delimiter, bool keepDelimiter=false)
Definition: String-inl.h:216
size_type find_first_of(const_range_type needles) const
Definition: Range.h:773
constexpr detail::Map< Move > move
Definition: Base-inl.h:2567
size_type find(const_range_type str) const
Definition: Range.h:721
void advance(size_type n)
Definition: Range.h:672
STL namespace.
constexpr size_type size() const
Definition: Range.h:431
const uint8_t * data() const
Definition: IOBuf.h:499
void pop_back()
Definition: Range.h:715
void reserve(std::size_t minHeadroom, std::size_t minTailroom)
Definition: IOBuf.h:741
bool operator()(StringPiece in)
Definition: String-inl.h:139
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
Generator(Source source, char delimiter, bool keepDelimiter)
Definition: String-inl.h:226
bool prefix(Cursor &c, uint32_t expected)
size_t splitPrefix(StringPiece &in, StringPiece &prefix, char delimiter)
Definition: String-inl.h:38
uint8_t * writableTail()
Definition: IOBuf.h:526
void clear()
Definition: Range.h:411
Gen compose(GenImpl< Value, Source > &&source) const
Definition: String-inl.h:258
constexpr bool empty() const
Definition: Range.h:443
LogLevel min
Definition: LogLevel.cpp:30
bool Value(const T &value, M matcher)
folly::std Delimiter
constexpr Iter data() const
Definition: Range.h:446
std::size_t length() const
Definition: IOBuf.h:533
void assign(Iter start, Iter end)
Definition: Range.h:416
S split(const StringPiece source, char delimiter)
Definition: String.h:61
bool apply(Body &&body) const
Definition: String-inl.h:279
Target passthrough(Target target)
Definition: String-inl.h:356
bool removePrefix(const const_range_type &prefix)
Definition: Range.h:893
void toAppend(char value, Tgt *result)
Definition: Conv.h:406
constexpr Iter begin() const
Definition: Range.h:452
void clear()
Definition: IOBuf.h:728
static const size_type npos
Definition: Range.h:197
Gen compose(const GenImpl< Value, Source > &source) const
Definition: String-inl.h:263
StreamSplitter< Callback > streamSplitter(char delimiter, Callback &&pieceCb, uint64_t capacity=0)
Definition: String.h:239
static set< string > s
const Self & self() const
Definition: Core-inl.h:71
void subtract(size_type n)
Definition: Range.h:679
Unsplit(const Delimiter &delimiter)
Definition: String-inl.h:308
Range< const char * > StringPiece
bool consumeFixedSizeChunks(Callback &cb, StringPiece &s, uint64_t maxLength)
Definition: String-inl.h:97
UnsplitBuffer(const Delimiter &delimiter, OutputBuffer *outputBuffer)
Definition: String-inl.h:331
std::unique_ptr< unsigned char[]> buffer_
Definition: Random.cpp:105
TargetContainer operator()(StringPiece line) const
Definition: String-inl.h:383
void append(std::size_t amount)
Definition: IOBuf.h:689
value_type & back()
Definition: Range.h:468
void compose(const GenImpl< Value, Source > &source) const
Definition: String-inl.h:337