proxygen
RFC1867.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree. An additional grant
7  * of patent rights can be found in the PATENTS file in the same directory.
8  *
9  */
12 
13 using folly::IOBuf;
14 using folly::IOBufQueue;
15 using folly::StringPiece;
16 using folly::io::Cursor;
17 using std::string;
18 
19 namespace {
// Dummy request line fed to HTTP1xCodec so that it is ready to parse a
// header block.
const std::string kDummyGet = "GET / HTTP/1.0";

// Outcome of comparing buffered input against the multipart boundary:
//   YES     - the complete boundary is present
//   NO      - the input differs from the boundary
//   PARTIAL - the input ran out while it was still matching
enum class BoundaryResult { YES, NO, PARTIAL };
28 
29 BoundaryResult isBoundary(const IOBuf& buf,
30  uint32_t offset,
31  char const *boundary,
32  size_t boundarylen) {
33  assert(offset <= buf.length());
34  const IOBuf *crtBuf = &buf;
35  do {
36  size_t crtLen = crtBuf->length() - offset;
37  const uint8_t *crtData = crtBuf->data() + offset;
38  size_t cmplen = std::min(crtLen, boundarylen);
39  if (memcmp(crtData, boundary, cmplen) == 0) {
40  if (cmplen == boundarylen) {
41  return BoundaryResult::YES;
42  } else {
43  // beginning of a partial match
44  boundary += cmplen;
45  boundarylen -= cmplen;
46  }
47  } else {
48  return BoundaryResult::NO;
49  }
50  offset = 0;
51  crtBuf = crtBuf->next();
52  } while (crtBuf != &buf);
53 
54  return BoundaryResult::PARTIAL;
55 }
56 
57 }
58 
59 namespace proxygen {
60 
61 std::unique_ptr<IOBuf> RFC1867Codec::onIngress(std::unique_ptr<IOBuf> data) {
62  static auto dummyBuf = IOBuf::wrapBuffer(kDummyGet.data(),
63  kDummyGet.length());
64  IOBufQueue result{IOBufQueue::cacheChainLength()};
65  bool foundBoundary = false;
66  BoundaryResult br = BoundaryResult::NO;
67 
68  input_.append(std::move(data));
69  while (!input_.empty()) {
70  switch (state_) {
71  case ParserState::START:
72  // first time, must start with boundary without leading \n
73  br = isBoundary(*input_.front(), 0, boundary_.data() + 1,
74  boundary_.length() - 1);
75  if (br == BoundaryResult::NO) {
76  if (callback_) {
77  LOG(ERROR) << "Invalid starting sequence";
78  callback_->onError();
79  }
80  state_ = ParserState::ERROR;
81  return nullptr;
82  } else if (br == BoundaryResult::PARTIAL) {
83  return input_.move();
84  }
85  input_.trimStart(boundary_.length() - 1);
86  bytesProcessed_ += boundary_.length() - 1;
87  state_ = ParserState::HEADERS_START;
88  // fall through
89 
90  case ParserState::HEADERS_START:
91  {
92  if (input_.chainLength() < 3) {
93  return input_.move();
94  }
95  Cursor c(input_.front());
96  char firstTwo[2];
97  c.pull(firstTwo, 2);
98  // We have at least 3 chars available to read
99  uint8_t toTrim = 3;
100  if (memcmp(firstTwo, "--", 2) == 0) {
101  do {
102  auto ch = c.read<char>();
103  if (ch == '\n') {
104  input_.trimStart(toTrim);
105  state_ = ParserState::DONE;
106  } else if (ch == '\r') {
107  // Every \r we encounter is a char we must trim but we must
108  // make sure we have sufficient data available in input_ to
109  // keep reading (toTrim is always one pos ahead to handle the
110  // expected \n)
111  ++toTrim;
112  if (input_.chainLength() < toTrim) {
113  return input_.move();
114  }
115  } else {
116  state_ = ParserState::ERROR;
117  }
118  } while (state_ == ParserState::HEADERS_START);
119  break;
120  }
121  }
122  headerParser_.setParserPaused(false);
123  headerParser_.onIngress(*dummyBuf);
124  CHECK(!parseError_);
125  state_ = ParserState::HEADERS;
126  // fall through
127 
129  while (!parseError_ && input_.front() &&
130  state_ == ParserState::HEADERS) {
131  size_t bytesParsed = headerParser_.onIngress(*input_.front());
132  input_.trimStart(bytesParsed);
133  bytesProcessed_ += bytesParsed;
134  }
135  if (parseError_) {
136  if (callback_) {
137  LOG(ERROR) << "Error parsing header data: ";
138  VLOG(3) << IOBufPrinter::printHexFolly(input_.front());
139  callback_->onError();
140  }
141  state_ = ParserState::ERROR;
142  return nullptr;
143  }
144  break;
145 
146  case ParserState::FIELD_DATA:
147  result = readToBoundary(foundBoundary);
148  value_.append(result.move());
149  if (!value_.empty() && callback_) {
150  if (callback_->onFieldData(value_.move(), bytesProcessed_) < 0) {
151  LOG(ERROR) << "Callback returned error";
152  state_ = ParserState::ERROR;
153  return nullptr;
154  }
155  }
156  if (foundBoundary) {
157  if (callback_) {
158  callback_->onFieldEnd(true, bytesProcessed_);
159  }
160  state_ = ParserState::HEADERS_START;
161  } else {
162  if (input_.chainLength() > 0) {
163  VLOG(5) << "Trailing input="
164  << IOBufPrinter::printHexFolly(input_.front());
165  }
166  return input_.move();
167  }
168  break;
169  case ParserState::DONE:
170  case ParserState::ERROR:
171  // abort, consume all input
172  return nullptr;
173  }
174  }
175  return nullptr;
176 }
177 
178 void RFC1867Codec::onHeadersComplete(HTTPCodec::StreamID /*stream*/,
179  std::unique_ptr<HTTPMessage> msg) {
180  static const StringPiece kName("name", 4);
181  static const StringPiece kFilename("filename", 8);
182  static const StringPiece kFormData("form-data", 9);
183 
184  const auto& contentDisp =
185  msg->getHeaders().getSingleOrEmpty(HTTP_HEADER_CONTENT_DISPOSITION);
186  string name;
187  folly::Optional<string> filename; // filename is optional
188  HTTPMessage::splitNameValuePieces(
189  contentDisp, ';', '=',
190  [&] (folly::StringPiece parameter, folly::StringPiece value) {
191  // TODO: Trim whitespace first
192  // Strip quotes if present
193  if (value.size() >= 2 && value[0] == '\"' &&
194  value[value.size() - 1] == '\"') {
195  value.reset(value.data() + 1, value.size() - 2);
196  }
197  if (parameter == kName) {
198  name = value.str();
199  } else if (parameter == kFilename) {
200  filename = value.str();
201  } else if (parameter != kFormData) {
202  LOG(WARNING) << "Ignoring parameter " << parameter << " value \""
203  << value << '"';
204  }
205  });
206  if (name.empty()) {
207  if (callback_) {
208  LOG(ERROR) << "name empty";
209  callback_->onError();
210  }
211  state_ = ParserState::ERROR;
212  return;
213  } else {
214  state_ = ParserState::FIELD_DATA;
215  if (callback_ && callback_->onFieldStart(name, filename,
216  std::move(msg),
217  bytesProcessed_) < 0) {
218  field_ = name;
219  LOG(WARNING) << "Callback returned error";
220  state_ = ParserState::ERROR;
221  }
222  }
223 }
224 
225 IOBufQueue RFC1867Codec::readToBoundary(bool& foundBoundary) {
226  IOBufQueue result{IOBufQueue::cacheChainLength()};
227  BoundaryResult boundaryResult = BoundaryResult::NO;
228 
229  while (!input_.empty() && boundaryResult != BoundaryResult::PARTIAL) {
230  const IOBuf* head = input_.front();
231  uint64_t len = head->length();
232  const uint8_t *ptr = head->data();
233 
234  /* iterate through first character matches */
235  while (len > 0 && (ptr = (const uint8_t*)memchr(ptr, boundary_[0], len))) {
236  /* calculate length after match */
237  uint64_t readlen = (ptr - head->data());
238  len = head->length() - readlen;
239  boundaryResult =
240  isBoundary(*head, readlen, boundary_.data(), boundary_.length());
241  if (boundaryResult == BoundaryResult::YES) {
242  CHECK(readlen < head->length());
243  bool hasCr = false;
244  if (readlen == 0 && pendingCR_) {
245  pendingCR_.reset();
246  }
247  if (readlen > 0) {
248  // If the last read char is a CR omit from result
249  Cursor c(head);
250  c.skip(readlen - 1);
251  uint8_t ch = c.read<uint8_t>();
252  if (ch == '\r') {
253  --readlen;
254  hasCr = true;
255  }
256  }
257  result.append(std::move(pendingCR_));
258  result.append(input_.split(readlen));
259  uint32_t trimLen = boundary_.length() + (hasCr ? 1 : 0);
260  input_.trimStart(trimLen);
261  bytesProcessed_ += readlen + trimLen;
262  foundBoundary = true;
263  return result;
264  } else if (boundaryResult == BoundaryResult::PARTIAL) {
265  break;
266  } else if (pendingCR_) {
267  // not a match, append pending CR to result
268  result.append(std::move(pendingCR_));
269  }
270 
271  /* next character */
272  ptr++; len--;
273  }
274  uint64_t resultLen = ptr ? ptr - head->data() : head->length();
275  // Put pendingCR_ in result if there was no partial match in head, or a
276  // partial match starting after the first character
277  if ((boundaryResult == BoundaryResult::NO || resultLen > 0) &&
278  pendingCR_) {
279  result.append(std::move(pendingCR_));
280  }
281  // the boundary does not start through resultLen, append it
282  // to result, except maybe the last char if it's a CR.
283  if (resultLen > 0 && head->data()[resultLen - 1] == '\r') {
284  result.append(input_.split(resultLen - 1));
285  CHECK(!pendingCR_);
286  pendingCR_ = input_.split(1);
287  } else {
288  result.append(input_.split(resultLen));
289  }
290  bytesProcessed_ += resultLen;
291  }
292 
293  // reached the end but no boundary found
294  foundBoundary = false;
295 
296  return result;
297 }
298 
299 void RFC1867Codec::onIngressEOM() {
300  if (state_ == ParserState::FIELD_DATA) {
301  LOG(WARNING) << "Field not terminated by boundary";
302  if (callback_) {
303  callback_->onFieldEnd(false, bytesProcessed_);
304  }
305  }
306  if (state_ != ParserState::HEADERS_START && state_ != ParserState::ERROR &&
307  state_ != ParserState::DONE) {
308  if (callback_) {
309  LOG(ERROR) << "onIngressEOM with state_=" << (uint8_t)state_;
310  callback_->onError();
311  }
312  }
313  state_ = ParserState::START;
314 }
315 
316 }
BoundaryResult
Definition: RFC1867.cpp:23
void * ptr
void append(std::unique_ptr< folly::IOBuf > &&buf, bool pack=false)
Definition: IOBufQueue.cpp:143
std::string str() const
Definition: Range.h:591
constexpr detail::Map< Move > move
Definition: Base-inl.h:2567
constexpr size_type size() const
Definition: Range.h:431
const uint8_t * data() const
Definition: IOBuf.h:499
auto ch
const char * name
Definition: http_parser.c:437
LogLevel min
Definition: LogLevel.cpp:30
constexpr Iter data() const
Definition: Range.h:446
std::size_t length() const
Definition: IOBuf.h:533
static const char *const value
Definition: Conv.cpp:50
void skip(size_t len)
Definition: Cursor.h:371
const char * string
Definition: Conv.cpp:212
uint64_t StreamID
Definition: HTTPCodec.h:49
folly::Function< void()> callback_
Range< const char * > StringPiece
char c
static constexpr uint64_t data[1]
Definition: Fingerprint.cpp:43
void reset(Iter start, size_type size)
Definition: Range.h:421