proxygen
ParseURL.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree. An additional grant
7  * of patent rights can be found in the PATENTS file in the same directory.
8  *
9  */
11 
12 #include <algorithm>
15 
17 
18 using folly::fbstring;
19 using std::string;
20 
21 namespace proxygen {
22 
45 // Helper function to check whether a URL has a valid scheme.
46 // http_parser only supports the full form of a scheme, followed by "://",
47 // and the scheme must consist solely of alphabetic characters.
    // Returns false when "://" is absent or when nothing precedes it.
49  auto schemeEnd = url.find("://");
50  if (schemeEnd == std::string::npos || schemeEnd == 0) {
51  return false;
52  }
53 
    // Everything before "://" is the candidate scheme.
54  auto scheme = url.subpiece(0, schemeEnd);
    // NOTE(review): std::isalpha takes an int; passing a plain char with a
    // negative value is undefined behavior. Consider casting to unsigned char.
55  return std::all_of(scheme.begin(), scheme.end(), [](auto _) {
56  return std::isalpha(_);
57  });
58 }
59 
61  if (validateScheme(url_)) {
    // NOTE(review): gaps in the embedded line numbers show that some
    // statements of this function are not visible in this listing.
62  struct http_parser_url u;
63  memset(&u, 0, sizeof(struct http_parser_url)); // zero-initialize before use
    // valid_ is true only when http_parser_parse_url returns 0.
64  valid_ = !(http_parser_parse_url(url_.data(), url_.size(), 0, &u));
65 
66  if(valid_) {
67  // Every field of http_parser_url was zeroed above, so a field that is
68  // absent from the URL is left as [0, 0] (offset 0, length 0) and yields
69  // "". There is therefore no need to check field_set before reading a
70  // field.
71 
74 
    // A host that does not start at offset 0 and is directly preceded by '['
    // takes the first branch.
75  if(u.field_data[UF_HOST].off != 0 &&
76  url_[u.field_data[UF_HOST].off - 1] == '[') {
77  // special case: bracketed host, e.g. [::1]
79  u.field_data[UF_HOST].len + 2);
80  } else {
83  }
84 
85  port_ = u.port;
86 
93 
    // The authority is "host:port" when port_ is non-zero, otherwise the
    // host alone.
94  authority_ = (port_) ? folly::to<std::string>(host_, ":", port_)
95  : host_.str();
96  }
97  } else {
    // No valid scheme: hand the URL to parseNonFully().
98  parseNonFully();
99  }
100 }
101 
    // An empty URL is invalid.
103  if (url_.empty()) {
104  valid_ = false;
105  return;
106  }
107 
108  // Check if the URL has only printable characters and no control character.
109  if (!validateURL(url_)) {
110  valid_ = false;
111  return;
112  }
113 
    // Position of the first occurrence of each delimiter (npos when absent).
114  auto pathStart = url_.find('/');
115  auto queryStart = url_.find('?');
116  auto hashStart = url_.find('#');
117 
    // std::string::npos is the largest size_t value, so this min() always
    // evaluates to hashStart: the query runs up to '#' or to the end.
118  auto queryEnd = std::min(hashStart, std::string::npos);
    // The path ends, and the authority ends, at whichever delimiter comes
    // first.
119  auto pathEnd = std::min(queryStart, hashStart);
120  auto authorityEnd = std::min(pathStart, pathEnd);
121 
122  authority_ = url_.subpiece(0, authorityEnd).str();
123 
124  if (pathStart < pathEnd) {
125  path_ = url_.subpiece(pathStart, pathEnd - pathStart);
126  } else {
127  // no '/' before the query or fragment, e.g. '?query=3'
128  path_ = "";
129  }
130 
131  if (queryStart < queryEnd) {
    // Skip the '?' itself; the query stops just before queryEnd.
132  query_ = url_.subpiece(queryStart + 1, queryEnd - queryStart - 1);
133  } else if (queryStart != std::string::npos && hashStart < queryStart) {
    // The first '?' appears after the '#': treated as an invalid URL.
134  valid_ = false;
135  return;
136  }
137 
138  if (hashStart != std::string::npos) {
    // The fragment is everything after the '#'.
139  fragment_ = url_.subpiece(hashStart + 1, std::string::npos);
140  }
141 
142  if (!parseAuthority()) {
143  valid_ = false;
144  return;
145  }
146 
147  valid_ = true;
148 }
149 
    // Locate the first '[' and the first ']' in the authority.
151  auto left = authority_.find("[");
152  auto right = authority_.find("]");
153 
    // Start the ':' search at the ']' when one exists, so that colons before
    // the closing bracket are not treated as the port separator.
154  auto pos = authority_.find(":", right != std::string::npos ? right : 0);
155  if (pos != std::string::npos) {
156  try {
    // Everything after the ':' must convert to a uint16_t port.
157  port_ = folly::to<uint16_t>(
158  folly::StringPiece(authority_, pos+1, std::string::npos));
159  } catch (...) {
    // Any exception from the conversion makes the authority invalid.
160  return false;
161  }
162  }
163 
164  if (left == std::string::npos && right == std::string::npos) {
165  // not an IPv6 literal
    // NOTE(review): a statement between these two lines is not visible in
    // this listing (gap in the embedded line numbers).
167  return true;
168  } else if (left < right && right != std::string::npos) {
169  // an IPv6 literal; host_ spans '[' through ']' inclusive
170  host_ = folly::StringPiece(authority_, left, right - left + 1);
171  return true;
172  } else {
    // Unbalanced or misordered brackets.
173  return false;
174  }
175 }
176 
    // A URL that is not valid never reports an IP-address host.
178  if (!valid_) {
179  return false;
180  }
181 
182  stripBrackets();
    // A ':' in the bracket-less host selects IPv6 parsing; otherwise IPv4.
183  int af = hostNoBrackets_.find(':') == std::string::npos ? AF_INET : AF_INET6;
    // Output buffers for inet_pton; their contents are not read afterwards.
184  char buf4[sizeof(in_addr)];
185  char buf6[sizeof(in6_addr)];
186  // we have to make a copy of hostNoBrackets_ since the string piece is not
187  // null-terminated
    // The result is true only when inet_pton returns 1.
188  return inet_pton(af, hostNoBrackets_.str().c_str(),
189  af == AF_INET ? buf4 : buf6) == 1;
190 }
191 
    // Only does work while hostNoBrackets_ is still empty.
193  if (hostNoBrackets_.empty()) {
    // First branch: host_ is non-empty and wrapped in '[' ... ']'.
    // NOTE(review): the statements inside both branches are not visible in
    // this listing (gaps in the embedded line numbers).
194  if (!host_.empty() && host_.front() == '[' && host_.back() == ']') {
196  } else {
198  }
199  }
200 }
201 
202 }
FB_EXPORT void parse() noexcept
Definition: ParseURL.cpp:60
static bool validateScheme(folly::StringPiece url)
Definition: ParseURL.cpp:48
std::string str() const
Definition: Range.h:591
size_type find(const_range_type str) const
Definition: Range.h:721
constexpr size_type size() const
Definition: Range.h:431
bool validateURL(folly::ByteRange url)
Definition: UtilInl.h:25
uint16_t port_
Definition: ParseURL.h:108
requires E e noexcept(noexcept(s.error(std::move(e))))
folly::StringPiece query_
Definition: ParseURL.h:106
FB_EXPORT void stripBrackets() noexcept
Definition: ParseURL.cpp:192
constexpr bool empty() const
Definition: Range.h:443
folly::StringPiece host_
Definition: ParseURL.h:103
LogLevel min
Definition: LogLevel.cpp:30
int http_parser_parse_url(const char *buf, size_t buflen, int is_connect, struct http_parser_url *u)
Definition: http_parser.c:2332
folly::StringPiece path_
Definition: ParseURL.h:105
constexpr Iter data() const
Definition: Range.h:446
folly::StringPiece url_
Definition: ParseURL.h:100
Range subpiece(size_type first, size_type length=npos) const
Definition: Range.h:686
folly::StringPiece fragment_
Definition: ParseURL.h:107
bool parseAuthority() noexcept
Definition: ParseURL.cpp:150
const char * string
Definition: Conv.cpp:212
value_type & front()
Definition: Range.h:464
const internal::AnythingMatcher _
basic_fbstring< char > fbstring
Definition: FBString.h:2904
Range< const char * > StringPiece
folly::StringPiece hostNoBrackets_
Definition: ParseURL.h:104
bool hostIsIPAddress()
Definition: ParseURL.cpp:177
struct http_parser_url::@1 field_data[UF_MAX]
std::string authority_
Definition: ParseURL.h:102
void parseNonFully() noexcept
Definition: ParseURL.cpp:102
value_type & back()
Definition: Range.h:468
folly::StringPiece scheme_
Definition: ParseURL.h:101