proxygen
http_parser.h File Reference
#include <sys/types.h>
#include <stdint.h>

Go to the source code of this file.

Classes

struct  http_parser
 
struct  http_parser_settings
 
struct  http_parser_url
 

Macros

#define HTTP_PARSER_VERSION_MAJOR   1
 
#define HTTP_PARSER_VERSION_MINOR   0
 
#define HTTP_PARSER_STRICT   0
 
#define HTTP_PARSER_DEBUG   0
 
#define HTTP_MAX_HEADER_SIZE   (80*1024)
 
#define HTTP_ERRNO_MAP(XX)
 
#define HTTP_ERRNO_GEN(n, s)   HPE_##n,
 
#define HTTP_PARSER_ERRNO(p)   ((enum http_errno) (p)->http_errno)
 
#define HTTP_PARSER_ERRNO_LINE(p)   0
 

Typedefs

typedef struct http_parser http_parser
 
typedef struct http_parser_settings http_parser_settings
 
typedef struct http_parser_result http_parser_result
 
typedef int(* http_data_cb) (http_parser *, const char *at, size_t length)
 
typedef int(* http_cb) (http_parser *)
 

Enumerations

enum  http_method {
  HTTP_DELETE = 0, HTTP_GET, HTTP_HEAD, HTTP_POST,
  HTTP_PUT, HTTP_CONNECT, HTTP_OPTIONS, HTTP_TRACE,
  HTTP_COPY, HTTP_LOCK, HTTP_MKCOL, HTTP_MOVE,
  HTTP_PROPFIND, HTTP_PROPPATCH, HTTP_UNLOCK, HTTP_REPORT,
  HTTP_MKACTIVITY, HTTP_CHECKOUT, HTTP_MERGE, HTTP_MSEARCH,
  HTTP_NOTIFY, HTTP_SUBSCRIBE, HTTP_UNSUBSCRIBE, HTTP_PATCH
}
 
enum  http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }
 
enum  flags { F_CHUNKED = 1 << 0, F_TRAILING = 1 << 3, F_UPGRADE = 1 << 4, F_SKIPBODY = 1 << 5 }
 
enum  http_errno {
  HPE_OK, HPE_CB_message_begin, HPE_CB_path, HPE_CB_query_string,
  HPE_CB_url, HPE_CB_fragment, HPE_CB_header_field, HPE_CB_header_value,
  HPE_CB_headers_complete, HPE_CB_body, HPE_CB_message_complete, HPE_CB_reason,
  HPE_CB_chunk_header, HPE_CB_chunk_complete, HPE_INVALID_EOF_STATE, HPE_HEADER_OVERFLOW,
  HPE_CLOSED_CONNECTION, HPE_INVALID_VERSION, HPE_INVALID_STATUS, HPE_INVALID_METHOD,
  HPE_INVALID_URL, HPE_INVALID_HOST, HPE_INVALID_PORT, HPE_INVALID_PATH,
  HPE_INVALID_QUERY_STRING, HPE_INVALID_FRAGMENT, HPE_LF_EXPECTED, HPE_INVALID_HEADER_TOKEN,
  HPE_INVALID_CONTENT_LENGTH, HPE_HUGE_CONTENT_LENGTH, HPE_INVALID_CHUNK_SIZE, HPE_HUGE_CHUNK_SIZE,
  HPE_INVALID_TRANSFER_ENCODING, HPE_INVALID_UPGRADE, HPE_INVALID_CONSTANT, HPE_INVALID_INTERNAL_STATE,
  HPE_STRICT, HPE_PAUSED, HPE_UNKNOWN
}
 
enum  http_parser_url_fields {
  UF_SCHEMA = 0, UF_HOST = 1, UF_PORT = 2, UF_PATH = 3,
  UF_QUERY = 4, UF_FRAGMENT = 5, UF_USERINFO = 6, UF_MAX = 7
}
 

Functions

void http_parser_init (http_parser *parser, enum http_parser_type type)
 
size_t http_parser_execute (http_parser *parser, const http_parser_settings *settings, const char *data, size_t len)
 
const char * http_method_str (enum http_method m)
 
const char * http_errno_name (enum http_errno err)
 
const char * http_errno_description (enum http_errno err)
 
int http_parser_parse_url (const char *buf, size_t buflen, int is_connect, struct http_parser_url *u)
 
void http_parser_pause (http_parser *parser, int paused)
 

Macro Definition Documentation

#define HTTP_ERRNO_GEN (   n,
  s 
)    HPE_##n,

Definition at line 195 of file http_parser.h.

#define HTTP_ERRNO_MAP (   XX)

Definition at line 139 of file http_parser.h.

#define HTTP_MAX_HEADER_SIZE   (80*1024)

Definition at line 64 of file http_parser.h.

Referenced by http_parser_execute().

#define HTTP_PARSER_DEBUG   0

Definition at line 59 of file http_parser.h.

#define HTTP_PARSER_ERRNO_LINE (   p)    0

Definition at line 208 of file http_parser.h.

Referenced by print_error().

#define HTTP_PARSER_STRICT   0

Definition at line 52 of file http_parser.h.

#define HTTP_PARSER_VERSION_MAJOR   1

Definition at line 24 of file http_parser.h.

#define HTTP_PARSER_VERSION_MINOR   0

Definition at line 25 of file http_parser.h.

Typedef Documentation

typedef int(* http_cb) (http_parser *)

Definition at line 86 of file http_parser.h.

typedef int(* http_data_cb) (http_parser *, const char *at, size_t length)

Definition at line 85 of file http_parser.h.

typedef struct http_parser http_parser

Definition at line 67 of file http_parser.h.

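typedef struct http_parser_result http_parser_result

Definition at line 69 of file http_parser.h.

typedef struct http_parser_settings http_parser_settings

Definition at line 68 of file http_parser.h.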

Enumeration Type Documentation

enum flags
Enumerator
F_CHUNKED 
F_TRAILING 
F_UPGRADE 
F_SKIPBODY 

Definition at line 127 of file http_parser.h.

128  { F_CHUNKED = 1 << 0
129  , F_TRAILING = 1 << 3
130  , F_UPGRADE = 1 << 4
131  , F_SKIPBODY = 1 << 5
132  };
enum http_errno
Enumerator
HPE_OK 
HPE_CB_message_begin 
HPE_CB_path 
HPE_CB_query_string 
HPE_CB_url 
HPE_CB_fragment 
HPE_CB_header_field 
HPE_CB_header_value 
HPE_CB_headers_complete 
HPE_CB_body 
HPE_CB_message_complete 
HPE_CB_reason 
HPE_CB_chunk_header 
HPE_CB_chunk_complete 
HPE_INVALID_EOF_STATE 
HPE_HEADER_OVERFLOW 
HPE_CLOSED_CONNECTION 
HPE_INVALID_VERSION 
HPE_INVALID_STATUS 
HPE_INVALID_METHOD 
HPE_INVALID_URL 
HPE_INVALID_HOST 
HPE_INVALID_PORT 
HPE_INVALID_PATH 
HPE_INVALID_QUERY_STRING 
HPE_INVALID_FRAGMENT 
HPE_LF_EXPECTED 
HPE_INVALID_HEADER_TOKEN 
HPE_INVALID_CONTENT_LENGTH 
HPE_HUGE_CONTENT_LENGTH 
HPE_INVALID_CHUNK_SIZE 
HPE_HUGE_CHUNK_SIZE 
HPE_INVALID_TRANSFER_ENCODING 
HPE_INVALID_UPGRADE 
HPE_INVALID_CONSTANT 
HPE_INVALID_INTERNAL_STATE 
HPE_STRICT 
HPE_PAUSED 
HPE_UNKNOWN 

Definition at line 196 of file http_parser.h.

196  {
#define HTTP_ERRNO_MAP(XX)
Definition: http_parser.h:139
#define HTTP_ERRNO_GEN(n, s)
Definition: http_parser.h:195
enum http_method
Enumerator
HTTP_DELETE 
HTTP_GET 
HTTP_HEAD 
HTTP_POST 
HTTP_PUT 
HTTP_CONNECT 
HTTP_OPTIONS 
HTTP_TRACE 
HTTP_COPY 
HTTP_LOCK 
HTTP_MKCOL 
HTTP_MOVE 
HTTP_PROPFIND 
HTTP_PROPPATCH 
HTTP_UNLOCK 
HTTP_REPORT 
HTTP_MKACTIVITY 
HTTP_CHECKOUT 
HTTP_MERGE 
HTTP_MSEARCH 
HTTP_NOTIFY 
HTTP_SUBSCRIBE 
HTTP_UNSUBSCRIBE 
HTTP_PATCH 

Definition at line 90 of file http_parser.h.

91  { HTTP_DELETE = 0
92  , HTTP_GET
93  , HTTP_HEAD
94  , HTTP_POST
95  , HTTP_PUT
96  /* pathological */
97  , HTTP_CONNECT
98  , HTTP_OPTIONS
99  , HTTP_TRACE
100  /* webdav */
101  , HTTP_COPY
102  , HTTP_LOCK
103  , HTTP_MKCOL
104  , HTTP_MOVE
105  , HTTP_PROPFIND
106  , HTTP_PROPPATCH
107  , HTTP_UNLOCK
108  /* subversion */
109  , HTTP_REPORT
110  , HTTP_MKACTIVITY
111  , HTTP_CHECKOUT
112  , HTTP_MERGE
113  /* upnp */
114  , HTTP_MSEARCH
115  , HTTP_NOTIFY
116  , HTTP_SUBSCRIBE
117  , HTTP_UNSUBSCRIBE
118  /* RFC-5789 */
119  , HTTP_PATCH
120  };
enum http_parser_type
Enumerator
HTTP_REQUEST 
HTTP_RESPONSE 
HTTP_BOTH 

Definition at line 123 of file http_parser.h.

enum http_parser_url_fields
Enumerator
UF_SCHEMA 
UF_HOST 
UF_PORT 
UF_PATH 
UF_QUERY 
UF_FRAGMENT 
UF_USERINFO 
UF_MAX 

Definition at line 263 of file http_parser.h.

265  { UF_SCHEMA = 0
266  , UF_HOST = 1
267  , UF_PORT = 2
268  , UF_PATH = 3
269  , UF_QUERY = 4
270  , UF_FRAGMENT = 5
271  , UF_USERINFO = 6
272  , UF_MAX = 7

Function Documentation

const char* http_errno_description ( enum http_errno  err)

Definition at line 2186 of file http_parser.c.

References http_strerror_tab.

Referenced by proxygen::HTTP1xCodec::onParserError(), and print_error().

2187  {
2188  assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2189  return http_strerror_tab[err].description;
static struct @0 http_strerror_tab[]
const char* http_errno_name ( enum http_errno  err)

Definition at line 2180 of file http_parser.c.

References http_strerror_tab.

Referenced by test_simple().

2181  {
2182  assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2183  return http_strerror_tab[err].name;
static struct @0 http_strerror_tab[]
const char* http_method_str ( enum http_method  m)

Definition at line 2159 of file http_parser.c.

References m.

Referenced by proxygen::HTTP1xCodec::onHeadersComplete().

2161 {
2162  return method_strings[m];
static const char * method_strings[]
Definition: http_parser.c:149
static map< string, int > m
size_t http_parser_execute ( http_parser *  parser,
const http_parser_settings *  settings,
const char *  data,
size_t  len 
)

Definition at line 602 of file http_parser.c.

References _CALLBACK_DATA, BS, c, CALLBACK_DATA, CALLBACK_DATA_NOADVANCE, CALLBACK_NOTIFY, CALLBACK_NOTIFY_NOADVANCE, CALLBACK_SPACE, ch, CHUNKED, CONTENT_LENGTH, http_parser::content_length, CR, data, folly::pushmi::operators::error(), F_CHUNKED, F_SKIPBODY, F_TRAILING, F_UPGRADE, http_parser::flags, h_content_length, h_general, h_general_and_quote, h_general_and_quote_and_escape, h_matching_content_length, h_matching_transfer_encoding, h_matching_transfer_encoding_chunked, h_matching_upgrade, h_transfer_encoding, h_transfer_encoding_chunked, h_upgrade, http_parser::header_state, HPE_CB_headers_complete, HPE_HEADER_OVERFLOW, HPE_HUGE_CHUNK_SIZE, HPE_HUGE_CONTENT_LENGTH, HPE_INVALID_CHUNK_SIZE, HPE_INVALID_CONSTANT, HPE_INVALID_CONTENT_LENGTH, HPE_INVALID_EOF_STATE, HPE_INVALID_FRAGMENT, HPE_INVALID_HEADER_TOKEN, HPE_INVALID_HOST, HPE_INVALID_INTERNAL_STATE, HPE_INVALID_METHOD, HPE_INVALID_PATH, HPE_INVALID_PORT, HPE_INVALID_QUERY_STRING, HPE_INVALID_STATUS, HPE_INVALID_TRANSFER_ENCODING, HPE_INVALID_UPGRADE, HPE_INVALID_URL, HPE_INVALID_VERSION, HPE_LF_EXPECTED, HPE_OK, HPE_STRICT, HPE_UNKNOWN, HTTP_CHECKOUT, HTTP_CONNECT, HTTP_COPY, HTTP_DELETE, http_parser::http_errno, HTTP_GET, HTTP_HEAD, HTTP_LOCK, http_parser::http_major, HTTP_MAX_HEADER_SIZE, HTTP_MERGE, http_parser::http_minor, HTTP_MKACTIVITY, HTTP_MKCOL, HTTP_MOVE, HTTP_MSEARCH, HTTP_NOTIFY, HTTP_OPTIONS, HTTP_PARSER_ERRNO, HTTP_PATCH, HTTP_POST, HTTP_PROPFIND, HTTP_PROPPATCH, HTTP_PUT, HTTP_REPORT, HTTP_REQUEST, HTTP_RESPONSE, HTTP_SUBSCRIBE, HTTP_TRACE, HTTP_UNLOCK, HTTP_UNSUBSCRIBE, http_parser::index, int8_t, IS_ALPHA, IS_ALPHANUM, IS_HEADER_CHAR, IS_HEX, IS_HOST_CHAR, IS_NUM, IS_URL_CHAR, LF, LOWER, MARK, http_parser::method, MIN, MOVE_FAST, MOVE_THE_HEAD, NEW_MESSAGE, http_parser::nread, http_parser_settings::on_headers_complete, PARSING_HEADER, QT, RETURN, s_body_identity, s_body_identity_eof, s_chunk_data, s_chunk_data_almost_done, s_chunk_data_done, s_chunk_parameters, s_chunk_size, 
s_chunk_size_almost_done, s_chunk_size_start, s_header_almost_done, s_header_field, s_header_field_start, s_header_value, s_header_value_lws, s_header_value_start, s_headers_almost_done, s_headers_done, s_message_done, s_pre_start_req, s_pre_start_req_or_res, s_pre_start_res, s_req_first_http_major, s_req_first_http_minor, s_req_fragment, s_req_fragment_start, s_req_host, s_req_host_done, s_req_host_ipv6, s_req_host_start, s_req_http_H, s_req_http_HT, s_req_http_HTT, s_req_http_HTTP, s_req_http_major, s_req_http_minor, s_req_http_start, s_req_line_almost_done, s_req_method, s_req_path, s_req_port, s_req_query_string, s_req_query_string_start, s_req_schema, s_req_schema_slash, s_req_schema_slash_slash, s_req_spaces_before_url, s_res_first_http_major, s_res_first_http_minor, s_res_first_status_code, s_res_H, s_res_HT, s_res_HTT, s_res_HTTP, s_res_http_major, s_res_http_minor, s_res_line_almost_done, s_res_or_resp_H, s_res_status, s_res_status_code, s_start_req, s_start_req_or_res, s_start_res, SET_ERRNO, http_parser::state, http_parser::status_code, STRICT_CHECK, TOKEN, TRANSFER_ENCODING, http_parser::type, uint64_t, UPGRADE, and http_parser::upgrade.

Referenced by proxygen::HTTP1xCodec::onIngress(), proxygen::HTTP1xCodec::onIngressEOF(), parse(), parse_count_body(), parse_pause(), test_header_overflow_error(), and test_no_overflow_long_body().

607 {
608  char c, ch;
609  int8_t unhex_val;
610  const char *p = data;
611 
612  /* Optimization: within the parsing loop below, we refer to this
613  * local copy of the state rather than parser->state. The compiler
614  * can't be sure whether parser->state will change during a callback,
615  * so it generates a lot of memory loads and stores to keep a register
616  * copy of the state in sync with the memory copy. We know, however,
617  * that the callbacks aren't allowed to change the parser state, so
618  * the parsing loop works with this local variable and only copies
619  * the value back to parser->loop before returning or invoking a
620  * callback.
621  */
622  unsigned char state = parser->state;
623  const unsigned int lenient = 0;
624 
625  /* We're in an error state. Don't bother doing anything. */
626  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
627  RETURN(0);
628  }
629 
630  if (len == 0) {
631  switch (state) {
632  case s_body_identity_eof:
633  /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
634  * we got paused.
635  */
636  CALLBACK_NOTIFY_NOADVANCE(message_complete);
637  RETURN(0);
638 
640  case s_pre_start_res:
641  case s_pre_start_req:
642  RETURN(0);
643 
644  default:
646  RETURN(1);
647  }
648  }
649 
650  /* technically we could combine all of these (except for url_mark) into one
651  variable, saving stack space, but it seems more clear to have them
652  separated. */
653  const char *header_field_mark = 0;
654  const char *header_value_mark = 0;
655  const char *url_mark = 0;
656  const char *reason_mark = 0;
657  const char *body_mark = 0;
658 
659  if (state == s_header_field)
660  header_field_mark = data;
661  if (state == s_header_value)
662  header_value_mark = data;
663  if (state == s_req_path ||
664  state == s_req_schema ||
665  state == s_req_schema_slash ||
666  state == s_req_schema_slash_slash ||
667  state == s_req_port ||
668  state == s_req_query_string_start ||
669  state == s_req_query_string ||
670  state == s_req_host_start ||
671  state == s_req_host ||
672  state == s_req_host_ipv6 ||
673  state == s_req_host_done ||
674  state == s_req_fragment_start ||
675  state == s_req_fragment)
676  url_mark = data;
677  if (state == s_res_status)
678  reason_mark = data;
679 
680  /* Used only for overflow checking. If the parser is in a parsing-headers
681  * state, then its value is equal to max(data, the beginning of the current
682  * message or chunk). If the parser is in a not-parsing-headers state, then
683  * its value is irrelevant.
684  */
685  const char* data_or_header_data_start = data;
686 
687  for (p = data; p != data + len; p++) {
688  ch = *p;
689 
690  reexecute_byte:
691  switch (state) {
692 
694  if (ch == CR || ch == LF)
695  break;
696  state = s_start_req_or_res;
697  CALLBACK_NOTIFY_NOADVANCE(message_begin);
698  goto reexecute_byte;
699 
700  case s_start_req_or_res:
701  {
702  parser->flags = 0;
703  parser->content_length = -1;
704 
705  if (ch == 'H') {
706  state = s_res_or_resp_H;
707  } else {
708  parser->type = HTTP_REQUEST;
709  state = s_start_req;
710  goto reexecute_byte;
711  }
712 
713  break;
714  }
715 
716  case s_res_or_resp_H:
717  if (ch == 'T') {
718  parser->type = HTTP_RESPONSE;
719  state = s_res_HT;
720  } else {
721  if (ch != 'E') {
723  goto error;
724  }
725 
726  parser->type = HTTP_REQUEST;
727  parser->method = HTTP_HEAD;
728  parser->index = 2;
729  state = s_req_method;
730  }
731  break;
732 
733  case s_pre_start_res:
734  if (ch == CR || ch == LF)
735  break;
736  state = s_start_res;
737  CALLBACK_NOTIFY_NOADVANCE(message_begin);
738  goto reexecute_byte;
739 
740  case s_start_res:
741  {
742  parser->flags = 0;
743  parser->content_length = -1;
744 
745  switch (ch) {
746  case 'H':
747  state = s_res_H;
748  break;
749 
750  default:
752  goto error;
753  }
754 
755  break;
756  }
757 
758  case s_res_H:
759  STRICT_CHECK(ch != 'T');
760  state = s_res_HT;
761  break;
762 
763  case s_res_HT:
764  STRICT_CHECK(ch != 'T');
765  state = s_res_HTT;
766  break;
767 
768  case s_res_HTT:
769  STRICT_CHECK(ch != 'P');
770  state = s_res_HTTP;
771  break;
772 
773  case s_res_HTTP:
774  STRICT_CHECK(ch != '/');
775  state = s_res_first_http_major;
776  break;
777 
779  if (ch < '0' || ch > '9') {
781  goto error;
782  }
783 
784  parser->http_major = ch - '0';
785  state = s_res_http_major;
786  break;
787 
788  /* major HTTP version or dot */
789  case s_res_http_major:
790  {
791  if (ch == '.') {
792  state = s_res_first_http_minor;
793  break;
794  }
795 
796  if (!IS_NUM(ch)) {
798  goto error;
799  }
800 
801  parser->http_major *= 10;
802  parser->http_major += ch - '0';
803 
804  if (parser->http_major > 999) {
806  goto error;
807  }
808 
809  break;
810  }
811 
812  /* first digit of minor HTTP version */
814  if (!IS_NUM(ch)) {
816  goto error;
817  }
818 
819  parser->http_minor = ch - '0';
820  state = s_res_http_minor;
821  break;
822 
823  /* minor HTTP version or end of request line */
824  case s_res_http_minor:
825  {
826  if (ch == ' ') {
827  state = s_res_first_status_code;
828  break;
829  }
830 
831  if (!IS_NUM(ch)) {
833  goto error;
834  }
835 
836  parser->http_minor *= 10;
837  parser->http_minor += ch - '0';
838 
839  if (parser->http_minor > 999) {
841  goto error;
842  }
843 
844  break;
845  }
846 
848  {
849  if (!IS_NUM(ch)) {
850  if (ch == ' ') {
851  break;
852  }
853 
855  goto error;
856  }
857  parser->status_code = ch - '0';
858  state = s_res_status_code;
859  break;
860  }
861 
862  case s_res_status_code:
863  {
864  if (!IS_NUM(ch)) {
865  switch (ch) {
866  case ' ':
867  state = s_res_status;
868  break;
869  case CR:
870  state = s_res_line_almost_done;
871  break;
872  case LF:
873  state = s_header_field_start;
874  break;
875  default:
877  goto error;
878  }
879  break;
880  }
881 
882  parser->status_code *= 10;
883  parser->status_code += ch - '0';
884 
885  if (parser->status_code > 999) {
887  goto error;
888  }
889 
890  break;
891  }
892 
893  case s_res_status:
894  /* the human readable status. e.g. "NOT FOUND" */
895  MARK(reason);
896  if (ch == CR) {
897  state = s_res_line_almost_done;
898  CALLBACK_DATA(reason);
899  break;
900  }
901 
902  if (ch == LF) {
903  state = s_header_field_start;
904  CALLBACK_DATA(reason);
905  break;
906  }
907  break;
908 
910  STRICT_CHECK(ch != LF);
911  state = s_header_field_start;
912  break;
913 
914  case s_pre_start_req:
915  if (ch == CR || ch == LF) {
916  break;
917  }
918  state = s_start_req;
919  CALLBACK_NOTIFY_NOADVANCE(message_begin);
920  goto reexecute_byte;
921 
922  case s_start_req:
923  {
924  parser->flags = 0;
925  parser->content_length = -1;
926 
927  if (!IS_ALPHA(ch)) {
929  goto error;
930  }
931 
932  parser->method = (enum http_method) 0;
933  parser->index = 1;
934  switch (ch) {
935  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
936  case 'D': parser->method = HTTP_DELETE; break;
937  case 'G': parser->method = HTTP_GET; break;
938  case 'H': parser->method = HTTP_HEAD; break;
939  case 'L': parser->method = HTTP_LOCK; break;
940  case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
941  case 'N': parser->method = HTTP_NOTIFY; break;
942  case 'O': parser->method = HTTP_OPTIONS; break;
943  case 'P': parser->method = HTTP_POST;
944  /* or PROPFIND or PROPPATCH or PUT or PATCH */
945  break;
946  case 'R': parser->method = HTTP_REPORT; break;
947  case 'S': parser->method = HTTP_SUBSCRIBE; break;
948  case 'T': parser->method = HTTP_TRACE; break;
949  case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
950  default:
952  goto error;
953  }
954  state = s_req_method;
955 
956  break;
957  }
958 
959  case s_req_method:
960  {
961  if (ch == '\0') {
963  goto error;
964  }
965 
966  const char *matcher = method_strings[parser->method];
967  if (ch == ' ' && matcher[parser->index] == '\0') {
968  state = s_req_spaces_before_url;
969  } else if (ch == matcher[parser->index]) {
970  ; /* nada */
971  } else if (parser->method == HTTP_CONNECT) {
972  if (parser->index == 1 && ch == 'H') {
973  parser->method = HTTP_CHECKOUT;
974  } else if (parser->index == 2 && ch == 'P') {
975  parser->method = HTTP_COPY;
976  } else {
977  goto error;
978  }
979  } else if (parser->method == HTTP_MKCOL) {
980  if (parser->index == 1 && ch == 'O') {
981  parser->method = HTTP_MOVE;
982  } else if (parser->index == 1 && ch == 'E') {
983  parser->method = HTTP_MERGE;
984  } else if (parser->index == 1 && ch == '-') {
985  parser->method = HTTP_MSEARCH;
986  } else if (parser->index == 2 && ch == 'A') {
987  parser->method = HTTP_MKACTIVITY;
988  } else {
989  goto error;
990  }
991  } else if (parser->index == 1 && parser->method == HTTP_POST) {
992  if (ch == 'R') {
993  parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
994  } else if (ch == 'U') {
995  parser->method = HTTP_PUT;
996  } else if (ch == 'A') {
997  parser->method = HTTP_PATCH;
998  } else {
999  goto error;
1000  }
1001  } else if (parser->index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
1002  parser->method = HTTP_UNSUBSCRIBE;
1003  } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1004  parser->method = HTTP_PROPPATCH;
1005  } else {
1007  goto error;
1008  }
1009 
1010  ++parser->index;
1011  break;
1012  }
1013 
1015  {
1016  if (ch == ' ') break;
1017 
1018  // CONNECT requests must be followed by a <host>:<port>
1019  if (parser->method == HTTP_CONNECT) {
1020  MARK(url);
1021  state = s_req_host_start;
1022  goto reexecute_byte;
1023  }
1024 
1025  if (ch == '/' || ch == '*') {
1026  MARK(url);
1027  state = s_req_path;
1028  break;
1029  }
1030 
1031  /* Proxied requests are followed by scheme of an absolute URI (alpha).
1032  * All other methods are followed by '/' or '*' (handled above).
1033  */
1034  if (IS_ALPHA(ch)) {
1035  MARK(url);
1036  state = s_req_schema;
1037  break;
1038  }
1039 
1041  goto error;
1042  }
1043 
1044  case s_req_schema:
1045  {
1046  if (IS_ALPHA(ch)) break;
1047 
1048  if (ch == ':') {
1049  state = s_req_schema_slash;
1050  break;
1051  }
1052 
1054  goto error;
1055  }
1056 
1057  case s_req_schema_slash:
1058  STRICT_CHECK(ch != '/');
1059  state = s_req_schema_slash_slash;
1060  break;
1061 
1063  STRICT_CHECK(ch != '/');
1064  state = s_req_host_start;
1065  break;
1066 
1067  case s_req_host_start:
1068  if (ch == '[') {
1069  state = s_req_host_ipv6;
1070  break;
1071  } else if (IS_ALPHANUM(ch)) {
1072  state = s_req_host;
1073  break;
1074  }
1075 
1077  goto error;
1078 
1079  case s_req_host:
1080  if (IS_HOST_CHAR(ch)) break;
1081  state = s_req_host_done;
1082  goto reexecute_byte;
1083 
1084  case s_req_host_ipv6:
1085  if (IS_HEX(ch) || ch == ':') break;
1086  if (ch == ']') {
1087  state = s_req_host_done;
1088  break;
1089  }
1090 
1092  goto error;
1093 
1094  case s_req_host_done:
1095  switch (ch) {
1096  case ':':
1097  state = s_req_port;
1098  break;
1099  case '/':
1100  state = s_req_path;
1101  break;
1102  case ' ':
1103  /* The request line looks like:
1104  * "GET http://foo.bar.com HTTP/1.1"
1105  * That is, there is no path.
1106  */
1107  state = s_req_http_start;
1108  CALLBACK_DATA(url);
1109  break;
1110  case '?':
1111  state = s_req_query_string_start;
1112  break;
1113  default:
1115  goto error;
1116  }
1117 
1118  break;
1119 
1120  case s_req_port:
1121  {
1122  if (IS_NUM(ch)) break;
1123  switch (ch) {
1124  case '/':
1125  state = s_req_path;
1126  break;
1127  case ' ':
1128  /* The request line looks like:
1129  * "GET http://foo.bar.com:1234 HTTP/1.1"
1130  * That is, there is no path.
1131  */
1132  state = s_req_http_start;
1133  CALLBACK_DATA(url);
1134  break;
1135  case '?':
1136  state = s_req_query_string_start;
1137  break;
1138  default:
1140  goto error;
1141  }
1142  break;
1143  }
1144 
1145  case s_req_path:
1146  {
1147  if (IS_URL_CHAR(ch)) break;
1148 
1149  switch (ch) {
1150  case ' ':
1151  state = s_req_http_start;
1152  CALLBACK_DATA(url);
1153  break;
1154  case CR:
1155  parser->http_major = 0;
1156  parser->http_minor = 9;
1157  state = s_headers_almost_done;
1158  CALLBACK_DATA(url);
1159  break;
1160  case LF:
1161  parser->http_major = 0;
1162  parser->http_minor = 9;
1163  state = s_headers_almost_done;
1164  CALLBACK_DATA(url);
1165  goto reexecute_byte;
1166  break;
1167  case '?':
1168  state = s_req_query_string_start;
1169  break;
1170  case '#':
1171  state = s_req_fragment_start;
1172  break;
1173  default:
1175  goto error;
1176  }
1177  break;
1178  }
1179 
1181  {
1182  if (IS_URL_CHAR(ch)) {
1183  state = s_req_query_string;
1184  break;
1185  }
1186 
1187  switch (ch) {
1188  case '?':
1189  break; /* XXX ignore extra '?' ... is this right? */
1190  case ' ':
1191  state = s_req_http_start;
1192  CALLBACK_DATA(url);
1193  break;
1194  case CR:
1195  parser->http_major = 0;
1196  parser->http_minor = 9;
1197  state = s_headers_almost_done;
1198  CALLBACK_DATA(url);
1199  break;
1200  case LF:
1201  parser->http_major = 0;
1202  parser->http_minor = 9;
1203  state = s_headers_almost_done;
1204  CALLBACK_DATA(url);
1205  goto reexecute_byte;
1206  break;
1207  case '#':
1208  state = s_req_fragment_start;
1209  break;
1210  default:
1212  goto error;
1213  }
1214  break;
1215  }
1216 
1217  case s_req_query_string:
1218  {
1219  if (IS_URL_CHAR(ch)) break;
1220 
1221  switch (ch) {
1222  case '?':
1223  /* allow extra '?' in query string */
1224  break;
1225  case ' ':
1226  state = s_req_http_start;
1227  CALLBACK_DATA(url);
1228  break;
1229  case CR:
1230  parser->http_major = 0;
1231  parser->http_minor = 9;
1232  state = s_headers_almost_done;
1233  CALLBACK_DATA(url);
1234  break;
1235  case LF:
1236  parser->http_major = 0;
1237  parser->http_minor = 9;
1238  state = s_headers_almost_done;
1239  CALLBACK_DATA(url);
1240  goto reexecute_byte;
1241  break;
1242  case '#':
1243  state = s_req_fragment_start;
1244  break;
1245  default:
1247  goto error;
1248  }
1249  break;
1250  }
1251 
1252  case s_req_fragment_start:
1253  {
1254  if (IS_URL_CHAR(ch)) {
1255  state = s_req_fragment;
1256  break;
1257  }
1258 
1259  switch (ch) {
1260  case ' ':
1261  state = s_req_http_start;
1262  CALLBACK_DATA(url);
1263  break;
1264  case CR:
1265  parser->http_major = 0;
1266  parser->http_minor = 9;
1267  state = s_headers_almost_done;
1268  CALLBACK_DATA(url);
1269  break;
1270  case LF:
1271  parser->http_major = 0;
1272  parser->http_minor = 9;
1273  state = s_headers_almost_done;
1274  CALLBACK_DATA(url);
1275  goto reexecute_byte;
1276  break;
1277  case '?':
1278  state = s_req_fragment;
1279  break;
1280  case '#':
1281  break;
1282  default:
1284  goto error;
1285  }
1286  break;
1287  }
1288 
1289  case s_req_fragment:
1290  {
1291  if (IS_URL_CHAR(ch)) break;
1292 
1293  switch (ch) {
1294  case ' ':
1295  state = s_req_http_start;
1296  CALLBACK_DATA(url);
1297  break;
1298  case CR:
1299  parser->http_major = 0;
1300  parser->http_minor = 9;
1301  state = s_headers_almost_done;
1302  CALLBACK_DATA(url);
1303  break;
1304  case LF:
1305  parser->http_major = 0;
1306  parser->http_minor = 9;
1307  state = s_headers_almost_done;
1308  CALLBACK_DATA(url);
1309  goto reexecute_byte;
1310  break;
1311  case '?':
1312  case '#':
1313  break;
1314  default:
1316  goto error;
1317  }
1318  break;
1319  }
1320 
1321  case s_req_http_start:
1322  switch (ch) {
1323  case 'H':
1324  state = s_req_http_H;
1325  break;
1326  case ' ':
1327  break;
1328  default:
1330  goto error;
1331  }
1332  break;
1333 
1334  case s_req_http_H:
1335  STRICT_CHECK(ch != 'T');
1336  state = s_req_http_HT;
1337  break;
1338 
1339  case s_req_http_HT:
1340  STRICT_CHECK(ch != 'T');
1341  state = s_req_http_HTT;
1342  break;
1343 
1344  case s_req_http_HTT:
1345  STRICT_CHECK(ch != 'P');
1346  state = s_req_http_HTTP;
1347  break;
1348 
1349  case s_req_http_HTTP:
1350  STRICT_CHECK(ch != '/');
1351  state = s_req_first_http_major;
1352  break;
1353 
1354  /* first digit of major HTTP version */
1356  if (ch < '0' || ch > '9') {
1358  goto error;
1359  }
1360 
1361  parser->http_major = ch - '0';
1362  state = s_req_http_major;
1363  break;
1364 
1365  /* major HTTP version or dot */
1366  case s_req_http_major:
1367  {
1368  if (ch == '.') {
1369  state = s_req_first_http_minor;
1370  break;
1371  }
1372 
1373  if (!IS_NUM(ch)) {
1375  goto error;
1376  }
1377 
1378  parser->http_major *= 10;
1379  parser->http_major += ch - '0';
1380 
1381  if (parser->http_major > 999) {
1383  goto error;
1384  }
1385 
1386  break;
1387  }
1388 
1389  /* first digit of minor HTTP version */
1391  if (!IS_NUM(ch)) {
1393  goto error;
1394  }
1395 
1396  parser->http_minor = ch - '0';
1397  state = s_req_http_minor;
1398  break;
1399 
1400  /* minor HTTP version or end of request line */
1401  case s_req_http_minor:
1402  {
1403  if (ch == CR) {
1404  if (parser->http_major== 0 && parser->http_minor == 9) {
1405  state = s_headers_almost_done;
1406  } else {
1407  state = s_req_line_almost_done;
1408  }
1409  break;
1410  }
1411 
1412  if (ch == LF) {
1413  if (parser->http_major == 0 && parser->http_minor == 9) {
1414  state = s_headers_almost_done;
1415  goto reexecute_byte;
1416  } else {
1417  state = s_header_field_start;
1418  }
1419  break;
1420  }
1421 
1422  /* XXX allow spaces after digit? */
1423 
1424  if (!IS_NUM(ch)) {
1426  goto error;
1427  }
1428 
1429  parser->http_minor *= 10;
1430  parser->http_minor += ch - '0';
1431 
1432  if (parser->http_minor > 999) {
1434  goto error;
1435  }
1436 
1437  break;
1438  }
1439 
1440  /* end of request line */
1442  {
1443  if (ch != LF) {
1445  goto error;
1446  }
1447 
1448  state = s_header_field_start;
1449  break;
1450  }
1451 
1452  case s_header_field_start:
1453  {
1454  if (ch == CR) {
1455  state = s_headers_almost_done;
1456  break;
1457  }
1458 
1459  if (ch == LF) {
1460  /* they might be just sending \n instead of \r\n so this would be
1461  * the second \n to denote the end of headers*/
1462  state = s_headers_almost_done;
1463  goto reexecute_byte;
1464  }
1465 
1466  c = TOKEN(ch);
1467 
1468  if (!c) {
1470  goto error;
1471  }
1472 
1473  MARK(header_field);
1474 
1475  parser->index = 0;
1476  state = s_header_field;
1477 
1478  switch (c) {
1479  case 'c':
1481  break;
1482 
1483  case 't':
1485  break;
1486 
1487  case 'u':
1488  parser->header_state = h_matching_upgrade;
1489  break;
1490 
1491  default:
1492  parser->header_state = h_general;
1493  break;
1494  }
1495  break;
1496  }
1497 
1498  case s_header_field:
1499  {
1500  c = TOKEN(ch);
1501 
1502  if (c) {
1503  switch (parser->header_state) {
1504  case h_general:
1505 
1506  // fast-forwarding, wheeeeeee!
1507  #define MOVE_THE_HEAD do { \
1508  ++p; \
1509  if (!TOKEN(*p)) { \
1510  ch = *p; \
1511  goto notatoken; \
1512  } \
1513  } while(0);
1514 
1515  if (data + len - p >= 9) {
1524  } else if (data + len - p >= 4) {
1528  }
1529 
1530  break;
1531 
1532  /* content-length */
1533 
1535  parser->index++;
1536  if (parser->index > sizeof(CONTENT_LENGTH)-1
1537  || c != CONTENT_LENGTH[parser->index]) {
1538  parser->header_state = h_general;
1539  } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1540  parser->header_state = h_content_length;
1541  }
1542  break;
1543 
1544  /* transfer-encoding */
1545 
1547  parser->index++;
1548  if (parser->index > sizeof(TRANSFER_ENCODING)-1
1549  || c != TRANSFER_ENCODING[parser->index]) {
1550  parser->header_state = h_general;
1551  } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1553  }
1554  break;
1555 
1556  /* upgrade */
1557 
1558  case h_matching_upgrade:
1559  parser->index++;
1560  if (parser->index > sizeof(UPGRADE)-1
1561  || c != UPGRADE[parser->index]) {
1562  parser->header_state = h_general;
1563  } else if (parser->index == sizeof(UPGRADE)-2) {
1564  parser->header_state = h_upgrade;
1565  }
1566  break;
1567 
1568  case h_content_length:
1569  case h_transfer_encoding:
1570  case h_upgrade:
1571  if (ch != ' ') parser->header_state = h_general;
1572  break;
1573 
1574  default:
1575  assert(0 && "Unknown header_state");
1576  break;
1577  }
1578  break;
1579  }
1580 
1581  notatoken:
1582  if (ch == ':') {
1583  state = s_header_value_start;
1584  // do not allow headers with trailing whitespaces
1585  // https://tools.ietf.org/html/rfc7230#section-3.2.4
1586  if (p - header_field_mark > 1 &&
1587  data[p - data - 1] == ' ') {
1589  goto error;
1590  }
1591  CALLBACK_DATA(header_field);
1592  break;
1593  }
1594 
1596  goto error;
1597  }
1598 
1599  case s_header_value_start:
1600  {
1601  if (ch == ' ' || ch == '\t') break;
1602 
1603  MARK(header_value);
1604 
1605  state = s_header_value;
1606  parser->index = 0;
1607 
1608  // Error out if a content_length, transfer_encoding, or upgrade header
1609  // was present with no actual value. These headers correspond with
1610  // special parser states that without the below accept empty header
1611  // values and so we can reject such requests here in the parser.
1612  // If more headers are added, can consider moving to a hash/map based
1613  // model below.
1614  if (ch == CR || ch == LF) {
1615  if (parser->header_state == h_content_length) {
1617  } else if (parser->header_state == h_transfer_encoding) {
1619  } else if (parser->header_state == h_upgrade) {
1621  }
1622 
1623  if (parser->http_errno != HPE_OK) {
1624  goto error;
1625  }
1626  }
1627 
1628  if (ch == CR) {
1629  STRICT_CHECK(parser->quote != 0);
1630  parser->header_state = h_general;
1631  state = s_header_almost_done;
1632  CALLBACK_DATA(header_value);
1633  break;
1634  }
1635 
1636  if (ch == LF) {
1637  STRICT_CHECK(parser->quote != 0);
1638  state = s_header_field_start;
1639  CALLBACK_DATA(header_value);
1640  break;
1641  }
1642 
1643  c = LOWER(ch);
1644 
1645  switch (parser->header_state) {
1646  case h_upgrade:
1647  parser->flags |= F_UPGRADE;
1648  parser->header_state = h_general;
1649  break;
1650 
1651  case h_transfer_encoding:
1652  /* looking for 'Transfer-Encoding: chunked' */
1653  if ('c' == c) {
1655  } else {
1656  parser->header_state = h_general;
1657  }
1658  break;
1659 
1660  case h_content_length:
1661  if (!IS_NUM(ch)) {
1663  goto error;
1664  }
1665 
1666  parser->content_length = ch - '0';
1667  break;
1668 
1669  default:
1670  parser->header_state = ch == QT ? h_general_and_quote : h_general;
1671  break;
1672  }
1673  break;
1674  }
1675 
1676  case s_header_value:
1677  {
1678  cr_or_lf_or_qt:
1679  if (ch == CR &&
1681  state = s_header_almost_done;
1682  CALLBACK_DATA(header_value);
1683  break;
1684  }
1685 
1686  if (ch == LF &&
1688  state = s_header_almost_done;
1689  CALLBACK_DATA_NOADVANCE(header_value);
1690  goto reexecute_byte;
1691  }
1692 
1693  if (!lenient && !IS_HEADER_CHAR(ch) &&
1696  goto error;
1697  }
1698 
1699  switch (parser->header_state) {
1700  case h_general:
1701  if (ch == QT) {
1703  }
1704 
1705  // fast-forwarding, wheee!
1706  #define MOVE_FAST do { \
1707  ++p; \
1708  ch = *p; \
1709  if (ch == CR || ch == LF || ch == QT || \
1710  ch == BS || !IS_HEADER_CHAR(ch)) { \
1711  goto cr_or_lf_or_qt; \
1712  } \
1713  } while(0);
1714 
1715  if (data + len - p >= 12) {
1716  MOVE_FAST
1717  MOVE_FAST
1718  MOVE_FAST
1719  MOVE_FAST
1720  MOVE_FAST
1721  MOVE_FAST
1722  MOVE_FAST
1723  MOVE_FAST
1724  MOVE_FAST
1725  MOVE_FAST
1726  MOVE_FAST
1727  } else if (data + len - p >= 5) {
1728  MOVE_FAST
1729  MOVE_FAST
1730  MOVE_FAST
1731  MOVE_FAST
1732  }
1733 
1734  break;
1735 
1736  case h_general_and_quote:
1737  if (ch == QT) {
1738  parser->header_state = h_general;
1739  } else if (ch == BS) {
1741  }
1742  break;
1743 
1746  break;
1747 
1748  // Not sure the below is relevant anymore as from
1749  // s_header_value_start it appears as though we can never
1750  // be in the situation below
1751  case h_transfer_encoding:
1753  goto error;
1754  break;
1755 
1756  case h_content_length:
1757  if (ch == ' ') break;
1758  if (!IS_NUM(ch)) {
1760  goto error;
1761  }
1762 
1763  if (parser->content_length > ((INT64_MAX - 10) / 10)) {
1764  /* overflow */
1766  goto error;
1767  }
1768 
1769  parser->content_length *= 10;
1770  parser->content_length += ch - '0';
1771  break;
1772 
1773  /* Transfer-Encoding: chunked */
1775  parser->index++;
1776  if (parser->index > sizeof(CHUNKED)-1
1777  || LOWER(ch) != CHUNKED[parser->index]) {
1778  parser->header_state = h_general;
1779  } else if (parser->index == sizeof(CHUNKED)-2) {
1781  }
1782  break;
1783 
1785  if (ch != ' ') {
1786  parser->header_state = h_general;
1787  }
1788  break;
1789 
1790  default:
1791  state = s_header_value;
1792  parser->header_state = h_general;
1793  break;
1794  }
1795  break;
1796  }
1797 
1798  case s_header_almost_done:
1799  {
1800  if (ch == LF) {
1801  state = s_header_value_lws;
1802  } else {
1803  state = s_header_value;
1804  }
1805 
1806  switch (parser->header_state) {
1808  parser->flags |= F_CHUNKED;
1809  break;
1810  default:
1811  break;
1812  }
1813 
1814  if (ch != LF) {
1815  CALLBACK_SPACE(header_value);
1816  }
1817 
1818  break;
1819  }
1820 
1821  case s_header_value_lws:
1822  {
1823  if (ch == ' ' || ch == '\t')
1824  {
1825  state = s_header_value_start;
1826  CALLBACK_SPACE(header_value);
1827  }
1828  else
1829  {
1830  state = s_header_field_start;
1831  goto reexecute_byte;
1832  }
1833  break;
1834  }
1835 
1836  case s_headers_almost_done:
1837  {
1838  STRICT_CHECK(ch != LF);
1839 
1840  if (ch != LF) {
1842  goto error;
1843  }
1844 
1845  if (parser->flags & F_TRAILING) {
1846  /* End of a chunked request */
1847  state = s_message_done;
1848  CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1849  goto reexecute_byte;
1850  }
1851 
1852  state = s_headers_done;
1853 
1854  /* Set this here so that on_headers_complete() callbacks can see it */
1855  parser->upgrade =
1856  (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1857 
1858  /* Here we call the headers_complete callback. This is somewhat
1859  * different than other callbacks because if the user returns 1, we
1860  * will interpret that as saying that this message has no body. This
1861  * is needed for the annoying case of receiving a response to a HEAD
1862  * request.
1863  *
1864  * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1865  * we have to simulate it by handling a change in errno below.
1866  */
1867  size_t header_size = p - data + 1;
1868  switch (settings->on_headers_complete(parser, nullptr, header_size)) {
1869  case 0:
1870  break;
1871 
1872  case 1:
1873  parser->flags |= F_SKIPBODY;
1874  break;
1875 
1876  default:
1878  RETURN(p - data); /* Error */
1879  }
1880 
1881  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1882  RETURN(p - data);
1883  }
1884 
1885  goto reexecute_byte;
1886  }
1887 
1888  case s_headers_done:
1889  {
1890  STRICT_CHECK(ch != LF);
1891 
1892  // we're done parsing headers, reset overflow counters
1893  parser->nread = 0;
1894  // (if we now move to s_body_*, then this is irrelevant)
1895  data_or_header_data_start = p;
1896 
1897  int hasBody = parser->flags & F_CHUNKED || parser->content_length > 0;
1898  if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1899  (parser->flags & F_SKIPBODY) || !hasBody)) {
1900  /* Exit, the rest of the message is in a different protocol. */
1901  state = NEW_MESSAGE();
1902  CALLBACK_NOTIFY(message_complete);
1903  RETURN((p - data) + 1);
1904  }
1905 
1906  if (parser->flags & F_SKIPBODY) {
1907  state = NEW_MESSAGE();
1908  CALLBACK_NOTIFY(message_complete);
1909  } else if (parser->flags & F_CHUNKED) {
1910  /* chunked encoding - ignore Content-Length header */
1911  state = s_chunk_size_start;
1912  } else {
1913  if (parser->content_length == 0) {
1914  /* Content-Length header given but zero: Content-Length: 0\r\n */
1915  state = NEW_MESSAGE();
1916  CALLBACK_NOTIFY(message_complete);
1917  } else if (parser->content_length > 0) {
1918  /* Content-Length header given and non-zero */
1919  state = s_body_identity;
1920  } else {
1921  unsigned short sc = parser->status_code;
1922  if (parser->type == HTTP_REQUEST ||
1923  ((100 <= sc && sc <= 199) || sc == 204 || sc == 304)) {
1924  /* Assume content-length 0 - read the next */
1925  state = NEW_MESSAGE();
1926  CALLBACK_NOTIFY(message_complete);
1927  } else {
1928  /* Read body until EOF */
1929  state = s_body_identity_eof;
1930  }
1931  }
1932  }
1933 
1934  break;
1935  }
1936 
1937  case s_body_identity:
1938  {
1939  uint64_t to_read = MIN(parser->content_length, (data + len) - p);
1940 
1941  assert(parser->content_length > 0);
1942 
1943  /* The difference between advancing content_length and p is because
1944  * the latter will automatically advance on the next loop iteration.
1945  * Further, if content_length ends up at 0, we want to see the last
1946  * byte again for our message complete callback.
1947  */
1948  MARK(body);
1949  parser->content_length -= to_read;
1950  p += to_read - 1;
1951 
1952  if (parser->content_length == 0) {
1953  state = s_message_done;
1954 
1955  /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1956  *
1957  * The alternative to doing this is to wait for the next byte to
1958  * trigger the data callback, just as in every other case. The
1959  * problem with this is that this makes it difficult for the test
1960  * harness to distinguish between complete-on-EOF and
1961  * complete-on-length. It's not clear that this distinction is
1962  * important for applications, but let's keep it for now.
1963  */
1964  _CALLBACK_DATA(body, p - body_mark + 1, p - data);
1965  goto reexecute_byte;
1966  }
1967 
1968  break;
1969  }
1970 
1971  /* read until EOF */
1972  case s_body_identity_eof:
1973  MARK(body);
1974  p = data + len - 1;
1975 
1976  break;
1977 
1978  case s_message_done:
1979  state = NEW_MESSAGE();
1980  parser->nread = 0;
1981  data_or_header_data_start = p;
1982  CALLBACK_NOTIFY(message_complete);
1983  if (parser->upgrade) {
1984  /* Exit, the rest of the message is in a different protocol. */
1985  RETURN((p - data) + 1);
1986  }
1987  break;
1988 
1989  case s_chunk_size_start:
1990  {
1991  assert(parser->flags & F_CHUNKED);
1992 
1993  unhex_val = unhex[(unsigned char)ch];
1994  if (unhex_val == -1) {
1996  goto error;
1997  }
1998 
1999  parser->content_length = unhex_val;
2000  state = s_chunk_size;
2001  break;
2002  }
2003 
2004  case s_chunk_size:
2005  {
2006  assert(parser->flags & F_CHUNKED);
2007 
2008  if (ch == CR) {
2009  state = s_chunk_size_almost_done;
2010  break;
2011  }
2012 
2013  unhex_val = unhex[(unsigned char)ch];
2014 
2015  if (unhex_val == -1) {
2016  if (ch == ';' || ch == ' ') {
2017  state = s_chunk_parameters;
2018  break;
2019  }
2020 
2022  goto error;
2023  }
2024 
2025  if (parser->content_length > (INT64_MAX - unhex_val) >> 4) {
2026  /* overflow */
2028  goto error;
2029  }
2030  parser->content_length *= 16;
2031  parser->content_length += unhex_val;
2032  break;
2033  }
2034 
2035  case s_chunk_parameters:
2036  {
2037  assert(parser->flags & F_CHUNKED);
2038  /*
2039  * Chunk parameters (extensions) are skipped. TODO check for overflow
2040  * TODO: It would be nice to pass this information to the
2041  * on_chunk_header callback.
2042  */
2043  if (ch == CR) {
2044  state = s_chunk_size_almost_done;
2045  break;
2046  }
2047  break;
2048  }
2049 
2051  {
2052  assert(parser->flags & F_CHUNKED);
2053  STRICT_CHECK(ch != LF);
2054 
2055  if (parser->content_length == 0) {
2056  parser->flags |= F_TRAILING;
2057  state = s_header_field_start;
2058  CALLBACK_NOTIFY(chunk_header);
2059  } else {
2060  state = s_chunk_data;
2061  CALLBACK_NOTIFY(chunk_header);
2062  }
2063  break;
2064  }
2065 
2066  case s_chunk_data:
2067  {
2068  uint64_t to_read = MIN(parser->content_length, (data + len) - p);
2069 
2070  assert(parser->flags & F_CHUNKED);
2071  assert(parser->content_length > 0);
2072 
2073  /* See the explanation in s_body_identity for why the content
2074  * length and data pointers are managed this way.
2075  */
2076  MARK(body);
2077  parser->content_length -= to_read;
2078  p += to_read - 1;
2079 
2080  if (parser->content_length == 0) {
2081  state = s_chunk_data_almost_done;
2082  }
2083 
2084  break;
2085  }
2086 
2088  assert(parser->flags & F_CHUNKED);
2089  assert(parser->content_length == 0);
2090  STRICT_CHECK(ch != CR);
2091  state = s_chunk_data_done;
2092  CALLBACK_DATA(body);
2093  break;
2094 
2095  case s_chunk_data_done:
2096  assert(parser->flags & F_CHUNKED);
2097  STRICT_CHECK(ch != LF);
2098  state = s_chunk_size_start;
2099  parser->nread = 0;
2100  data_or_header_data_start = p;
2101  CALLBACK_NOTIFY(chunk_complete);
2102  break;
2103 
2104  default:
2105  assert(0 && "unhandled state");
2107  goto error;
2108  }
2109  }
2110 
2111  /* We can check for overflow here because in Proxygen, len <= ~8KB and so the
2112  * worst thing that can happen is that we catch the overflow at 88KB rather
2113  * than at 80KB.
2114  * In case of chunk encoding, we count the overflow for every
2115  * chunk separately.
2116  * We zero the nread counter (and reset data_or_header_data_start) when we
2117  * start parsing a new message or a new chunk.
2118  */
2119  if (PARSING_HEADER(state)) {
2120  parser->nread += p - data_or_header_data_start;
2121  if (parser->nread > HTTP_MAX_HEADER_SIZE) {
2123  goto error;
2124  }
2125  }
2126 
2127  /* Run callbacks for any marks that we have leftover after we ran out of
2128  * bytes. There should be at most one of these set, so it's OK to invoke
2129  * them in series (unset marks will not result in callbacks).
2130  *
2131  * We use the NOADVANCE() variety of callbacks here because 'p' has already
2132  * overflowed 'data' and this allows us to correct for the off-by-one that
2133  * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2134  * value that's in-bounds).
2135  */
2136 
2137  assert(((header_field_mark ? 1 : 0) +
2138  (header_value_mark ? 1 : 0) +
2139  (url_mark ? 1 : 0) +
2140  (reason_mark ? 1 : 0) +
2141  (body_mark ? 1 : 0)) <= 1);
2142 
2143  CALLBACK_DATA_NOADVANCE(header_field);
2144  CALLBACK_DATA_NOADVANCE(header_value);
2146  CALLBACK_DATA_NOADVANCE(reason);
2148 
2149  RETURN(len);
2150 
2151 error:
2152  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2154  }
2155 
2156  RETURN(p - data);
static const int8_t unhex[256]
Definition: http_parser.c:219
#define HTTP_MAX_HEADER_SIZE
Definition: http_parser.h:64
uint32_t nread
Definition: http_parser.h:220
#define IS_HEADER_CHAR(ch)
Definition: http_parser.c:426
unsigned char flags
Definition: http_parser.h:215
#define CR
Definition: http_parser.c:395
#define RETURN(r)
Definition: http_parser.c:63
#define CALLBACK_NOTIFY_NOADVANCE(FOR)
Definition: http_parser.c:89
#define IS_ALPHA(c)
Definition: http_parser.c:401
#define CALLBACK_DATA_NOADVANCE(FOR)
Definition: http_parser.c:115
#define MOVE_FAST
#define CALLBACK_SPACE(FOR)
Definition: http_parser.c:119
unsigned short http_minor
Definition: http_parser.h:225
#define CONTENT_LENGTH
Definition: http_parser.c:142
#define IS_ALPHANUM(c)
Definition: http_parser.c:403
#define PARSING_HEADER(state)
Definition: http_parser.c:357
#define CALLBACK_DATA(FOR)
Definition: http_parser.c:111
#define TOKEN(c)
Definition: http_parser.c:400
#define MIN(a, b)
Definition: http_parser.c:46
#define IS_NUM(c)
Definition: http_parser.c:402
#define BS
Definition: http_parser.c:398
requires And< SemiMovable< VN >... > &&SemiMovable< E > auto error(E e)
Definition: error.h:48
#define LOWER(c)
Definition: http_parser.c:399
auto ch
unsigned short status_code
Definition: http_parser.h:226
#define TRANSFER_ENCODING
Definition: http_parser.c:143
#define MARK(FOR)
Definition: http_parser.c:134
#define HTTP_PARSER_ERRNO(p)
Definition: http_parser.h:202
static const char * method_strings[]
Definition: http_parser.c:149
#define _CALLBACK_DATA(FOR, LEN, ER)
Definition: http_parser.c:92
unsigned char state
Definition: http_parser.h:216
#define UPGRADE
Definition: http_parser.c:144
unsigned char method
Definition: http_parser.h:227
unsigned short http_major
Definition: http_parser.h:224
http_data_cb on_headers_complete
Definition: http_parser.h:251
#define MOVE_THE_HEAD
#define NEW_MESSAGE()
Definition: http_parser.c:432
#define CALLBACK_NOTIFY(FOR)
Definition: http_parser.c:86
http_method
Definition: http_parser.h:90
#define CHUNKED
Definition: http_parser.c:145
#define SET_ERRNO(e)
Definition: http_parser.c:57
unsigned char header_state
Definition: http_parser.h:217
unsigned char http_errno
Definition: http_parser.h:228
#define QT
Definition: http_parser.c:397
#define LF
Definition: http_parser.c:396
#define IS_URL_CHAR(c)
Definition: http_parser.c:416
#define IS_HEX(c)
Definition: http_parser.c:404
unsigned char type
Definition: http_parser.h:214
#define STRICT_CHECK(cond)
Definition: http_parser.c:431
char c
static constexpr uint64_t data[1]
Definition: Fingerprint.cpp:43
#define IS_HOST_CHAR(c)
Definition: http_parser.c:418
int64_t content_length
Definition: http_parser.h:221
state
Definition: http_parser.c:272
unsigned char index
Definition: http_parser.h:218
void http_parser_init ( http_parser * parser,
enum http_parser_type  type 
)

Definition at line 2166 of file http_parser.c.

References http_parser::flags, HPE_OK, http_parser::http_errno, http_parser::http_major, http_parser::http_minor, HTTP_REQUEST, HTTP_RESPONSE, http_parser::method, http_parser::nread, s_pre_start_req, s_pre_start_req_or_res, s_pre_start_res, http_parser::state, folly::pushmi::detail::t, http_parser::type, and http_parser::upgrade.

Referenced by proxygen::HTTP1xCodec::HTTP1xCodec(), parser_init(), test_header_overflow_error(), and test_no_overflow_long_body().

2168 {
2169  parser->type = t;
2171  parser->nread = 0;
2172  parser->upgrade = 0;
2173  parser->flags = 0;
2174  parser->method = 0;
2175  parser->http_major = 0;
2176  parser->http_minor = 0;
2177  parser->http_errno = HPE_OK;
uint32_t nread
Definition: http_parser.h:220
unsigned char flags
Definition: http_parser.h:215
unsigned short http_minor
Definition: http_parser.h:225
unsigned char state
Definition: http_parser.h:216
unsigned char method
Definition: http_parser.h:227
unsigned short http_major
Definition: http_parser.h:224
unsigned char http_errno
Definition: http_parser.h:228
unsigned char type
Definition: http_parser.h:214
int http_parser_parse_url ( const char *  buf,
size_t  buflen,
int  is_connect,
struct http_parser_url * u 
)

Definition at line 2332 of file http_parser.c.

References http_parser_url::field_data, http_parser_url::field_set, http_parse_host(), http_parser_url::len, http_parser_url::off, parse_url_char(), http_parser_url::port, s, s_dead, s_req_fragment, s_req_fragment_start, s_req_path, s_req_query_string, s_req_query_string_start, s_req_schema, s_req_schema_slash, s_req_schema_slash_slash, s_req_server, s_req_server_start, s_req_server_with_at, s_req_spaces_before_url, UF_FRAGMENT, UF_HOST, UF_MAX, UF_PATH, UF_PORT, UF_QUERY, UF_SCHEMA, and uint16_t.

Referenced by proxygen::ParseURL::parse(), and test_parse_url().

2335 {
2336  enum state s;
2337  const char *p;
2338  enum http_parser_url_fields uf, old_uf;
2339  int found_at = 0;
2340 
2341  u->port = u->field_set = 0;
2342  s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2343  uf = old_uf = UF_MAX;
2344 
2345  for (p = buf; p < buf + buflen; p++) {
2346  s = parse_url_char(s, *p);
2347 
2348  /* Figure out the next field that we're operating on */
2349  switch (s) {
2350  case s_dead:
2351  return 1;
2352 
2353  /* Skip delimiters */
2354  case s_req_schema_slash:
2356  case s_req_server_start:
2358  case s_req_fragment_start:
2359  continue;
2360 
2361  case s_req_schema:
2362  uf = UF_SCHEMA;
2363  break;
2364 
2365  case s_req_server_with_at:
2366  found_at = 1;
2367 
2368  /* FALLTHROUGH */
2369  case s_req_server:
2370  uf = UF_HOST;
2371  break;
2372 
2373  case s_req_path:
2374  uf = UF_PATH;
2375  break;
2376 
2377  case s_req_query_string:
2378  uf = UF_QUERY;
2379  break;
2380 
2381  case s_req_fragment:
2382  uf = UF_FRAGMENT;
2383  break;
2384 
2385  default:
2386  assert(!"Unexpected state");
2387  return 1;
2388  }
2389 
2390  /* Nothing's changed; soldier on */
2391  if (uf == old_uf) {
2392  u->field_data[uf].len++;
2393  continue;
2394  }
2395 
2396  u->field_data[uf].off = p - buf;
2397  u->field_data[uf].len = 1;
2398 
2399  u->field_set |= (1 << uf);
2400  old_uf = uf;
2401  }
2402 
2403  /* host must be present if there is a schema */
2404  /* parsing http:///toto will fail */
2405  if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2406  if (http_parse_host(buf, u, found_at) != 0) {
2407  return 1;
2408  }
2409  }
2410 
2411  /* CONNECT requests can only contain "hostname:port" */
2412  if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2413  return 1;
2414  }
2415 
2416  if (u->field_set & (1 << UF_PORT)) {
2417  /* Don't bother with endp; we've already validated the string */
2418  unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, nullptr, 10);
2419 
2420  /* Ports have a max value of 2^16 - 1 (0xffff) */
2421  if (v > 0xffff) {
2422  return 1;
2423  }
2424 
2425  u->port = (uint16_t) v;
2426  }
2427 
2428  return 0;
static int http_parse_host(const char *buf, struct http_parser_url *u, int found_at)
Definition: http_parser.c:2258
uint16_t field_set
Definition: http_parser.h:283
static set< string > s
static enum state parse_url_char(enum state s, const char ch)
Definition: http_parser.c:455
struct http_parser_url::@1 field_data[UF_MAX]
http_parser_url_fields
Definition: http_parser.h:263
state
Definition: http_parser.c:272
void http_parser_pause ( http_parser * parser,
int  paused 
)

Definition at line 2431 of file http_parser.c.

References HPE_OK, HPE_PAUSED, HTTP_PARSER_ERRNO, and SET_ERRNO.

Referenced by pause_body_cb(), pause_chunk_complete_cb(), pause_chunk_header_cb(), pause_header_field_cb(), pause_header_value_cb(), pause_headers_complete_cb(), pause_message_begin_cb(), pause_message_complete_cb(), pause_request_url_cb(), pause_response_reason_cb(), proxygen::HTTP1xCodec::setParserPaused(), and test_message_pause().

2432  {
2433  /* Users should only be pausing/unpausing a parser that is not in an error
2434  * state. In non-debug builds, there's not much that we can do about this
2435  * other than ignore it.
2436  */
2437  if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2438  HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2439  SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2440  } else {
2441  assert(0 && "Attempting to pause parser in error state");
2442  }
#define HTTP_PARSER_ERRNO(p)
Definition: http_parser.h:202
#define SET_ERRNO(e)
Definition: http_parser.c:57