proxygen
http_parser_cpp.cpp File Reference
#include "proxygen/external/http_parser/http_parser.h"
#include <assert.h>
#include <stddef.h>
#include <limits.h>
#include <stdlib.h>

Go to the source code of this file.

Macros

#define nullptr   NULL
 
#define MIN(a, b)   ((a) < (b) ? (a) : (b))
 
#define SET_ERRNO(e)
 
#define RETURN(r)
 
#define _CALLBACK_NOTIFY(FOR, ER)
 
#define CALLBACK_NOTIFY(FOR)   _CALLBACK_NOTIFY(FOR, p - data + 1)
 
#define CALLBACK_NOTIFY_NOADVANCE(FOR)   _CALLBACK_NOTIFY(FOR, p - data)
 
#define _CALLBACK_DATA(FOR, LEN, ER)
 
#define CALLBACK_DATA(FOR)   _CALLBACK_DATA(FOR, p - FOR##_mark, p - data + 1)
 
#define CALLBACK_DATA_NOADVANCE(FOR)   _CALLBACK_DATA(FOR, p - FOR##_mark, p - data)
 
#define CALLBACK_SPACE(FOR)
 
#define MARK(FOR)
 
#define CONTENT_LENGTH   "content-length"
 
#define TRANSFER_ENCODING   "transfer-encoding"
 
#define UPGRADE   "upgrade"
 
#define CHUNKED   "chunked"
 
#define SPACE   " "
 
#define T(v)   v
 
#define PARSING_HEADER(state)   (state <= s_headers_done)
 
#define CR   '\r'
 
#define LF   '\n'
 
#define QT   '"'
 
#define BS   '\\'
 
#define LOWER(c)   (unsigned char)(c | 0x20)
 
#define TOKEN(c)   (tokens[(unsigned char)c])
 
#define IS_ALPHA(c)   (LOWER(c) >= 'a' && LOWER(c) <= 'z')
 
#define IS_NUM(c)   ((c) >= '0' && (c) <= '9')
 
#define IS_ALPHANUM(c)   (IS_ALPHA(c) || IS_NUM(c))
 
#define IS_HEX(c)   (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
 
#define IS_MARK(c)
 
#define IS_USERINFO_CHAR(c)
 
#define IS_URL_CHAR(c)   (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
 
#define IS_HOST_CHAR(c)   (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
 
#define IS_HEADER_CHAR(ch)   (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
 
#define start_state   (parser->type == HTTP_REQUEST ? s_pre_start_req : s_pre_start_res)
 
#define STRICT_CHECK(cond)
 
#define NEW_MESSAGE()   start_state
 
#define HTTP_STRERROR_GEN(n, s)   { "HPE_" #n, s },
 
#define MOVE_THE_HEAD
 
#define MOVE_FAST
 

Enumerations

enum  state {
  s_dead = 1, s_pre_start_req_or_res, s_start_req_or_res, s_res_or_resp_H,
  s_pre_start_res, s_start_res, s_res_H, s_res_HT,
  s_res_HTT, s_res_HTTP, s_res_first_http_major, s_res_http_major,
  s_res_first_http_minor, s_res_http_minor, s_res_first_status_code, s_res_status_code,
  s_res_status_start, s_res_status, s_res_line_almost_done, s_pre_start_req,
  s_start_req, s_req_method, s_req_spaces_before_url, s_req_schema,
  s_req_schema_slash, s_req_schema_slash_slash, s_req_server_start, s_req_server,
  s_req_server_with_at, s_req_host_start, s_req_host, s_req_host_ipv6,
  s_req_host_done, s_req_port, s_req_path, s_req_query_string_start,
  s_req_query_string, s_req_fragment_start, s_req_fragment, s_req_http_start,
  s_req_http_H, s_req_http_HT, s_req_http_HTT, s_req_http_HTTP,
  s_req_first_http_major, s_req_http_major, s_req_first_http_minor, s_req_http_minor,
  s_req_line_almost_done, s_header_field_start, s_header_field, s_header_value_start,
  s_header_value, s_header_value_lws, s_header_almost_done, s_chunk_size_start,
  s_chunk_size, s_chunk_parameters, s_chunk_size_almost_done, s_headers_almost_done,
  s_headers_done, s_chunk_data, s_chunk_data_almost_done, s_chunk_data_done,
  s_body_identity, s_body_identity_eof, s_message_done
}
 
enum  header_states {
  h_general = 0, h_general_and_quote, h_general_and_quote_and_escape, h_matching_content_length,
  h_matching_transfer_encoding, h_matching_upgrade, h_content_length, h_transfer_encoding,
  h_upgrade, h_matching_transfer_encoding_chunked, h_transfer_encoding_chunked
}
 
enum  http_host_state {
  s_http_host_dead = 1, s_http_userinfo_start, s_http_userinfo, s_http_host_start,
  s_http_host_v6_start, s_http_host, s_http_host_v6, s_http_host_v6_end,
  s_http_host_port_start, s_http_host_port
}
 

Functions

static enum state parse_url_char (enum state s, const char ch)
 
size_t http_parser_execute (http_parser *parser, const http_parser_settings *settings, const char *data, size_t len)
 
const char * http_method_str (enum http_method m)
 
void http_parser_init (http_parser *parser, enum http_parser_type t)
 
const char * http_errno_name (enum http_errno err)
 
const char * http_errno_description (enum http_errno err)
 
static enum http_host_state http_parse_host_char (enum http_host_state s, const char ch)
 
static int http_parse_host (const char *buf, struct http_parser_url *u, int found_at)
 
int http_parser_parse_url (const char *buf, size_t buflen, int is_connect, struct http_parser_url *u)
 
void http_parser_pause (http_parser *parser, int paused)
 

Variables

static const char * method_strings []
 
static const char tokens [256]
 
static const int8_t unhex [256]
 
static const uint8_t normal_url_char [256]
 
struct {
   const char *   name
 
   const char *   description
 
} http_strerror_tab []
 

Macro Definition Documentation

#define _CALLBACK_DATA (   FOR,
  LEN,
  ER 
)
Value:
do { \
if (FOR##_mark) { \
if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
SET_ERRNO(HPE_CB_##FOR); \
} \
\
/* We either errored above or got paused; get out */ \
return (ER); \
} \
FOR##_mark = nullptr; \
} \
} while (0)
#define SET_ERRNO(e)
static http_parser_settings settings
Definition: test.c:1529
static http_parser * parser
Definition: test.c:40
#define HTTP_PARSER_ERRNO(p)
Definition: http_parser.h:202
unsigned char state
Definition: http_parser.h:216

Definition at line 92 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define _CALLBACK_NOTIFY (   FOR,
  ER 
)
Value:
do { \
if (0 != settings->on_##FOR(parser)) { \
SET_ERRNO(HPE_CB_##FOR); \
} \
\
/* We either errored above or got paused; get out */ \
return (ER); \
} \
} while (0)
#define SET_ERRNO(e)
static http_parser_settings settings
Definition: test.c:1529
static http_parser * parser
Definition: test.c:40
#define HTTP_PARSER_ERRNO(p)
Definition: http_parser.h:202
unsigned char state
Definition: http_parser.h:216

Definition at line 70 of file http_parser_cpp.cpp.

#define BS   '\\'

Definition at line 398 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define CALLBACK_DATA (   FOR)    _CALLBACK_DATA(FOR, p - FOR##_mark, p - data + 1)

Definition at line 111 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define CALLBACK_DATA_NOADVANCE (   FOR)    _CALLBACK_DATA(FOR, p - FOR##_mark, p - data)

Definition at line 115 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define CALLBACK_NOTIFY (   FOR)    _CALLBACK_NOTIFY(FOR, p - data + 1)

Definition at line 86 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define CALLBACK_NOTIFY_NOADVANCE (   FOR)    _CALLBACK_NOTIFY(FOR, p - data)

Definition at line 89 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define CALLBACK_SPACE (   FOR)
Value:
do { \
if (0 != settings->on_##FOR(parser, SPACE, 1)) { \
SET_ERRNO(HPE_CB_##FOR); \
return (p - data); \
} \
\
/* We either errored above or got paused; get out */ \
return (p - data); \
} \
} while (0)
#define SET_ERRNO(e)
static http_parser_settings settings
Definition: test.c:1529
static http_parser * parser
Definition: test.c:40
#define SPACE
#define HTTP_PARSER_ERRNO(p)
Definition: http_parser.h:202
unsigned char state
Definition: http_parser.h:216
static constexpr uint64_t data[1]
Definition: Fingerprint.cpp:43

Definition at line 119 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define CHUNKED   "chunked"

Definition at line 145 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define CONTENT_LENGTH   "content-length"

Definition at line 142 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define CR   '\r'

Definition at line 395 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define HTTP_STRERROR_GEN (   n,
  s 
)    { "HPE_" #n, s },

Definition at line 435 of file http_parser_cpp.cpp.

#define IS_ALPHA (   c)    (LOWER(c) >= 'a' && LOWER(c) <= 'z')

Definition at line 401 of file http_parser_cpp.cpp.

Referenced by http_parser_execute(), and parse_url_char().

#define IS_ALPHANUM (   c)    (IS_ALPHA(c) || IS_NUM(c))

Definition at line 403 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define IS_HEADER_CHAR (   ch)    (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))

Verify that a char is a valid visible (printable) US-ASCII character or x80-FF

Definition at line 426 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define IS_HEX (   c)    (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))

Definition at line 404 of file http_parser_cpp.cpp.

Referenced by http_parse_host_char(), and http_parser_execute().

#define IS_HOST_CHAR (   c)    (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')

Definition at line 418 of file http_parser_cpp.cpp.

Referenced by http_parse_host_char(), and http_parser_execute().

#define IS_MARK (   c)
Value:
((c) == '-' || (c) == '_' || (c) == '.' || \
(c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
(c) == ')')
char c

Definition at line 405 of file http_parser_cpp.cpp.

#define IS_NUM (   c)    ((c) >= '0' && (c) <= '9')

Definition at line 402 of file http_parser_cpp.cpp.

Referenced by http_parse_host_char(), and http_parser_execute().

#define IS_URL_CHAR (   c)    (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))

Definition at line 416 of file http_parser_cpp.cpp.

Referenced by http_parser_execute(), and parse_url_char().

#define IS_USERINFO_CHAR (   c)
Value:
(IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
(c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
(c) == '$' || (c) == ',')
#define IS_ALPHANUM(c)
#define IS_MARK(c)
char c

Definition at line 408 of file http_parser_cpp.cpp.

Referenced by http_parse_host_char(), and parse_url_char().

#define LF   '\n'

Definition at line 396 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define LOWER (   c)    (unsigned char)(c | 0x20)

Definition at line 399 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define MARK (   FOR)
Value:
do { \
if (!FOR##_mark) { \
FOR##_mark = p; \
} \
} while (0)

Definition at line 134 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define MIN (   a,
  b 
)    ((a) < (b) ? (a) : (b))

Definition at line 46 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define MOVE_FAST
Value:
do { \
++p; \
ch = *p; \
if (ch == CR || ch == LF || ch == QT || \
ch == BS || !IS_HEADER_CHAR(ch)) { \
goto cr_or_lf_or_qt; \
} \
} while(0);
#define QT
#define CR
auto ch
#define IS_HEADER_CHAR(ch)
#define BS
#define LF

Referenced by http_parser_execute().

#define MOVE_THE_HEAD
Value:
do { \
++p; \
if (!TOKEN(*p)) { \
ch = *p; \
goto notatoken; \
} \
} while(0);
auto ch
#define TOKEN(c)

Referenced by http_parser_execute().

#define NEW_MESSAGE ( )    start_state

Definition at line 432 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define nullptr   NULL

Definition at line 41 of file http_parser_cpp.cpp.

#define PARSING_HEADER (   state)    (state <= s_headers_done)

Definition at line 357 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define QT   '"'

Definition at line 397 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define RETURN (   r)
Value:
do { \
parser->state = state; \
return (r); \
} while(0)
static http_parser * parser
Definition: test.c:40
unsigned char state
Definition: http_parser.h:216

Definition at line 63 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define SET_ERRNO (   e)
Value:
do { \
parser->http_errno = (e); \
} while(0)
static http_parser * parser
Definition: test.c:40
unsigned char http_errno
Definition: http_parser.h:228

Definition at line 57 of file http_parser_cpp.cpp.

Referenced by http_parser_execute(), and http_parser_pause().

#define SPACE   " "

Definition at line 146 of file http_parser_cpp.cpp.

#define start_state   (parser->type == HTTP_REQUEST ? s_pre_start_req : s_pre_start_res)

Definition at line 429 of file http_parser_cpp.cpp.

#define STRICT_CHECK (   cond)

Definition at line 431 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define T (   v)    v

Definition at line 233 of file http_parser_cpp.cpp.

#define TOKEN (   c)    (tokens[(unsigned char)c])

Definition at line 400 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define TRANSFER_ENCODING   "transfer-encoding"

Definition at line 143 of file http_parser_cpp.cpp.

Referenced by http_parser_execute().

#define UPGRADE   "upgrade"

Definition at line 144 of file http_parser_cpp.cpp.

Enumeration Type Documentation

enum header_states
Enumerator
h_general 
h_general_and_quote 
h_general_and_quote_and_escape 
h_matching_content_length 
h_matching_transfer_encoding 
h_matching_upgrade 
h_content_length 
h_transfer_encoding 
h_upgrade 
h_matching_transfer_encoding_chunked 
h_transfer_encoding_chunked 

Definition at line 360 of file http_parser_cpp.cpp.

enum http_host_state
Enumerator
s_http_host_dead 
s_http_userinfo_start 
s_http_userinfo 
s_http_host_start 
s_http_host_v6_start 
s_http_host 
s_http_host_v6 
s_http_host_v6_end 
s_http_host_port_start 
s_http_host_port 

Definition at line 379 of file http_parser_cpp.cpp.

enum state
Enumerator
s_dead 
s_pre_start_req_or_res 
s_start_req_or_res 
s_res_or_resp_H 
s_pre_start_res 
s_start_res 
s_res_H 
s_res_HT 
s_res_HTT 
s_res_HTTP 
s_res_first_http_major 
s_res_http_major 
s_res_first_http_minor 
s_res_http_minor 
s_res_first_status_code 
s_res_status_code 
s_res_status_start 
s_res_status 
s_res_line_almost_done 
s_pre_start_req 
s_start_req 
s_req_method 
s_req_spaces_before_url 
s_req_schema 
s_req_schema_slash 
s_req_schema_slash_slash 
s_req_server_start 
s_req_server 
s_req_server_with_at 
s_req_host_start 
s_req_host 
s_req_host_ipv6 
s_req_host_done 
s_req_port 
s_req_path 
s_req_query_string_start 
s_req_query_string 
s_req_fragment_start 
s_req_fragment 
s_req_http_start 
s_req_http_H 
s_req_http_HT 
s_req_http_HTT 
s_req_http_HTTP 
s_req_first_http_major 
s_req_http_major 
s_req_first_http_minor 
s_req_http_minor 
s_req_line_almost_done 
s_header_field_start 
s_header_field 
s_header_value_start 
s_header_value 
s_header_value_lws 
s_header_almost_done 
s_chunk_size_start 
s_chunk_size 
s_chunk_parameters 
s_chunk_size_almost_done 
s_headers_almost_done 
s_headers_done 
s_chunk_data 
s_chunk_data_almost_done 
s_chunk_data_done 
s_body_identity 
s_body_identity_eof 
s_message_done 

Definition at line 272 of file http_parser_cpp.cpp.

273  { s_dead = 1 /* important that this is > 0 */
277 
279  , s_start_res
280  , s_res_H
281  , s_res_HT
282  , s_res_HTT
283  , s_res_HTTP
291  , s_res_status
293 
295  , s_start_req
296  , s_req_method
298  , s_req_schema
302  , s_req_server
305  , s_req_host
308  , s_req_port
309  , s_req_path
315  , s_req_http_H
316  , s_req_http_HT
324 
330 
332 
334  , s_chunk_size
337 
340 
341  /* Important: 's_headers_done' must be the last 'header' state. All
342  * states beyond this must be 'body' states. It is used for overflow
343  * checking. See the PARSING_HEADER() macro.
344  */
345 
346  , s_chunk_data
349 
352 
354  };

Function Documentation

const char* http_errno_description ( enum http_errno  err)

Definition at line 2187 of file http_parser_cpp.cpp.

References http_strerror_tab.

2187  {
2188  assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2189  return http_strerror_tab[err].description;
2190 }
static struct @2 http_strerror_tab[]
const char* http_errno_name ( enum http_errno  err)

Definition at line 2181 of file http_parser_cpp.cpp.

References http_strerror_tab.

2181  {
2182  assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2183  return http_strerror_tab[err].name;
2184 }
static struct @2 http_strerror_tab[]
const char* http_method_str ( enum http_method  m)

Definition at line 2160 of file http_parser_cpp.cpp.

References m.

2161 {
2162  return method_strings[m];
2163 }
static map< string, int > m
static const char * method_strings[]
static int http_parse_host ( const char *  buf,
struct http_parser_url u,
int  found_at 
)
static

Definition at line 2259 of file http_parser_cpp.cpp.

References http_parser_url::field_data, http_parser_url::field_set, http_parse_host_char(), http_parser_url::len, http_parser_url::off, s, s_http_host, s_http_host_dead, s_http_host_port, s_http_host_port_start, s_http_host_start, s_http_host_v6, s_http_host_v6_start, s_http_userinfo, s_http_userinfo_start, UF_HOST, UF_PORT, and UF_USERINFO.

Referenced by http_parser_parse_url().

2259  {
2260  enum http_host_state s;
2261 
2262  const char *p;
2263  size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2264 
2265  u->field_data[UF_HOST].len = 0;
2266 
2267  s = found_at ? s_http_userinfo_start : s_http_host_start;
2268 
2269  for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2270  enum http_host_state new_s = http_parse_host_char(s, *p);
2271 
2272  if (new_s == s_http_host_dead) {
2273  return 1;
2274  }
2275 
2276  switch(new_s) {
2277  case s_http_host:
2278  if (s != s_http_host) {
2279  u->field_data[UF_HOST].off = p - buf;
2280  }
2281  u->field_data[UF_HOST].len++;
2282  break;
2283 
2284  case s_http_host_v6:
2285  if (s != s_http_host_v6) {
2286  u->field_data[UF_HOST].off = p - buf;
2287  }
2288  u->field_data[UF_HOST].len++;
2289  break;
2290 
2291  case s_http_host_port:
2292  if (s != s_http_host_port) {
2293  u->field_data[UF_PORT].off = p - buf;
2294  u->field_data[UF_PORT].len = 0;
2295  u->field_set |= (1 << UF_PORT);
2296  }
2297  u->field_data[UF_PORT].len++;
2298  break;
2299 
2300  case s_http_userinfo:
2301  if (s != s_http_userinfo) {
2302  u->field_data[UF_USERINFO].off = p - buf ;
2303  u->field_data[UF_USERINFO].len = 0;
2304  u->field_set |= (1 << UF_USERINFO);
2305  }
2306  u->field_data[UF_USERINFO].len++;
2307  break;
2308 
2309  default:
2310  break;
2311  }
2312  s = new_s;
2313  }
2314 
2315  /* Make sure we don't end somewhere unexpected */
2316  switch (s) {
2317  case s_http_host_start:
2318  case s_http_host_v6_start:
2319  case s_http_host_v6:
2320  case s_http_host_port_start:
2321  case s_http_userinfo:
2322  case s_http_userinfo_start:
2323  return 1;
2324  default:
2325  break;
2326  }
2327 
2328  return 0;
2329 }
static enum http_host_state http_parse_host_char(enum http_host_state s, const char ch)
uint16_t field_set
Definition: http_parser.h:283
static set< string > s
http_host_state
Definition: http_parser.c:379
struct http_parser_url::@1 field_data[UF_MAX]
static enum http_host_state http_parse_host_char ( enum http_host_state  s,
const char  ch 
)
static

Definition at line 2194 of file http_parser_cpp.cpp.

References IS_HEX, IS_HOST_CHAR, IS_NUM, IS_USERINFO_CHAR, s_http_host, s_http_host_dead, s_http_host_port, s_http_host_port_start, s_http_host_start, s_http_host_v6, s_http_host_v6_end, s_http_host_v6_start, s_http_userinfo, and s_http_userinfo_start.

Referenced by http_parse_host().

2194  {
2195  switch(s) {
2196  case s_http_userinfo:
2197  case s_http_userinfo_start:
2198  if (ch == '@') {
2199  return s_http_host_start;
2200  }
2201 
2202  if (IS_USERINFO_CHAR(ch)) {
2203  return s_http_userinfo;
2204  }
2205  break;
2206 
2207  case s_http_host_start:
2208  if (ch == '[') {
2209  return s_http_host_v6_start;
2210  }
2211 
2212  if (IS_HOST_CHAR(ch)) {
2213  return s_http_host;
2214  }
2215 
2216  break;
2217 
2218  case s_http_host:
2219  if (IS_HOST_CHAR(ch)) {
2220  return s_http_host;
2221  }
2222 
2223  /* FALLTHROUGH */
2224  case s_http_host_v6_end:
2225  if (ch == ':') {
2226  return s_http_host_port_start;
2227  }
2228 
2229  break;
2230 
2231  case s_http_host_v6:
2232  if (ch == ']') {
2233  return s_http_host_v6_end;
2234  }
2235 
2236  /* FALLTHROUGH */
2237  case s_http_host_v6_start:
2238  if (IS_HEX(ch) || ch == ':' || ch == '.') {
2239  return s_http_host_v6;
2240  }
2241 
2242  break;
2243 
2244  case s_http_host_port:
2245  case s_http_host_port_start:
2246  if (IS_NUM(ch)) {
2247  return s_http_host_port;
2248  }
2249 
2250  break;
2251 
2252  default:
2253  break;
2254  }
2255  return s_http_host_dead;
2256 }
#define IS_NUM(c)
#define IS_HEX(c)
auto ch
#define IS_HOST_CHAR(c)
#define IS_USERINFO_CHAR(c)
static set< string > s
size_t http_parser_execute ( http_parser parser,
const http_parser_settings settings,
const char *  data,
size_t  len 
)

Definition at line 603 of file http_parser_cpp.cpp.

References _CALLBACK_DATA, BS, c, CALLBACK_DATA, CALLBACK_DATA_NOADVANCE, CALLBACK_NOTIFY, CALLBACK_NOTIFY_NOADVANCE, CALLBACK_SPACE, ch, CHUNKED, CONTENT_LENGTH, http_parser::content_length, CR, data, folly::pushmi::operators::error(), F_CHUNKED, F_SKIPBODY, F_TRAILING, F_UPGRADE, http_parser::flags, h_content_length, h_general, h_general_and_quote, h_general_and_quote_and_escape, h_matching_content_length, h_matching_transfer_encoding, h_matching_transfer_encoding_chunked, h_matching_upgrade, h_transfer_encoding, h_transfer_encoding_chunked, h_upgrade, http_parser::header_state, HPE_CB_headers_complete, HPE_HEADER_OVERFLOW, HPE_HUGE_CHUNK_SIZE, HPE_HUGE_CONTENT_LENGTH, HPE_INVALID_CHUNK_SIZE, HPE_INVALID_CONSTANT, HPE_INVALID_CONTENT_LENGTH, HPE_INVALID_EOF_STATE, HPE_INVALID_FRAGMENT, HPE_INVALID_HEADER_TOKEN, HPE_INVALID_HOST, HPE_INVALID_INTERNAL_STATE, HPE_INVALID_METHOD, HPE_INVALID_PATH, HPE_INVALID_PORT, HPE_INVALID_QUERY_STRING, HPE_INVALID_STATUS, HPE_INVALID_TRANSFER_ENCODING, HPE_INVALID_UPGRADE, HPE_INVALID_URL, HPE_INVALID_VERSION, HPE_LF_EXPECTED, HPE_OK, HPE_STRICT, HPE_UNKNOWN, HTTP_CHECKOUT, HTTP_CONNECT, HTTP_COPY, HTTP_DELETE, http_parser::http_errno, HTTP_GET, HTTP_HEAD, HTTP_LOCK, http_parser::http_major, HTTP_MAX_HEADER_SIZE, HTTP_MERGE, http_parser::http_minor, HTTP_MKACTIVITY, HTTP_MKCOL, HTTP_MOVE, HTTP_MSEARCH, HTTP_NOTIFY, HTTP_OPTIONS, HTTP_PARSER_ERRNO, HTTP_PATCH, HTTP_POST, HTTP_PROPFIND, HTTP_PROPPATCH, HTTP_PUT, HTTP_REPORT, HTTP_REQUEST, HTTP_RESPONSE, HTTP_SUBSCRIBE, HTTP_TRACE, HTTP_UNLOCK, HTTP_UNSUBSCRIBE, http_parser::index, int8_t, IS_ALPHA, IS_ALPHANUM, IS_HEADER_CHAR, IS_HEX, IS_HOST_CHAR, IS_NUM, IS_URL_CHAR, LF, LOWER, MARK, http_parser::method, MIN, MOVE_FAST, MOVE_THE_HEAD, NEW_MESSAGE, http_parser::nread, http_parser_settings::on_headers_complete, PARSING_HEADER, QT, RETURN, s_body_identity, s_body_identity_eof, s_chunk_data, s_chunk_data_almost_done, s_chunk_data_done, s_chunk_parameters, s_chunk_size, 
s_chunk_size_almost_done, s_chunk_size_start, s_header_almost_done, s_header_field, s_header_field_start, s_header_value, s_header_value_lws, s_header_value_start, s_headers_almost_done, s_headers_done, s_message_done, s_pre_start_req, s_pre_start_req_or_res, s_pre_start_res, s_req_first_http_major, s_req_first_http_minor, s_req_fragment, s_req_fragment_start, s_req_host, s_req_host_done, s_req_host_ipv6, s_req_host_start, s_req_http_H, s_req_http_HT, s_req_http_HTT, s_req_http_HTTP, s_req_http_major, s_req_http_minor, s_req_http_start, s_req_line_almost_done, s_req_method, s_req_path, s_req_port, s_req_query_string, s_req_query_string_start, s_req_schema, s_req_schema_slash, s_req_schema_slash_slash, s_req_spaces_before_url, s_res_first_http_major, s_res_first_http_minor, s_res_first_status_code, s_res_H, s_res_HT, s_res_HTT, s_res_HTTP, s_res_http_major, s_res_http_minor, s_res_line_almost_done, s_res_or_resp_H, s_res_status, s_res_status_code, s_start_req, s_start_req_or_res, s_start_res, SET_ERRNO, http_parser::state, http_parser::status_code, STRICT_CHECK, TOKEN, TRANSFER_ENCODING, http_parser::type, uint64_t, UPGRADE, and http_parser::upgrade.

607 {
608  char c, ch;
609  int8_t unhex_val;
610  const char *p = data;
611 
612  /* Optimization: within the parsing loop below, we refer to this
613  * local copy of the state rather than parser->state. The compiler
614  * can't be sure whether parser->state will change during a callback,
615  * so it generates a lot of memory loads and stores to keep a register
616  * copy of the state in sync with the memory copy. We know, however,
617  * that the callbacks aren't allowed to change the parser state, so
618  * the parsing loop works with this local variable and only copies
619  * the value back to parser->state before returning or invoking a
620  * callback.
621  */
622  unsigned char state = parser->state;
623  const unsigned int lenient = 0;
624 
625  /* We're in an error state. Don't bother doing anything. */
626  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
627  RETURN(0);
628  }
629 
630  if (len == 0) {
631  switch (state) {
632  case s_body_identity_eof:
633  /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
634  * we got paused.
635  */
636  CALLBACK_NOTIFY_NOADVANCE(message_complete);
637  RETURN(0);
638 
640  case s_pre_start_res:
641  case s_pre_start_req:
642  RETURN(0);
643 
644  default:
646  RETURN(1);
647  }
648  }
649 
650  /* technically we could combine all of these (except for url_mark) into one
651  variable, saving stack space, but it seems more clear to have them
652  separated. */
653  const char *header_field_mark = 0;
654  const char *header_value_mark = 0;
655  const char *url_mark = 0;
656  const char *reason_mark = 0;
657  const char *body_mark = 0;
658 
659  if (state == s_header_field)
660  header_field_mark = data;
661  if (state == s_header_value)
662  header_value_mark = data;
663  if (state == s_req_path ||
664  state == s_req_schema ||
665  state == s_req_schema_slash ||
666  state == s_req_schema_slash_slash ||
667  state == s_req_port ||
668  state == s_req_query_string_start ||
669  state == s_req_query_string ||
670  state == s_req_host_start ||
671  state == s_req_host ||
672  state == s_req_host_ipv6 ||
673  state == s_req_host_done ||
674  state == s_req_fragment_start ||
675  state == s_req_fragment)
676  url_mark = data;
677  if (state == s_res_status)
678  reason_mark = data;
679 
680  /* Used only for overflow checking. If the parser is in a parsing-headers
681  * state, then its value is equal to max(data, the beginning of the current
682  * message or chunk). If the parser is in a not-parsing-headers state, then
683  * its value is irrelevant.
684  */
685  const char* data_or_header_data_start = data;
686 
687  for (p = data; p != data + len; p++) {
688  ch = *p;
689 
690  reexecute_byte:
691  switch (state) {
692 
694  if (ch == CR || ch == LF)
695  break;
696  state = s_start_req_or_res;
697  CALLBACK_NOTIFY_NOADVANCE(message_begin);
698  goto reexecute_byte;
699 
700  case s_start_req_or_res:
701  {
702  parser->flags = 0;
703  parser->content_length = -1;
704 
705  if (ch == 'H') {
706  state = s_res_or_resp_H;
707  } else {
708  parser->type = HTTP_REQUEST;
709  state = s_start_req;
710  goto reexecute_byte;
711  }
712 
713  break;
714  }
715 
716  case s_res_or_resp_H:
717  if (ch == 'T') {
718  parser->type = HTTP_RESPONSE;
719  state = s_res_HT;
720  } else {
721  if (ch != 'E') {
723  goto error;
724  }
725 
726  parser->type = HTTP_REQUEST;
727  parser->method = HTTP_HEAD;
728  parser->index = 2;
729  state = s_req_method;
730  }
731  break;
732 
733  case s_pre_start_res:
734  if (ch == CR || ch == LF)
735  break;
736  state = s_start_res;
737  CALLBACK_NOTIFY_NOADVANCE(message_begin);
738  goto reexecute_byte;
739 
740  case s_start_res:
741  {
742  parser->flags = 0;
743  parser->content_length = -1;
744 
745  switch (ch) {
746  case 'H':
747  state = s_res_H;
748  break;
749 
750  default:
752  goto error;
753  }
754 
755  break;
756  }
757 
758  case s_res_H:
759  STRICT_CHECK(ch != 'T');
760  state = s_res_HT;
761  break;
762 
763  case s_res_HT:
764  STRICT_CHECK(ch != 'T');
765  state = s_res_HTT;
766  break;
767 
768  case s_res_HTT:
769  STRICT_CHECK(ch != 'P');
770  state = s_res_HTTP;
771  break;
772 
773  case s_res_HTTP:
774  STRICT_CHECK(ch != '/');
775  state = s_res_first_http_major;
776  break;
777 
779  if (ch < '0' || ch > '9') {
781  goto error;
782  }
783 
784  parser->http_major = ch - '0';
785  state = s_res_http_major;
786  break;
787 
788  /* major HTTP version or dot */
789  case s_res_http_major:
790  {
791  if (ch == '.') {
792  state = s_res_first_http_minor;
793  break;
794  }
795 
796  if (!IS_NUM(ch)) {
798  goto error;
799  }
800 
801  parser->http_major *= 10;
802  parser->http_major += ch - '0';
803 
804  if (parser->http_major > 999) {
806  goto error;
807  }
808 
809  break;
810  }
811 
812  /* first digit of minor HTTP version */
814  if (!IS_NUM(ch)) {
816  goto error;
817  }
818 
819  parser->http_minor = ch - '0';
820  state = s_res_http_minor;
821  break;
822 
823  /* minor HTTP version or the space that precedes the status code */
824  case s_res_http_minor:
825  {
826  if (ch == ' ') {
827  state = s_res_first_status_code;
828  break;
829  }
830 
831  if (!IS_NUM(ch)) {
833  goto error;
834  }
835 
836  parser->http_minor *= 10;
837  parser->http_minor += ch - '0';
838 
839  if (parser->http_minor > 999) {
841  goto error;
842  }
843 
844  break;
845  }
846 
848  {
849  if (!IS_NUM(ch)) {
850  if (ch == ' ') {
851  break;
852  }
853 
855  goto error;
856  }
857  parser->status_code = ch - '0';
858  state = s_res_status_code;
859  break;
860  }
861 
862  case s_res_status_code:
863  {
864  if (!IS_NUM(ch)) {
865  switch (ch) {
866  case ' ':
867  state = s_res_status;
868  break;
869  case CR:
870  state = s_res_line_almost_done;
871  break;
872  case LF:
873  state = s_header_field_start;
874  break;
875  default:
877  goto error;
878  }
879  break;
880  }
881 
882  parser->status_code *= 10;
883  parser->status_code += ch - '0';
884 
885  if (parser->status_code > 999) {
887  goto error;
888  }
889 
890  break;
891  }
892 
893  case s_res_status:
894  /* the human readable status. e.g. "NOT FOUND" */
895  MARK(reason);
896  if (ch == CR) {
897  state = s_res_line_almost_done;
898  CALLBACK_DATA(reason);
899  break;
900  }
901 
902  if (ch == LF) {
903  state = s_header_field_start;
904  CALLBACK_DATA(reason);
905  break;
906  }
907  break;
908 
910  STRICT_CHECK(ch != LF);
911  state = s_header_field_start;
912  break;
913 
914  case s_pre_start_req:
915  if (ch == CR || ch == LF) {
916  break;
917  }
918  state = s_start_req;
919  CALLBACK_NOTIFY_NOADVANCE(message_begin);
920  goto reexecute_byte;
921 
922  case s_start_req:
923  {
924  parser->flags = 0;
925  parser->content_length = -1;
926 
927  if (!IS_ALPHA(ch)) {
929  goto error;
930  }
931 
932  parser->method = (enum http_method) 0;
933  parser->index = 1;
934  switch (ch) {
935  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
936  case 'D': parser->method = HTTP_DELETE; break;
937  case 'G': parser->method = HTTP_GET; break;
938  case 'H': parser->method = HTTP_HEAD; break;
939  case 'L': parser->method = HTTP_LOCK; break;
940  case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
941  case 'N': parser->method = HTTP_NOTIFY; break;
942  case 'O': parser->method = HTTP_OPTIONS; break;
943  case 'P': parser->method = HTTP_POST;
944  /* or PROPFIND or PROPPATCH or PUT or PATCH */
945  break;
946  case 'R': parser->method = HTTP_REPORT; break;
947  case 'S': parser->method = HTTP_SUBSCRIBE; break;
948  case 'T': parser->method = HTTP_TRACE; break;
949  case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
950  default:
952  goto error;
953  }
954  state = s_req_method;
955 
956  break;
957  }
958 
959  case s_req_method:
960  {
961  if (ch == '\0') {
963  goto error;
964  }
965 
966  const char *matcher = method_strings[parser->method];
967  if (ch == ' ' && matcher[parser->index] == '\0') {
968  state = s_req_spaces_before_url;
969  } else if (ch == matcher[parser->index]) {
970  ; /* nada */
971  } else if (parser->method == HTTP_CONNECT) {
972  if (parser->index == 1 && ch == 'H') {
973  parser->method = HTTP_CHECKOUT;
974  } else if (parser->index == 2 && ch == 'P') {
975  parser->method = HTTP_COPY;
976  } else {
977  goto error;
978  }
979  } else if (parser->method == HTTP_MKCOL) {
980  if (parser->index == 1 && ch == 'O') {
981  parser->method = HTTP_MOVE;
982  } else if (parser->index == 1 && ch == 'E') {
983  parser->method = HTTP_MERGE;
984  } else if (parser->index == 1 && ch == '-') {
985  parser->method = HTTP_MSEARCH;
986  } else if (parser->index == 2 && ch == 'A') {
987  parser->method = HTTP_MKACTIVITY;
988  } else {
989  goto error;
990  }
991  } else if (parser->index == 1 && parser->method == HTTP_POST) {
992  if (ch == 'R') {
993  parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
994  } else if (ch == 'U') {
995  parser->method = HTTP_PUT;
996  } else if (ch == 'A') {
997  parser->method = HTTP_PATCH;
998  } else {
999  goto error;
1000  }
1001  } else if (parser->index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
1002  parser->method = HTTP_UNSUBSCRIBE;
1003  } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1004  parser->method = HTTP_PROPPATCH;
1005  } else {
1007  goto error;
1008  }
1009 
1010  ++parser->index;
1011  break;
1012  }
1013 
1015  {
1016  if (ch == ' ') break;
1017 
1018  // CONNECT requests must be followed by a <host>:<port>
1019  if (parser->method == HTTP_CONNECT) {
1020  MARK(url);
1021  state = s_req_host_start;
1022  goto reexecute_byte;
1023  }
1024 
1025  if (ch == '/' || ch == '*') {
1026  MARK(url);
1027  state = s_req_path;
1028  break;
1029  }
1030 
1031  /* Proxied requests are followed by scheme of an absolute URI (alpha).
1032  * All other methods are followed by '/' or '*' (handled above).
1033  */
1034  if (IS_ALPHA(ch)) {
1035  MARK(url);
1036  state = s_req_schema;
1037  break;
1038  }
1039 
1041  goto error;
1042  }
1043 
1044  case s_req_schema:
1045  {
1046  if (IS_ALPHA(ch)) break;
1047 
1048  if (ch == ':') {
1049  state = s_req_schema_slash;
1050  break;
1051  }
1052 
1054  goto error;
1055  }
1056 
1057  case s_req_schema_slash:
1058  STRICT_CHECK(ch != '/');
1059  state = s_req_schema_slash_slash;
1060  break;
1061 
1063  STRICT_CHECK(ch != '/');
1064  state = s_req_host_start;
1065  break;
1066 
1067  case s_req_host_start:
1068  if (ch == '[') {
1069  state = s_req_host_ipv6;
1070  break;
1071  } else if (IS_ALPHANUM(ch)) {
1072  state = s_req_host;
1073  break;
1074  }
1075 
1077  goto error;
1078 
1079  case s_req_host:
1080  if (IS_HOST_CHAR(ch)) break;
1081  state = s_req_host_done;
1082  goto reexecute_byte;
1083 
1084  case s_req_host_ipv6:
1085  if (IS_HEX(ch) || ch == ':') break;
1086  if (ch == ']') {
1087  state = s_req_host_done;
1088  break;
1089  }
1090 
1092  goto error;
1093 
1094  case s_req_host_done:
1095  switch (ch) {
1096  case ':':
1097  state = s_req_port;
1098  break;
1099  case '/':
1100  state = s_req_path;
1101  break;
1102  case ' ':
1103  /* The request line looks like:
1104  * "GET http://foo.bar.com HTTP/1.1"
1105  * That is, there is no path.
1106  */
1107  state = s_req_http_start;
1108  CALLBACK_DATA(url);
1109  break;
1110  case '?':
1111  state = s_req_query_string_start;
1112  break;
1113  default:
1115  goto error;
1116  }
1117 
1118  break;
1119 
1120  case s_req_port:
1121  {
1122  if (IS_NUM(ch)) break;
1123  switch (ch) {
1124  case '/':
1125  state = s_req_path;
1126  break;
1127  case ' ':
1128  /* The request line looks like:
1129  * "GET http://foo.bar.com:1234 HTTP/1.1"
1130  * That is, there is no path.
1131  */
1132  state = s_req_http_start;
1133  CALLBACK_DATA(url);
1134  break;
1135  case '?':
1136  state = s_req_query_string_start;
1137  break;
1138  default:
1140  goto error;
1141  }
1142  break;
1143  }
1144 
1145  case s_req_path:
1146  {
1147  if (IS_URL_CHAR(ch)) break;
1148 
1149  switch (ch) {
1150  case ' ':
1151  state = s_req_http_start;
1152  CALLBACK_DATA(url);
1153  break;
1154  case CR:
1155  parser->http_major = 0;
1156  parser->http_minor = 9;
1157  state = s_headers_almost_done;
1158  CALLBACK_DATA(url);
1159  break;
1160  case LF:
1161  parser->http_major = 0;
1162  parser->http_minor = 9;
1163  state = s_headers_almost_done;
1164  CALLBACK_DATA(url);
1165  goto reexecute_byte;
1166  break;
1167  case '?':
1168  state = s_req_query_string_start;
1169  break;
1170  case '#':
1171  state = s_req_fragment_start;
1172  break;
1173  default:
1175  goto error;
1176  }
1177  break;
1178  }
1179 
1181  {
1182  if (IS_URL_CHAR(ch)) {
1183  state = s_req_query_string;
1184  break;
1185  }
1186 
1187  switch (ch) {
1188  case '?':
1189  break; /* XXX ignore extra '?' ... is this right? */
1190  case ' ':
1191  state = s_req_http_start;
1192  CALLBACK_DATA(url);
1193  break;
1194  case CR:
1195  parser->http_major = 0;
1196  parser->http_minor = 9;
1197  state = s_headers_almost_done;
1198  CALLBACK_DATA(url);
1199  break;
1200  case LF:
1201  parser->http_major = 0;
1202  parser->http_minor = 9;
1203  state = s_headers_almost_done;
1204  CALLBACK_DATA(url);
1205  goto reexecute_byte;
1206  break;
1207  case '#':
1208  state = s_req_fragment_start;
1209  break;
1210  default:
1212  goto error;
1213  }
1214  break;
1215  }
1216 
1217  case s_req_query_string:
1218  {
1219  if (IS_URL_CHAR(ch)) break;
1220 
1221  switch (ch) {
1222  case '?':
1223  /* allow extra '?' in query string */
1224  break;
1225  case ' ':
1226  state = s_req_http_start;
1227  CALLBACK_DATA(url);
1228  break;
1229  case CR:
1230  parser->http_major = 0;
1231  parser->http_minor = 9;
1232  state = s_headers_almost_done;
1233  CALLBACK_DATA(url);
1234  break;
1235  case LF:
1236  parser->http_major = 0;
1237  parser->http_minor = 9;
1238  state = s_headers_almost_done;
1239  CALLBACK_DATA(url);
1240  goto reexecute_byte;
1241  break;
1242  case '#':
1243  state = s_req_fragment_start;
1244  break;
1245  default:
1247  goto error;
1248  }
1249  break;
1250  }
1251 
1252  case s_req_fragment_start:
1253  {
1254  if (IS_URL_CHAR(ch)) {
1255  state = s_req_fragment;
1256  break;
1257  }
1258 
1259  switch (ch) {
1260  case ' ':
1261  state = s_req_http_start;
1262  CALLBACK_DATA(url);
1263  break;
1264  case CR:
1265  parser->http_major = 0;
1266  parser->http_minor = 9;
1267  state = s_headers_almost_done;
1268  CALLBACK_DATA(url);
1269  break;
1270  case LF:
1271  parser->http_major = 0;
1272  parser->http_minor = 9;
1273  state = s_headers_almost_done;
1274  CALLBACK_DATA(url);
1275  goto reexecute_byte;
1276  break;
1277  case '?':
1278  state = s_req_fragment;
1279  break;
1280  case '#':
1281  break;
1282  default:
1284  goto error;
1285  }
1286  break;
1287  }
1288 
1289  case s_req_fragment:
1290  {
1291  if (IS_URL_CHAR(ch)) break;
1292 
1293  switch (ch) {
1294  case ' ':
1295  state = s_req_http_start;
1296  CALLBACK_DATA(url);
1297  break;
1298  case CR:
1299  parser->http_major = 0;
1300  parser->http_minor = 9;
1301  state = s_headers_almost_done;
1302  CALLBACK_DATA(url);
1303  break;
1304  case LF:
1305  parser->http_major = 0;
1306  parser->http_minor = 9;
1307  state = s_headers_almost_done;
1308  CALLBACK_DATA(url);
1309  goto reexecute_byte;
1310  break;
1311  case '?':
1312  case '#':
1313  break;
1314  default:
1316  goto error;
1317  }
1318  break;
1319  }
1320 
1321  case s_req_http_start:
1322  switch (ch) {
1323  case 'H':
1324  state = s_req_http_H;
1325  break;
1326  case ' ':
1327  break;
1328  default:
1330  goto error;
1331  }
1332  break;
1333 
1334  case s_req_http_H:
1335  STRICT_CHECK(ch != 'T');
1336  state = s_req_http_HT;
1337  break;
1338 
1339  case s_req_http_HT:
1340  STRICT_CHECK(ch != 'T');
1341  state = s_req_http_HTT;
1342  break;
1343 
1344  case s_req_http_HTT:
1345  STRICT_CHECK(ch != 'P');
1346  state = s_req_http_HTTP;
1347  break;
1348 
1349  case s_req_http_HTTP:
1350  STRICT_CHECK(ch != '/');
1351  state = s_req_first_http_major;
1352  break;
1353 
1354  /* first digit of major HTTP version */
1356  if (ch < '0' || ch > '9') {
1358  goto error;
1359  }
1360 
1361  parser->http_major = ch - '0';
1362  state = s_req_http_major;
1363  break;
1364 
1365  /* major HTTP version or dot */
1366  case s_req_http_major:
1367  {
1368  if (ch == '.') {
1369  state = s_req_first_http_minor;
1370  break;
1371  }
1372 
1373  if (!IS_NUM(ch)) {
1375  goto error;
1376  }
1377 
1378  parser->http_major *= 10;
1379  parser->http_major += ch - '0';
1380 
1381  if (parser->http_major > 999) {
1383  goto error;
1384  }
1385 
1386  break;
1387  }
1388 
1389  /* first digit of minor HTTP version */
1391  if (!IS_NUM(ch)) {
1393  goto error;
1394  }
1395 
1396  parser->http_minor = ch - '0';
1397  state = s_req_http_minor;
1398  break;
1399 
1400  /* minor HTTP version or end of request line */
1401  case s_req_http_minor:
1402  {
1403  if (ch == CR) {
1404  if (parser->http_major== 0 && parser->http_minor == 9) {
1405  state = s_headers_almost_done;
1406  } else {
1407  state = s_req_line_almost_done;
1408  }
1409  break;
1410  }
1411 
1412  if (ch == LF) {
1413  if (parser->http_major == 0 && parser->http_minor == 9) {
1414  state = s_headers_almost_done;
1415  goto reexecute_byte;
1416  } else {
1417  state = s_header_field_start;
1418  }
1419  break;
1420  }
1421 
1422  /* XXX allow spaces after digit? */
1423 
1424  if (!IS_NUM(ch)) {
1426  goto error;
1427  }
1428 
1429  parser->http_minor *= 10;
1430  parser->http_minor += ch - '0';
1431 
1432  if (parser->http_minor > 999) {
1434  goto error;
1435  }
1436 
1437  break;
1438  }
1439 
1440  /* end of request line */
1442  {
1443  if (ch != LF) {
1445  goto error;
1446  }
1447 
1448  state = s_header_field_start;
1449  break;
1450  }
1451 
1452  case s_header_field_start:
1453  {
1454  if (ch == CR) {
1455  state = s_headers_almost_done;
1456  break;
1457  }
1458 
1459  if (ch == LF) {
1460  /* they might be just sending \n instead of \r\n so this would be
1461  * the second \n to denote the end of headers*/
1462  state = s_headers_almost_done;
1463  goto reexecute_byte;
1464  }
1465 
1466  c = TOKEN(ch);
1467 
1468  if (!c) {
1470  goto error;
1471  }
1472 
1473  MARK(header_field);
1474 
1475  parser->index = 0;
1476  state = s_header_field;
1477 
1478  switch (c) {
1479  case 'c':
1481  break;
1482 
1483  case 't':
1485  break;
1486 
1487  case 'u':
1488  parser->header_state = h_matching_upgrade;
1489  break;
1490 
1491  default:
1492  parser->header_state = h_general;
1493  break;
1494  }
1495  break;
1496  }
1497 
1498  case s_header_field:
1499  {
1500  c = TOKEN(ch);
1501 
1502  if (c) {
1503  switch (parser->header_state) {
1504  case h_general:
1505 
1506  // fast-forwarding, wheeeeeee!
1507  #define MOVE_THE_HEAD do { \
1508  ++p; \
1509  if (!TOKEN(*p)) { \
1510  ch = *p; \
1511  goto notatoken; \
1512  } \
1513  } while(0);
1514 
1515  if (data + len - p >= 9) {
1524  } else if (data + len - p >= 4) {
1528  }
1529 
1530  break;
1531 
1532  /* content-length */
1533 
1535  parser->index++;
1536  if (parser->index > sizeof(CONTENT_LENGTH)-1
1537  || c != CONTENT_LENGTH[parser->index]) {
1538  parser->header_state = h_general;
1539  } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1540  parser->header_state = h_content_length;
1541  }
1542  break;
1543 
1544  /* transfer-encoding */
1545 
1547  parser->index++;
1548  if (parser->index > sizeof(TRANSFER_ENCODING)-1
1549  || c != TRANSFER_ENCODING[parser->index]) {
1550  parser->header_state = h_general;
1551  } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1553  }
1554  break;
1555 
1556  /* upgrade */
1557 
1558  case h_matching_upgrade:
1559  parser->index++;
1560  if (parser->index > sizeof(UPGRADE)-1
1561  || c != UPGRADE[parser->index]) {
1562  parser->header_state = h_general;
1563  } else if (parser->index == sizeof(UPGRADE)-2) {
1564  parser->header_state = h_upgrade;
1565  }
1566  break;
1567 
1568  case h_content_length:
1569  case h_transfer_encoding:
1570  case h_upgrade:
1571  if (ch != ' ') parser->header_state = h_general;
1572  break;
1573 
1574  default:
1575  assert(0 && "Unknown header_state");
1576  break;
1577  }
1578  break;
1579  }
1580 
1581  notatoken:
1582  if (ch == ':') {
1583  state = s_header_value_start;
1584  // do not allow headers with trailing whitespaces
1585  // https://tools.ietf.org/html/rfc7230#section-3.2.4
1586  if (p - header_field_mark > 1 &&
1587  data[p - data - 1] == ' ') {
1589  goto error;
1590  }
1591  CALLBACK_DATA(header_field);
1592  break;
1593  }
1594 
1596  goto error;
1597  }
1598 
1599  case s_header_value_start:
1600  {
1601  if (ch == ' ' || ch == '\t') break;
1602 
1603  MARK(header_value);
1604 
1605  state = s_header_value;
1606  parser->index = 0;
1607 
1608  // Error out if a content_length, transfer_encoding, or upgrade header
1609  // was present with no actual value. These headers correspond with
1610  // special parser states that without the below accept empty header
1611  // values and so we can reject such requests here in the parser.
1612  // If more headers are added, can consider moving to a hash/map based
1613  // model below.
1614  if (ch == CR || ch == LF) {
1615  if (parser->header_state == h_content_length) {
1617  } else if (parser->header_state == h_transfer_encoding) {
1619  } else if (parser->header_state == h_upgrade) {
1621  }
1622 
1623  if (parser->http_errno != HPE_OK) {
1624  goto error;
1625  }
1626  }
1627 
1628  if (ch == CR) {
1629  STRICT_CHECK(parser->quote != 0);
1630  parser->header_state = h_general;
1631  state = s_header_almost_done;
1632  CALLBACK_DATA(header_value);
1633  break;
1634  }
1635 
1636  if (ch == LF) {
1637  STRICT_CHECK(parser->quote != 0);
1638  state = s_header_field_start;
1639  CALLBACK_DATA(header_value);
1640  break;
1641  }
1642 
1643  c = LOWER(ch);
1644 
1645  switch (parser->header_state) {
1646  case h_upgrade:
1647  parser->flags |= F_UPGRADE;
1648  parser->header_state = h_general;
1649  break;
1650 
1651  case h_transfer_encoding:
1652  /* looking for 'Transfer-Encoding: chunked' */
1653  if ('c' == c) {
1655  } else {
1656  parser->header_state = h_general;
1657  }
1658  break;
1659 
1660  case h_content_length:
1661  if (!IS_NUM(ch)) {
1663  goto error;
1664  }
1665 
1666  parser->content_length = ch - '0';
1667  break;
1668 
1669  default:
1670  parser->header_state = ch == QT ? h_general_and_quote : h_general;
1671  break;
1672  }
1673  break;
1674  }
1675 
1676  case s_header_value:
1677  {
1678  cr_or_lf_or_qt:
1679  if (ch == CR &&
1681  state = s_header_almost_done;
1682  CALLBACK_DATA(header_value);
1683  break;
1684  }
1685 
1686  if (ch == LF &&
1688  state = s_header_almost_done;
1689  CALLBACK_DATA_NOADVANCE(header_value);
1690  goto reexecute_byte;
1691  }
1692 
1693  if (!lenient && !IS_HEADER_CHAR(ch) &&
1696  goto error;
1697  }
1698 
1699  switch (parser->header_state) {
1700  case h_general:
1701  if (ch == QT) {
1703  }
1704 
1705  // fast-forwarding, wheee!
1706  #define MOVE_FAST do { \
1707  ++p; \
1708  ch = *p; \
1709  if (ch == CR || ch == LF || ch == QT || \
1710  ch == BS || !IS_HEADER_CHAR(ch)) { \
1711  goto cr_or_lf_or_qt; \
1712  } \
1713  } while(0);
1714 
1715  if (data + len - p >= 12) {
1716  MOVE_FAST
1717  MOVE_FAST
1718  MOVE_FAST
1719  MOVE_FAST
1720  MOVE_FAST
1721  MOVE_FAST
1722  MOVE_FAST
1723  MOVE_FAST
1724  MOVE_FAST
1725  MOVE_FAST
1726  MOVE_FAST
1727  } else if (data + len - p >= 5) {
1728  MOVE_FAST
1729  MOVE_FAST
1730  MOVE_FAST
1731  MOVE_FAST
1732  }
1733 
1734  break;
1735 
1736  case h_general_and_quote:
1737  if (ch == QT) {
1738  parser->header_state = h_general;
1739  } else if (ch == BS) {
1741  }
1742  break;
1743 
1746  break;
1747 
1748  // Not sure the below is relevant anymore as from
1749  // s_header_value_start it appears as though we can never
1750  // be in the situation below
1751  case h_transfer_encoding:
1753  goto error;
1754  break;
1755 
1756  case h_content_length:
1757  if (ch == ' ') break;
1758  if (!IS_NUM(ch)) {
1760  goto error;
1761  }
1762 
1763  if (parser->content_length > ((INT64_MAX - 10) / 10)) {
1764  /* overflow */
1766  goto error;
1767  }
1768 
1769  parser->content_length *= 10;
1770  parser->content_length += ch - '0';
1771  break;
1772 
1773  /* Transfer-Encoding: chunked */
1775  parser->index++;
1776  if (parser->index > sizeof(CHUNKED)-1
1777  || LOWER(ch) != CHUNKED[parser->index]) {
1778  parser->header_state = h_general;
1779  } else if (parser->index == sizeof(CHUNKED)-2) {
1781  }
1782  break;
1783 
1785  if (ch != ' ') {
1786  parser->header_state = h_general;
1787  }
1788  break;
1789 
1790  default:
1791  state = s_header_value;
1792  parser->header_state = h_general;
1793  break;
1794  }
1795  break;
1796  }
1797 
1798  case s_header_almost_done:
1799  {
1800  if (ch == LF) {
1801  state = s_header_value_lws;
1802  } else {
1803  state = s_header_value;
1804  }
1805 
1806  switch (parser->header_state) {
1808  parser->flags |= F_CHUNKED;
1809  break;
1810  default:
1811  break;
1812  }
1813 
1814  if (ch != LF) {
1815  CALLBACK_SPACE(header_value);
1816  }
1817 
1818  break;
1819  }
1820 
1821  case s_header_value_lws:
1822  {
1823  if (ch == ' ' || ch == '\t')
1824  {
1825  state = s_header_value_start;
1826  CALLBACK_SPACE(header_value);
1827  }
1828  else
1829  {
1830  state = s_header_field_start;
1831  goto reexecute_byte;
1832  }
1833  break;
1834  }
1835 
1836  case s_headers_almost_done:
1837  {
1838  STRICT_CHECK(ch != LF);
1839 
1840  if (ch != LF) {
1842  goto error;
1843  }
1844 
1845  if (parser->flags & F_TRAILING) {
1846  /* End of a chunked request */
1847  state = s_message_done;
1848  CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1849  goto reexecute_byte;
1850  }
1851 
1852  state = s_headers_done;
1853 
1854  /* Set this here so that on_headers_complete() callbacks can see it */
1855  parser->upgrade =
1856  (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1857 
1858  /* Here we call the headers_complete callback. This is somewhat
1859  * different than other callbacks because if the user returns 1, we
1860  * will interpret that as saying that this message has no body. This
1861  * is needed for the annoying case of receiving a response to a HEAD
1862  * request.
1863  *
1864  * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1865  * we have to simulate it by handling a change in errno below.
1866  */
1867  size_t header_size = p - data + 1;
1868  switch (settings->on_headers_complete(parser, nullptr, header_size)) {
1869  case 0:
1870  break;
1871 
1872  case 1:
1873  parser->flags |= F_SKIPBODY;
1874  break;
1875 
1876  default:
1878  RETURN(p - data); /* Error */
1879  }
1880 
1881  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1882  RETURN(p - data);
1883  }
1884 
1885  goto reexecute_byte;
1886  }
1887 
1888  case s_headers_done:
1889  {
1890  STRICT_CHECK(ch != LF);
1891 
1892  // we're done parsing headers, reset overflow counters
1893  parser->nread = 0;
1894  // (if we now move to s_body_*, then this is irrelevant)
1895  data_or_header_data_start = p;
1896 
1897  int hasBody = parser->flags & F_CHUNKED || parser->content_length > 0;
1898  if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1899  (parser->flags & F_SKIPBODY) || !hasBody)) {
1900  /* Exit, the rest of the message is in a different protocol. */
1901  state = NEW_MESSAGE();
1902  CALLBACK_NOTIFY(message_complete);
1903  RETURN((p - data) + 1);
1904  }
1905 
1906  if (parser->flags & F_SKIPBODY) {
1907  state = NEW_MESSAGE();
1908  CALLBACK_NOTIFY(message_complete);
1909  } else if (parser->flags & F_CHUNKED) {
1910  /* chunked encoding - ignore Content-Length header */
1911  state = s_chunk_size_start;
1912  } else {
1913  if (parser->content_length == 0) {
1914  /* Content-Length header given but zero: Content-Length: 0\r\n */
1915  state = NEW_MESSAGE();
1916  CALLBACK_NOTIFY(message_complete);
1917  } else if (parser->content_length > 0) {
1918  /* Content-Length header given and non-zero */
1919  state = s_body_identity;
1920  } else {
1921  unsigned short sc = parser->status_code;
1922  if (parser->type == HTTP_REQUEST ||
1923  ((100 <= sc && sc <= 199) || sc == 204 || sc == 304)) {
1924  /* Assume content-length 0 - read the next */
1925  state = NEW_MESSAGE();
1926  CALLBACK_NOTIFY(message_complete);
1927  } else {
1928  /* Read body until EOF */
1929  state = s_body_identity_eof;
1930  }
1931  }
1932  }
1933 
1934  break;
1935  }
1936 
1937  case s_body_identity:
1938  {
1939  uint64_t to_read = MIN(parser->content_length, (data + len) - p);
1940 
1941  assert(parser->content_length > 0);
1942 
1943  /* The difference between advancing content_length and p is because
1944  * the latter will automatically advance on the next loop iteration.
1945  * Further, if content_length ends up at 0, we want to see the last
1946  * byte again for our message complete callback.
1947  */
1948  MARK(body);
1949  parser->content_length -= to_read;
1950  p += to_read - 1;
1951 
1952  if (parser->content_length == 0) {
1953  state = s_message_done;
1954 
1955  /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1956  *
1957  * The alternative to doing this is to wait for the next byte to
1958  * trigger the data callback, just as in every other case. The
1959  * problem with this is that this makes it difficult for the test
1960  * harness to distinguish between complete-on-EOF and
1961  * complete-on-length. It's not clear that this distinction is
1962  * important for applications, but let's keep it for now.
1963  */
1964  _CALLBACK_DATA(body, p - body_mark + 1, p - data);
1965  goto reexecute_byte;
1966  }
1967 
1968  break;
1969  }
1970 
1971  /* read until EOF */
1972  case s_body_identity_eof:
1973  MARK(body);
1974  p = data + len - 1;
1975 
1976  break;
1977 
1978  case s_message_done:
1979  state = NEW_MESSAGE();
1980  parser->nread = 0;
1981  data_or_header_data_start = p;
1982  CALLBACK_NOTIFY(message_complete);
1983  if (parser->upgrade) {
1984  /* Exit, the rest of the message is in a different protocol. */
1985  RETURN((p - data) + 1);
1986  }
1987  break;
1988 
1989  case s_chunk_size_start:
1990  {
1991  assert(parser->flags & F_CHUNKED);
1992 
1993  unhex_val = unhex[(unsigned char)ch];
1994  if (unhex_val == -1) {
1996  goto error;
1997  }
1998 
1999  parser->content_length = unhex_val;
2000  state = s_chunk_size;
2001  break;
2002  }
2003 
2004  case s_chunk_size:
2005  {
2006  assert(parser->flags & F_CHUNKED);
2007 
2008  if (ch == CR) {
2009  state = s_chunk_size_almost_done;
2010  break;
2011  }
2012 
2013  unhex_val = unhex[(unsigned char)ch];
2014 
2015  if (unhex_val == -1) {
2016  if (ch == ';' || ch == ' ') {
2017  state = s_chunk_parameters;
2018  break;
2019  }
2020 
2022  goto error;
2023  }
2024 
2025  if (parser->content_length > (INT64_MAX - unhex_val) >> 4) {
2026  /* overflow */
2028  goto error;
2029  }
2030  parser->content_length *= 16;
2031  parser->content_length += unhex_val;
2032  break;
2033  }
2034 
2035  case s_chunk_parameters:
2036  {
2037  assert(parser->flags & F_CHUNKED);
2038  /*
2039  * just ignore this shit. TODO check for overflow
2040  * TODO: It would be nice to pass this information to the
2041  * on_chunk_header callback.
2042  */
2043  if (ch == CR) {
2044  state = s_chunk_size_almost_done;
2045  break;
2046  }
2047  break;
2048  }
2049 
2051  {
2052  assert(parser->flags & F_CHUNKED);
2053  STRICT_CHECK(ch != LF);
2054 
2055  if (parser->content_length == 0) {
2056  parser->flags |= F_TRAILING;
2057  state = s_header_field_start;
2058  CALLBACK_NOTIFY(chunk_header);
2059  } else {
2060  state = s_chunk_data;
2061  CALLBACK_NOTIFY(chunk_header);
2062  }
2063  break;
2064  }
2065 
2066  case s_chunk_data:
2067  {
2068  uint64_t to_read = MIN(parser->content_length, (data + len) - p);
2069 
2070  assert(parser->flags & F_CHUNKED);
2071  assert(parser->content_length > 0);
2072 
2073  /* See the explanation in s_body_identity for why the content
2074  * length and data pointers are managed this way.
2075  */
2076  MARK(body);
2077  parser->content_length -= to_read;
2078  p += to_read - 1;
2079 
2080  if (parser->content_length == 0) {
2081  state = s_chunk_data_almost_done;
2082  }
2083 
2084  break;
2085  }
2086 
2088  assert(parser->flags & F_CHUNKED);
2089  assert(parser->content_length == 0);
2090  STRICT_CHECK(ch != CR);
2091  state = s_chunk_data_done;
2092  CALLBACK_DATA(body);
2093  break;
2094 
2095  case s_chunk_data_done:
2096  assert(parser->flags & F_CHUNKED);
2097  STRICT_CHECK(ch != LF);
2098  state = s_chunk_size_start;
2099  parser->nread = 0;
2100  data_or_header_data_start = p;
2101  CALLBACK_NOTIFY(chunk_complete);
2102  break;
2103 
2104  default:
2105  assert(0 && "unhandled state");
2107  goto error;
2108  }
2109  }
2110 
2111  /* We can check for overflow here because in Proxygen, len <= ~8KB and so the
2112  * worst thing that can happen is that we catch the overflow at 88KB rather
2113  * than at 80KB.
2114  * In case of chunk encoding, we count the overflow for every
2115  * chunk separately.
2116  * We zero the nread counter (and reset data_or_header_data_start) when we
2117  * start parsing a new message or a new chunk.
2118  */
2119  if (PARSING_HEADER(state)) {
2120  parser->nread += p - data_or_header_data_start;
2121  if (parser->nread > HTTP_MAX_HEADER_SIZE) {
2123  goto error;
2124  }
2125  }
2126 
2127  /* Run callbacks for any marks that we have leftover after we ran out of
2128  * bytes. There should be at most one of these set, so it's OK to invoke
2129  * them in series (unset marks will not result in callbacks).
2130  *
2131  * We use the NOADVANCE() variety of callbacks here because 'p' has already
2132  * overflowed 'data' and this allows us to correct for the off-by-one that
2133  * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2134  * value that's in-bounds).
2135  */
2136 
2137  assert(((header_field_mark ? 1 : 0) +
2138  (header_value_mark ? 1 : 0) +
2139  (url_mark ? 1 : 0) +
2140  (reason_mark ? 1 : 0) +
2141  (body_mark ? 1 : 0)) <= 1);
2142 
2143  CALLBACK_DATA_NOADVANCE(header_field);
2144  CALLBACK_DATA_NOADVANCE(header_value);
2146  CALLBACK_DATA_NOADVANCE(reason);
2148 
2149  RETURN(len);
2150 
2151 error:
2152  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2154  }
2155 
2156  RETURN(p - data);
2157 }
#define HTTP_MAX_HEADER_SIZE
Definition: http_parser.h:64
#define MOVE_FAST
uint32_t nread
Definition: http_parser.h:220
#define _CALLBACK_DATA(FOR, LEN, ER)
unsigned char flags
Definition: http_parser.h:215
#define CALLBACK_SPACE(FOR)
#define QT
#define IS_NUM(c)
#define CALLBACK_NOTIFY(FOR)
#define IS_HEX(c)
unsigned short http_minor
Definition: http_parser.h:225
#define SET_ERRNO(e)
#define CR
#define PARSING_HEADER(state)
#define IS_ALPHA(c)
requires And< SemiMovable< VN >... > &&SemiMovable< E > auto error(E e)
Definition: error.h:48
static const int8_t unhex[256]
auto ch
#define NEW_MESSAGE()
unsigned short status_code
Definition: http_parser.h:226
#define TOKEN(c)
#define IS_ALPHANUM(c)
#define IS_HEADER_CHAR(ch)
#define IS_HOST_CHAR(c)
#define MIN(a, b)
#define CALLBACK_DATA(FOR)
#define HTTP_PARSER_ERRNO(p)
Definition: http_parser.h:202
#define IS_URL_CHAR(c)
#define MOVE_THE_HEAD
unsigned char state
Definition: http_parser.h:216
#define CHUNKED
unsigned char method
Definition: http_parser.h:227
unsigned short http_major
Definition: http_parser.h:224
#define CONTENT_LENGTH
http_data_cb on_headers_complete
Definition: http_parser.h:251
#define CALLBACK_DATA_NOADVANCE(FOR)
static const char * method_strings[]
#define LOWER(c)
http_method
Definition: http_parser.h:90
#define UPGRADE
unsigned char header_state
Definition: http_parser.h:217
#define CALLBACK_NOTIFY_NOADVANCE(FOR)
unsigned char http_errno
Definition: http_parser.h:228
#define BS
unsigned char type
Definition: http_parser.h:214
#define LF
#define RETURN(r)
#define MARK(FOR)
char c
#define TRANSFER_ENCODING
static constexpr uint64_t data[1]
Definition: Fingerprint.cpp:43
int64_t content_length
Definition: http_parser.h:221
#define STRICT_CHECK(cond)
state
Definition: http_parser.c:272
unsigned char index
Definition: http_parser.h:218
void http_parser_init ( http_parser *  parser,
enum http_parser_type  t 
)

Definition at line 2167 of file http_parser_cpp.cpp.

References http_parser::flags, HPE_OK, http_parser::http_errno, http_parser::http_major, http_parser::http_minor, HTTP_REQUEST, HTTP_RESPONSE, http_parser::method, http_parser::nread, s_pre_start_req, s_pre_start_req_or_res, s_pre_start_res, http_parser::state, folly::pushmi::detail::t, http_parser::type, and http_parser::upgrade.

2168 {
      /* Puts the parser into its initial condition for message type t by
       * resetting each field assigned below. */
2169  parser->type = t;
      /* NOTE(review): listing line 2170 is missing from this rendering. The
       * References paragraph above names http_parser::state together with
       * s_pre_start_req, s_pre_start_res and s_pre_start_req_or_res, so it
       * presumably selects the initial parser->state from t -- verify against
       * the original source. */
2171  parser->nread = 0;
2172  parser->upgrade = 0;
2173  parser->flags = 0;
2174  parser->method = 0;
2175  parser->http_major = 0;
2176  parser->http_minor = 0;
      /* Clear any error left over from a previous use of this parser. */
2177  parser->http_errno = HPE_OK;
2178 }
uint32_t nread
Definition: http_parser.h:220
unsigned char flags
Definition: http_parser.h:215
unsigned short http_minor
Definition: http_parser.h:225
unsigned char state
Definition: http_parser.h:216
unsigned char method
Definition: http_parser.h:227
unsigned short http_major
Definition: http_parser.h:224
unsigned char http_errno
Definition: http_parser.h:228
unsigned char type
Definition: http_parser.h:214
int http_parser_parse_url ( const char *  buf,
size_t  buflen,
int  is_connect,
struct http_parser_url *  u 
)

Definition at line 2333 of file http_parser_cpp.cpp.

References http_parser_url::field_data, http_parser_url::field_set, http_parse_host(), http_parser_url::len, http_parser_url::off, parse_url_char(), http_parser_url::port, s, s_dead, s_req_fragment, s_req_fragment_start, s_req_path, s_req_query_string, s_req_query_string_start, s_req_schema, s_req_schema_slash, s_req_schema_slash_slash, s_req_server, s_req_server_start, s_req_server_with_at, s_req_spaces_before_url, UF_FRAGMENT, UF_HOST, UF_MAX, UF_PATH, UF_PORT, UF_QUERY, UF_SCHEMA, and uint16_t.

2335 {
      /* Splits the URL held in buf[0 .. buflen) into fields. Every byte is fed
       * to parse_url_char; each run of consecutive bytes that maps to the same
       * field is recorded as an (off, len) pair in u->field_data and flagged
       * in u->field_set. Returns 0 on success and 1 when the URL is rejected.
       */
2336  enum state s;
2337  const char *p;
2338  enum http_parser_url_fields uf, old_uf;
      /* Set once the state machine reports s_req_server_with_at; handed to
       * http_parse_host below. */
2339  int found_at = 0;
2340 
2341  u->port = u->field_set = 0;
      /* A CONNECT target starts directly in the server state; anything else
       * starts in s_req_spaces_before_url. */
2342  s = is_connect ? s_req_server_start : s_req_spaces_before_url;
      /* UF_MAX serves as the "no field yet" marker for both trackers. */
2343  uf = old_uf = UF_MAX;
2344 
2345  for (p = buf; p < buf + buflen; p++) {
2346  s = parse_url_char(s, *p);
2347 
2348  /* Figure out the next field that we're operating on */
2349  switch (s) {
2350  case s_dead:
2351  return 1;
2352 
2353  /* Skip delimiters */
2354  case s_req_schema_slash:
      /* NOTE(review): listing line 2355 is missing from this rendering;
       * presumably the label case s_req_schema_slash_slash (it is named in the
       * References paragraph) -- verify against the original source. */
2356  case s_req_server_start:
      /* NOTE(review): listing line 2357 is missing from this rendering;
       * presumably the label case s_req_query_string_start -- verify against
       * the original source. */
2358  case s_req_fragment_start:
2359  continue;
2360 
2361  case s_req_schema:
2362  uf = UF_SCHEMA;
2363  break;
2364 
2365  case s_req_server_with_at:
2366  found_at = 1;
2367 
2368  /* FALLTHROUGH */
2369  case s_req_server:
2370  uf = UF_HOST;
2371  break;
2372 
2373  case s_req_path:
2374  uf = UF_PATH;
2375  break;
2376 
2377  case s_req_query_string:
2378  uf = UF_QUERY;
2379  break;
2380 
2381  case s_req_fragment:
2382  uf = UF_FRAGMENT;
2383  break;
2384 
2385  default:
2386  assert(!"Unexpected state");
2387  return 1;
2388  }
2389 
2390  /* Nothing's changed; soldier on */
2391  if (uf == old_uf) {
2392  u->field_data[uf].len++;
2393  continue;
2394  }
2395 
      /* First byte of a different field: record where it starts, give it a
       * length of one and mark the field as present. */
2396  u->field_data[uf].off = p - buf;
2397  u->field_data[uf].len = 1;
2398 
2399  u->field_set |= (1 << uf);
2400  old_uf = uf;
2401  }
2402 
2403  /* host must be present if there is a schema */
2404  /* parsing http:///toto will fail */
2405  if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2406  if (http_parse_host(buf, u, found_at) != 0) {
2407  return 1;
2408  }
2409  }
2410 
2411  /* CONNECT requests can only contain "hostname:port" */
2412  if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2413  return 1;
2414  }
2415 
2416  if (u->field_set & (1 << UF_PORT)) {
2417  /* Don't bother with endp; we've already validated the string */
      /* NOTE(review): strtoul stops only at the first non-digit character; it
       * is bounded neither by buflen nor by field_data[UF_PORT].len. When the
       * port is the last field of a buffer that is not NUL-terminated, the
       * conversion can read past buf + buflen -- confirm that callers always
       * pass a terminated buffer, or bound the conversion by the field length.
       */
2418  unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, nullptr, 10);
2419 
2420  /* Ports have a max value of 2^16 - 1 (0xffff) */
2421  if (v > 0xffff) {
2422  return 1;
2423  }
2424 
2425  u->port = (uint16_t) v;
2426  }
2427 
2428  return 0;
2429 }
uint16_t field_set
Definition: http_parser.h:283
static enum state parse_url_char(enum state s, const char ch)
static int http_parse_host(const char *buf, struct http_parser_url *u, int found_at)
static set< string > s
struct http_parser_url::@1 field_data[UF_MAX]
http_parser_url_fields
Definition: http_parser.h:263
state
Definition: http_parser.c:272
void http_parser_pause ( http_parser *  parser,
int  paused 
)

Definition at line 2432 of file http_parser_cpp.cpp.

References HPE_OK, HPE_PAUSED, HTTP_PARSER_ERRNO, and SET_ERRNO.

2432  {
      /* Pauses or resumes the parser by rewriting its error code: a non-zero
       * paused stores HPE_PAUSED, zero stores HPE_OK. The change is applied
       * only while the current code is HPE_OK or HPE_PAUSED. */
2433  /* Users should only be pausing/unpausing a parser that is not in an error
2434  * state. In non-debug builds, there's not much that we can do about this
2435  * other than ignore it.
2436  */
2437  if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2438  HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2439  SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2440  } else {
      /* Any other error code is left untouched. */
2441  assert(0 && "Attempting to pause parser in error state");
2442  }
2443 }
#define SET_ERRNO(e)
#define HTTP_PARSER_ERRNO(p)
Definition: http_parser.h:202
static enum state parse_url_char ( enum state  s,
const char  ch 
)
static

Definition at line 456 of file http_parser_cpp.cpp.

References IS_ALPHA, IS_URL_CHAR, IS_USERINFO_CHAR, s, s_dead, s_req_fragment, s_req_fragment_start, s_req_path, s_req_query_string, s_req_query_string_start, s_req_schema, s_req_schema_slash, s_req_schema_slash_slash, s_req_server, s_req_server_start, s_req_server_with_at, and s_req_spaces_before_url.

Referenced by http_parser_parse_url().

457 {
     /* Advances the URL state machine by one input byte: returns the state
      * that follows s after consuming ch, or s_dead when ch is not accepted.
      *
      * NOTE(review): several lines of this listing are missing from the
      * rendering (the embedded numbering skips them). Each gap is flagged
      * below with the presumed content; verify against the original source.
      */
     /* Space, CR and LF are rejected in every state. */
458  if (ch == ' ' || ch == '\r' || ch == '\n') {
459  return s_dead;
460  }
461 
     /* Strict builds additionally reject tab and form feed. */
462 #if HTTP_PARSER_STRICT
463  if (ch == '\t' || ch == '\f') {
464  return s_dead;
465  }
466 #endif
467 
468  switch (s) {
     /* NOTE(review): listing line 469 is missing; presumably the label
      * case s_req_spaces_before_url. */
470  /* Proxied requests are followed by scheme of an absolute URI (alpha).
471  * All methods except CONNECT are followed by '/' or '*'.
472  */
473 
474  if (ch == '/' || ch == '*') {
475  return s_req_path;
476  }
477 
478  if (IS_ALPHA(ch)) {
479  return s_req_schema;
480  }
481 
482  break;
483 
484  case s_req_schema:
485  if (IS_ALPHA(ch)) {
486  return s;
487  }
488 
489  if (ch == ':') {
490  return s_req_schema_slash;
491  }
492 
493  break;
494 
495  case s_req_schema_slash:
496  if (ch == '/') {
     /* NOTE(review): listing line 497 is missing; presumably
      * return s_req_schema_slash_slash. */
498  }
499 
500  break;
501 
     /* NOTE(review): listing line 502 is missing; presumably the label
      * case s_req_schema_slash_slash. */
503  if (ch == '/') {
504  return s_req_server_start;
505  }
506 
507  break;
508 
     /* NOTE(review): listing line 509 is missing; presumably the label
      * case s_req_server_with_at. A second at-sign is rejected here before
      * falling through to the shared server handling. */
510  if (ch == '@') {
511  return s_dead;
512  }
513 
514  /* FALLTHROUGH */
515  case s_req_server_start:
516  case s_req_server:
517  if (ch == '/') {
518  return s_req_path;
519  }
520 
521  if (ch == '?') {
     /* NOTE(review): listing line 522 is missing; presumably
      * return s_req_query_string_start. */
523  }
524 
525  if (ch == '@') {
526  return s_req_server_with_at;
527  }
528 
529  if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
530  return s_req_server;
531  }
532 
533  break;
534 
535  case s_req_path:
536  if (IS_URL_CHAR(ch)) {
537  return s;
538  }
539 
540  switch (ch) {
541  case '?':
     /* NOTE(review): listing line 542 is missing; presumably
      * return s_req_query_string_start. */
543 
544  case '#':
545  return s_req_fragment_start;
546  }
547 
548  break;
549 
     /* NOTE(review): listing line 550 is missing; presumably the label
      * case s_req_query_string_start, sharing the handling below. */
551  case s_req_query_string:
552  if (IS_URL_CHAR(ch)) {
553  return s_req_query_string;
554  }
555 
556  switch (ch) {
557  case '?':
558  /* allow extra '?' in query string */
559  return s_req_query_string;
560 
561  case '#':
562  return s_req_fragment_start;
563  }
564 
565  break;
566 
     /* NOTE(review): listing line 567 is missing; presumably the label
      * case s_req_fragment_start. */
568  if (IS_URL_CHAR(ch)) {
569  return s_req_fragment;
570  }
571 
572  switch (ch) {
573  case '?':
574  return s_req_fragment;
575 
576  case '#':
577  return s;
578  }
579 
580  break;
581 
582  case s_req_fragment:
583  if (IS_URL_CHAR(ch)) {
584  return s;
585  }
586 
587  switch (ch) {
588  case '?':
589  case '#':
590  return s;
591  }
592 
593  break;
594 
595  default:
596  break;
597  }
598 
599  /* We should never fall out of the switch above unless there's an error */
600  return s_dead;
601 }
#define IS_ALPHA(c)
auto ch
#define IS_URL_CHAR(c)
#define IS_USERINFO_CHAR(c)
static set< string > s

Variable Documentation

const char* description

Definition at line 438 of file http_parser_cpp.cpp.

struct { ... } http_strerror_tab[]
Initial value:
= {
}

Referenced by http_errno_description(), and http_errno_name().

const char* method_strings[]
static
Initial value:
=
{ "DELETE"
, "GET"
, "HEAD"
, "POST"
, "PUT"
, "CONNECT"
, "OPTIONS"
, "TRACE"
, "COPY"
, "LOCK"
, "MKCOL"
, "MOVE"
, "PROPFIND"
, "PROPPATCH"
, "UNLOCK"
, "REPORT"
, "MKACTIVITY"
, "CHECKOUT"
, "MERGE"
, "M-SEARCH"
, "NOTIFY"
, "SUBSCRIBE"
, "UNSUBSCRIBE"
, "PATCH"
}

Definition at line 149 of file http_parser_cpp.cpp.

const char* name

Definition at line 437 of file http_parser_cpp.cpp.

const uint8_t normal_url_char[256]
static

Definition at line 236 of file http_parser_cpp.cpp.

const char tokens[256]
static

Definition at line 184 of file http_parser_cpp.cpp.

const int8_t unhex[256]
static
Initial value:
=
{-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
}

Definition at line 219 of file http_parser_cpp.cpp.