diff --git a/include/net/http_parser.h b/include/net/http_parser.h index ea31fff128cba..d0b43301b7fdd 100644 --- a/include/net/http_parser.h +++ b/include/net/http_parser.h @@ -46,6 +46,8 @@ typedef unsigned __int64 u64_t; #include #include #endif +#include +#include /* Maximium header size allowed. If the macro is not defined * before including this header then the default is used. To @@ -229,38 +231,6 @@ struct http_parser_settings { }; -enum http_parser_url_fields { - UF_SCHEMA = 0 - , UF_HOST = 1 - , UF_PORT = 2 - , UF_PATH = 3 - , UF_QUERY = 4 - , UF_FRAGMENT = 5 - , UF_USERINFO = 6 - , UF_MAX = 7 -}; - - -/* Result structure for http_parser_parse_url(). - * - * Callers should index into field_data[] with UF_* values iff field_set - * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and - * because we probably have padding left over), we convert any port to - * a u16_t. - */ -struct http_parser_url { - u16_t field_set; /* Bitmask of (1 << UF_*) values */ - u16_t port; /* Converted UF_PORT string */ - - struct { - u16_t off; /* Offset into buffer in which field - * starts - */ - u16_t len; /* Length of run in buffer */ - } field_data[UF_MAX]; -}; - - /* Returns the library version. Bits 16-23 contain the major version number, * bits 8-15 the minor version number and bits 0-7 the patch level. * Usage example: @@ -306,13 +276,6 @@ const char *http_errno_name(enum http_errno err); /* Return a string description of the given error */ const char *http_errno_description(enum http_errno err); -/* Initialize all http_parser_url members to 0 */ -void http_parser_url_init(struct http_parser_url *u); - -/* Parse a URL; return nonzero on failure */ -int http_parser_parse_url(const char *buf, size_t buflen, - int is_connect, struct http_parser_url *u); - /* Pause or un-pause the parser; a nonzero value pauses */ void http_parser_pause(struct http_parser *parser, int paused); diff --git a/include/net/http_parser_state.h b/include/net/http_parser_state.h new file mode 100644 index 0000000000000..a9bdfdf0ab52c --- /dev/null +++ b/include/net/http_parser_state.h @@ -0,0 +1,99 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef _HTTP_PARSER_STATE_H_ +#define _HTTP_PARSER_STATE_H_ +#ifdef __cplusplus +extern "C" { +#endif + +enum state { + s_dead = 1, /* important that this is > 0 */ + s_start_req_or_res, + s_res_or_resp_H, + s_start_res, + s_res_H, + s_res_HT, + s_res_HTT, + s_res_HTTP, + s_res_first_http_major, + s_res_http_major, + s_res_first_http_minor, + s_res_http_minor, + s_res_first_status_code, + s_res_status_code, + s_res_status_start, + s_res_status, + s_res_line_almost_done, + s_start_req, + s_req_method, + s_req_spaces_before_url, + s_req_schema, + s_req_schema_slash, + s_req_schema_slash_slash, + s_req_server_start, + s_req_server, + s_req_server_with_at, + s_req_path, + s_req_query_string_start, + s_req_query_string, + s_req_fragment_start, + s_req_fragment, + s_req_http_start, + s_req_http_H, + s_req_http_HT, + s_req_http_HTT, + s_req_http_HTTP, + s_req_first_http_major, + s_req_http_major, + s_req_first_http_minor, + s_req_http_minor, + s_req_line_almost_done, + s_header_field_start, + s_header_field, + s_header_value_discard_ws, + s_header_value_discard_ws_almost_done, + s_header_value_discard_lws, + s_header_value_start, + s_header_value, + s_header_value_lws, + s_header_almost_done, + s_chunk_size_start, + s_chunk_size, + s_chunk_parameters, + s_chunk_size_almost_done, + s_headers_almost_done, + s_headers_done, + /* Important: 's_headers_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + s_chunk_data, + s_chunk_data_almost_done, + s_chunk_data_done, + s_body_identity, + s_body_identity_eof, + s_message_done +}; + +#ifdef __cplusplus +} +#endif +#endif diff --git a/include/net/http_parser_url.h b/include/net/http_parser_url.h new file mode 100644 index 0000000000000..8859e7552f427 --- /dev/null +++ b/include/net/http_parser_url.h @@ -0,0 +1,74 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef _HTTP_PARSER_URL_H_ +#define _HTTP_PARSER_URL_H_ +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +enum http_parser_url_fields { + UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_USERINFO = 6 + , UF_MAX = 7 +}; + +/* Result structure for http_parser_url_fields(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a u16_t. + */ +struct http_parser_url { + u16_t field_set; /* Bitmask of (1 << UF_*) values */ + u16_t port; /* Converted UF_PORT string */ + + struct { + u16_t off; /* Offset into buffer in which field + * starts + */ + u16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + +enum state parse_url_char(enum state s, const char ch); + +/* Initialize all http_parser_url members to 0 */ +void http_parser_url_init(struct http_parser_url *u); + +/* Parse a URL; return nonzero on failure */ +int http_parser_parse_url(const char *buf, size_t buflen, + int is_connect, struct http_parser_url *u); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/subsys/net/lib/http/Kconfig b/subsys/net/lib/http/Kconfig index 2bc269f5b1a22..6b015e2e826b4 100644 --- a/subsys/net/lib/http/Kconfig +++ b/subsys/net/lib/http/Kconfig @@ -60,15 +60,25 @@ config HTTP_PARSER bool "HTTP Parser support" default n select HTTP + select HTTP_PARSER_URL help This option enables the http_parser library from nodejs. This parser requires some string-related routines commonly provided by a libc implementation. +config HTTP_PARSER_URL + bool "HTTP Parser for URL support" + default n + select HTTP + help + This option enables the URI parser library based on source from nodejs. + This parser requires some string-related routines commonly + provided by a libc implementation. + config HTTP_PARSER_STRICT bool "HTTP strict parsing" default n - depends on HTTP_PARSER + depends on (HTTP_PARSER || HTTP_PARSER_URL) help This option enables the strict parsing option diff --git a/subsys/net/lib/http/Makefile b/subsys/net/lib/http/Makefile index 304f5cae1491a..13a9325469096 100644 --- a/subsys/net/lib/http/Makefile +++ b/subsys/net/lib/http/Makefile @@ -1,5 +1,6 @@ ccflags-$(CONFIG_HTTP_PARSER_STRICT) += -DHTTP_PARSER_STRICT obj-$(CONFIG_HTTP_PARSER) := http_parser.o +obj-$(CONFIG_HTTP_PARSER_URL) += http_parser_url.o obj-$(CONFIG_HTTP_CLIENT) += http_client.o obj-$(CONFIG_HTTP_SERVER) += http_server.o diff --git a/subsys/net/lib/http/README_http_parser b/subsys/net/lib/http/README_http_parser index 5286765d1255a..adc51dbb679fa 100644 --- a/subsys/net/lib/http/README_http_parser +++ b/subsys/net/lib/http/README_http_parser @@ -8,4 +8,7 @@ can be found at: https://github.com/nodejs/http-parser/releases/tag/v2.7.1 https://github.com/nodejs/http-parser/archive/v2.7.1.tar.gz +NOTE: The portions which relate to URL parsing have been split out into +http_parser_url.c (orignially located in http_parser.c). + * "http-parser" is the project's name, "http_parser" is used in filenames. diff --git a/subsys/net/lib/http/http_parser.c b/subsys/net/lib/http/http_parser.c index eab6ca065b292..9ab2363bcc93f 100644 --- a/subsys/net/lib/http/http_parser.c +++ b/subsys/net/lib/http/http_parser.c @@ -171,119 +171,6 @@ s8_t unhex[256] = { }; -#ifdef HTTP_PARSER_STRICT -# define T(v) 0 -#else -# define T(v) v -#endif - - -static const u8_t normal_url_char[32] = { -/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ - 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, -/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ - 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, -/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ - 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, -/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ - 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, -/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ - 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, -/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, -/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, -/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, -/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, -/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, -/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, -/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, -/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, -/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, -/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, -/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ - 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, -}; - -#undef T - -enum state { - s_dead = 1, /* important that this is > 0 */ - s_start_req_or_res, - s_res_or_resp_H, - s_start_res, - s_res_H, - s_res_HT, - s_res_HTT, - s_res_HTTP, - s_res_first_http_major, - s_res_http_major, - s_res_first_http_minor, - s_res_http_minor, - s_res_first_status_code, - s_res_status_code, - s_res_status_start, - s_res_status, - s_res_line_almost_done, - s_start_req, - s_req_method, - s_req_spaces_before_url, - s_req_schema, - s_req_schema_slash, - s_req_schema_slash_slash, - s_req_server_start, - s_req_server, - s_req_server_with_at, - s_req_path, - s_req_query_string_start, - s_req_query_string, - s_req_fragment_start, - s_req_fragment, - s_req_http_start, - s_req_http_H, - s_req_http_HT, - s_req_http_HTT, - s_req_http_HTTP, - s_req_first_http_major, - s_req_http_major, - s_req_first_http_minor, - s_req_http_minor, - s_req_line_almost_done, - s_header_field_start, - s_header_field, - s_header_value_discard_ws, - s_header_value_discard_ws_almost_done, - s_header_value_discard_lws, - s_header_value_start, - s_header_value, - s_header_value_lws, - s_header_almost_done, - s_chunk_size_start, - s_chunk_size, - s_chunk_parameters, - s_chunk_size_almost_done, - s_headers_almost_done, - s_headers_done, - /* Important: 's_headers_done' must be the last 'header' state. All - * states beyond this must be 'body' states. It is used for overflow - * checking. See the PARSING_HEADER() macro. - */ - s_chunk_data, - s_chunk_data_almost_done, - s_chunk_data_done, - s_body_identity, - s_body_identity_eof, - s_message_done -}; - #define PARSING_HEADER(state) (state <= s_headers_done) enum header_states { @@ -312,21 +199,6 @@ enum header_states { h_connection_upgrade }; -enum http_host_state { - s_http_host_dead = 1, - s_http_userinfo_start, - s_http_userinfo, - s_http_host_start, - s_http_host_v6_start, - s_http_host, - s_http_host_v6, - s_http_host_v6_end, - s_http_host_v6_zone_start, - s_http_host_v6_zone, - s_http_host_port_start, - s_http_host_port -}; - static inline int cb_notify(struct http_parser *parser, enum state *current_state, http_cb cb, int cb_error, size_t *parsed, size_t already_parsed) @@ -406,12 +278,9 @@ int cb_data(struct http_parser *parser, http_data_cb cb, int cb_error, #ifdef HTTP_PARSER_STRICT #define TOKEN(c) (tokens[(unsigned char)c]) -#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') #else #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c]) -#define IS_URL_CHAR(c) \ - (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) #define IS_HOST_CHAR(c) \ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') #endif @@ -494,167 +363,6 @@ static struct { int http_message_needs_eof(const struct http_parser *parser); -/* Our URL parser. - * - * This is designed to be shared by http_parser_execute() for URL validation, - * hence it has a state transition + byte-for-byte interface. In addition, it - * is meant to be embedded in http_parser_parse_url(), which does the dirty - * work of turning state transitions URL components for its API. - * - * This function should only be invoked with non-space characters. It is - * assumed that the caller cares about (and can detect) the transition between - * URL and non-URL states by looking for these. - */ -static enum state parse_url_char(enum state s, const char ch) -{ - if (ch == ' ' || ch == '\r' || ch == '\n') { - return s_dead; - } - -#ifdef HTTP_PARSER_STRICT - if (ch == '\t' || ch == '\f') { - return s_dead; - } -#endif - - switch (s) { - case s_req_spaces_before_url: - /* Proxied requests are followed by scheme of an absolute URI - * (alpha). - * All methods except CONNECT are followed by '/' or '*'. - */ - - if (ch == '/' || ch == '*') { - return s_req_path; - } - - if (IS_ALPHA(ch)) { - return s_req_schema; - } - - break; - - case s_req_schema: - if (IS_ALPHA(ch)) { - return s; - } - - if (ch == ':') { - return s_req_schema_slash; - } - - break; - - case s_req_schema_slash: - if (ch == '/') { - return s_req_schema_slash_slash; - } - - break; - - case s_req_schema_slash_slash: - if (ch == '/') { - return s_req_server_start; - } - - break; - - case s_req_server_with_at: - if (ch == '@') { - return s_dead; - } - - /* FALLTHROUGH */ - case s_req_server_start: - case s_req_server: - if (ch == '/') { - return s_req_path; - } - - if (ch == '?') { - return s_req_query_string_start; - } - - if (ch == '@') { - return s_req_server_with_at; - } - - if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { - return s_req_server; - } - - break; - - case s_req_path: - if (IS_URL_CHAR(ch)) { - return s; - } - - switch (ch) { - case '?': - return s_req_query_string_start; - - case '#': - return s_req_fragment_start; - } - - break; - - case s_req_query_string_start: - case s_req_query_string: - if (IS_URL_CHAR(ch)) { - return s_req_query_string; - } - - switch (ch) { - case '?': - /* allow extra '?' in query string */ - return s_req_query_string; - - case '#': - return s_req_fragment_start; - } - - break; - - case s_req_fragment_start: - if (IS_URL_CHAR(ch)) { - return s_req_fragment; - } - - switch (ch) { - case '?': - return s_req_fragment; - - case '#': - return s; - } - - break; - - case s_req_fragment: - if (IS_URL_CHAR(ch)) { - return s; - } - - switch (ch) { - case '?': - case '#': - return s; - } - - break; - - default: - break; - } - - /* We should never fall out of the switch above unless there's - * an error - */ - return s_dead; -} - static int parser_header_state(struct http_parser *parser, char ch, char c) { @@ -2625,287 +2333,6 @@ const char *http_errno_description(enum http_errno err) return http_strerror_tab[err].description; } -static enum http_host_state -http_parse_host_char(enum http_host_state s, const char ch) -{ - switch (s) { - case s_http_userinfo: - case s_http_userinfo_start: - if (ch == '@') { - return s_http_host_start; - } - - if (IS_USERINFO_CHAR(ch)) { - return s_http_userinfo; - } - break; - - case s_http_host_start: - if (ch == '[') { - return s_http_host_v6_start; - } - - if (IS_HOST_CHAR(ch)) { - return s_http_host; - } - - break; - - case s_http_host: - if (IS_HOST_CHAR(ch)) { - return s_http_host; - } - - /* FALLTHROUGH */ - case s_http_host_v6_end: - if (ch == ':') { - return s_http_host_port_start; - } - - break; - - case s_http_host_v6: - if (ch == ']') { - return s_http_host_v6_end; - } - - /* FALLTHROUGH */ - case s_http_host_v6_start: - if (IS_HEX(ch) || ch == ':' || ch == '.') { - return s_http_host_v6; - } - - if (s == s_http_host_v6 && ch == '%') { - return s_http_host_v6_zone_start; - } - break; - - case s_http_host_v6_zone: - if (ch == ']') { - return s_http_host_v6_end; - } - - /* FALLTHROUGH */ - case s_http_host_v6_zone_start: - /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ - if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || - ch == '_' || - ch == '~') { - return s_http_host_v6_zone; - } - break; - - case s_http_host_port: - case s_http_host_port_start: - if (IS_NUM(ch)) { - return s_http_host_port; - } - - break; - - default: - break; - } - return s_http_host_dead; -} - -static -int http_parse_host(const char *buf, struct http_parser_url *u, - int found_at) -{ - enum http_host_state s; - size_t buflen; - const char *p; - - buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; - assert(u->field_set & (1 << UF_HOST)); - - u->field_data[UF_HOST].len = 0; - - s = found_at ? s_http_userinfo_start : s_http_host_start; - - for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { - enum http_host_state new_s = http_parse_host_char(s, *p); - - if (new_s == s_http_host_dead) { - return 1; - } - - switch (new_s) { - case s_http_host: - if (s != s_http_host) { - u->field_data[UF_HOST].off = p - buf; - } - u->field_data[UF_HOST].len++; - break; - - case s_http_host_v6: - if (s != s_http_host_v6) { - u->field_data[UF_HOST].off = p - buf; - } - u->field_data[UF_HOST].len++; - break; - - case s_http_host_v6_zone_start: - case s_http_host_v6_zone: - u->field_data[UF_HOST].len++; - break; - - case s_http_host_port: - if (s != s_http_host_port) { - u->field_data[UF_PORT].off = p - buf; - u->field_data[UF_PORT].len = 0; - u->field_set |= (1 << UF_PORT); - } - u->field_data[UF_PORT].len++; - break; - - case s_http_userinfo: - if (s != s_http_userinfo) { - u->field_data[UF_USERINFO].off = p - buf; - u->field_data[UF_USERINFO].len = 0; - u->field_set |= (1 << UF_USERINFO); - } - u->field_data[UF_USERINFO].len++; - break; - - default: - break; - } - s = new_s; - } - - /* Make sure we don't end somewhere unexpected */ - switch (s) { - case s_http_host_start: - case s_http_host_v6_start: - case s_http_host_v6: - case s_http_host_v6_zone_start: - case s_http_host_v6_zone: - case s_http_host_port_start: - case s_http_userinfo: - case s_http_userinfo_start: - return 1; - default: - break; - } - - return 0; -} - -void -http_parser_url_init(struct http_parser_url *u) -{ - memset(u, 0, sizeof(*u)); -} - -int -http_parser_parse_url(const char *buf, size_t buflen, int is_connect, - struct http_parser_url *u) -{ - enum http_parser_url_fields old_uf; - enum http_parser_url_fields uf; - int found_at = 0; - const char *p; - enum state s; - - u->port = u->field_set = 0; - s = is_connect ? s_req_server_start : s_req_spaces_before_url; - old_uf = UF_MAX; - - for (p = buf; p < buf + buflen; p++) { - s = parse_url_char(s, *p); - - /* Figure out the next field that we're operating on */ - switch (s) { - case s_dead: - return 1; - - /* Skip delimeters */ - case s_req_schema_slash: - case s_req_schema_slash_slash: - case s_req_server_start: - case s_req_query_string_start: - case s_req_fragment_start: - continue; - - case s_req_schema: - uf = UF_SCHEMA; - break; - - case s_req_server_with_at: - found_at = 1; - - /* FALLTROUGH */ - case s_req_server: - uf = UF_HOST; - break; - - case s_req_path: - uf = UF_PATH; - break; - - case s_req_query_string: - uf = UF_QUERY; - break; - - case s_req_fragment: - uf = UF_FRAGMENT; - break; - - default: - assert(!"Unexpected state"); - return 1; - } - - /* Nothing's changed; soldier on */ - if (uf == old_uf) { - u->field_data[uf].len++; - continue; - } - - u->field_data[uf].off = p - buf; - u->field_data[uf].len = 1; - - u->field_set |= (1 << uf); - old_uf = uf; - } - - /* host must be present if there is a schema */ - /* parsing http:///toto will fail */ - if ((u->field_set & (1 << UF_SCHEMA)) && - (u->field_set & (1 << UF_HOST)) == 0) { - return 1; - } - - if (u->field_set & (1 << UF_HOST)) { - if (http_parse_host(buf, u, found_at) != 0) { - return 1; - } - } - - /* CONNECT requests can only contain "hostname:port" */ - if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { - return 1; - } - - if (u->field_set & (1 << UF_PORT)) { - /* Don't bother with endp; we've already validated the string */ - unsigned long v; - - v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); - - /* Ports have a max value of 2^16 */ - if (v > 0xffff) { - return 1; - } - - u->port = (u16_t) v; - } - - return 0; -} - void http_parser_pause(struct http_parser *parser, int paused) { /* Users should only be pausing/unpausing a parser that is not in an diff --git a/subsys/net/lib/http/http_parser_url.c b/subsys/net/lib/http/http_parser_url.c new file mode 100644 index 0000000000000..9cee1e78b9139 --- /dev/null +++ b/subsys/net/lib/http/http_parser_url.c @@ -0,0 +1,573 @@ +/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev + * + * Additional changes are licensed under the same terms as NGINX and + * copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include + +#ifndef BIT_AT +# define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#endif + +/* Set the mark FOR; non-destructive if mark is already set */ +#define MARK(FOR) \ +do { \ + if (!FOR##_mark) { \ + FOR##_mark = p; \ + } \ +} while (0) + + +#ifdef HTTP_PARSER_STRICT +# define T(v) 0 +#else +# define T(v) v +#endif + + +static const u8_t normal_url_char[32] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, +}; + +#undef T + +enum http_host_state { + s_http_host_dead = 1, + s_http_userinfo_start, + s_http_userinfo, + s_http_host_start, + s_http_host_v6_start, + s_http_host, + s_http_host_v6, + s_http_host_v6_end, + s_http_host_v6_zone_start, + s_http_host_v6_zone, + s_http_host_port_start, + s_http_host_port +}; + + +/* Macros for character classes; depends on strict-mode */ +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) + +#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ + (c) == '!' || (c) == '~' || (c) == '*' || \ + (c) == '\'' || (c) == '(' || (c) == ')') + +#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ + (c) == ';' || (c) == ':' || (c) == '&' || \ + (c) == '=' || (c) == '+' || (c) == '$' || \ + (c) == ',') + +#ifdef HTTP_PARSER_STRICT +#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define IS_URL_CHAR(c) \ + (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + */ +enum state parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#ifdef HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI + * (alpha). + * All methods except CONNECT are followed by '/' or '*'. + */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: + if (ch == '@') { + return s_dead; + } + + /* FALLTHROUGH */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { + return s_req_server_with_at; + } + + if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's + * an error + */ + return s_dead; +} + +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) +{ + switch (s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* FALLTHROUGH */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* FALLTHROUGH */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':' || ch == '.') { + return s_http_host_v6; + } + + if (s == s_http_host_v6 && ch == '%') { + return s_http_host_v6_zone_start; + } + break; + + case s_http_host_v6_zone: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* FALLTHROUGH */ + case s_http_host_v6_zone_start: + /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ + if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || + ch == '_' || + ch == '~') { + return s_http_host_v6_zone; + } + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static +int http_parse_host(const char *buf, struct http_parser_url *u, + int found_at) +{ + enum http_host_state s; + size_t buflen; + const char *p; + + buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + assert(u->field_set & (1 << UF_HOST)); + + u->field_data[UF_HOST].len = 0; + + s = found_at ? s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch (new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = p - buf; + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = p - buf; + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + case s_http_host_port_start: + case s_http_userinfo: + case s_http_userinfo_start: + return 1; + default: + break; + } + + return 0; +} + +void +http_parser_url_init(struct http_parser_url *u) +{ + memset(u, 0, sizeof(*u)); +} + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum http_parser_url_fields old_uf; + enum http_parser_url_fields uf; + int found_at = 0; + const char *p; + enum state s; + + u->port = u->field_set = 0; + s = is_connect ? s_req_server_start : s_req_spaces_before_url; + old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimeters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* FALLTROUGH */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = p - buf; + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + /* Don't bother with endp; we've already validated the string */ + unsigned long v; + + v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + + u->port = (u16_t) v; + } + + return 0; +}