This is an automated email from the ASF dual-hosted git repository. briang pushed a commit to branch master in repository https://git-dual.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push: new 9f9dc38 TS-4312 Add config to strictly parse URL according to RFC 3986. This closes #574 9f9dc38 is described below commit 9f9dc3832d16a79072e1f9c7abf69048e58535cc Author: Shen Zhang <sezh...@linkedin.com> AuthorDate: Sun Apr 17 09:55:34 2016 -0700 TS-4312 Add config to strictly parse URL according to RFC 3986. This closes #574 --- doc/admin-guide/files/records.config.en.rst | 5 +++ lib/ts/CompileParseRules.cc | 4 +- lib/ts/ParseRules.h | 39 ++++++++++++++++--- mgmt/RecordsConfig.cc | 2 + proxy/hdrs/HTTP.cc | 6 +-- proxy/hdrs/HTTP.h | 10 ++--- proxy/hdrs/HdrTSOnly.cc | 4 +- proxy/hdrs/URL.cc | 59 ++++++++++++++++++++++++++++- proxy/hdrs/URL.h | 2 +- proxy/http/HttpConfig.cc | 3 ++ proxy/http/HttpConfig.h | 4 +- proxy/http/HttpSM.cc | 3 +- 12 files changed, 120 insertions(+), 21 deletions(-) diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst index 55ac374..b3421ba 100644 --- a/doc/admin-guide/files/records.config.en.rst +++ b/doc/admin-guide/files/records.config.en.rst @@ -1040,6 +1040,11 @@ Value Effect An arbitrary string value that, if set, will be used to replace any request ``User-Agent`` header. +.. ts:cv:: CONFIG proxy.config.http.strict_uri_parsing INT 0 + + Enables (``1``) or disables (``0``) Traffic Server to return a 400 Bad Request + if client's request URI includes character which is not RFC 3986 compliant + Parent Proxy Configuration ========================== diff --git a/lib/ts/CompileParseRules.cc b/lib/ts/CompileParseRules.cc index efb39a0..7d92bb0 100644 --- a/lib/ts/CompileParseRules.cc +++ b/lib/ts/CompileParseRules.cc @@ -105,8 +105,8 @@ main() tparseRulesCType[c] |= is_eow_BIT; if (ParseRules::is_token(c)) tparseRulesCType[c] |= is_token_BIT; - if (ParseRules::is_wildmat(c)) - tparseRulesCType[c] |= is_wildmat_BIT; + if (ParseRules::is_uri(c)) + tparseRulesCType[c] |= is_uri_BIT; if (ParseRules::is_sep(c)) tparseRulesCType[c] |= is_sep_BIT; if (ParseRules::is_empty(c)) diff --git a/lib/ts/ParseRules.h b/lib/ts/ParseRules.h index 2b430cf..5e8f8d6 100644 --- a/lib/ts/ParseRules.h +++ b/lib/ts/ParseRules.h @@ -59,7 +59,7 @@ typedef unsigned int CTypeResult; #define is_wslfcr_BIT (1 << 20) #define is_eow_BIT (1 << 21) #define is_token_BIT (1 << 22) -#define is_wildmat_BIT (1 << 23) +#define is_uri_BIT (1 << 23) #define is_sep_BIT (1 << 24) #define is_empty_BIT (1 << 25) #define is_alnum_BIT (1 << 26) @@ -122,7 +122,7 @@ public: static CTypeResult is_punct(char c); // !"#$%&'()*+,-./:;<>=?@_{}|~ static CTypeResult is_end_of_url(char c); // NUL,CR,SP static CTypeResult is_eow(char c); // NUL,CR,LF - static CTypeResult is_wildmat(char c); // \,*,?,[ + static CTypeResult is_uri(char c); // A-Z,a-z,0-9 :/?#[]@!$&'()*+,;=-._~% static CTypeResult is_sep(char c); // NULL,COMMA,':','!',wslfcr static CTypeResult is_empty(char c); // wslfcr,# static CTypeResult is_alnum(char c); // 0-9,A-Z,a-z @@ -590,12 +590,41 @@ ParseRules::is_eow(char c) } inline CTypeResult -ParseRules::is_wildmat(char c) +ParseRules::is_uri(char c) { #ifndef COMPILE_PARSE_RULES - return (parseRulesCType[(unsigned char)c] & is_wildmat_BIT); + return (parseRulesCType[(unsigned char)c] & is_uri_BIT); #else - return (c == '*' || c == '?' || c == '[' || c == '\\'); + if (is_alnum(c)) + return (true); + + switch (c) { + case ':': + case '/': + case '?': + case '#': + case '[': + case ']': + case '@': + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + case '-': + case '.': + case '_': + case '~': + case '%': + return (true); + } + return (false); #endif } diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc index 8d17edb..55d9cdc 100644 --- a/mgmt/RecordsConfig.cc +++ b/mgmt/RecordsConfig.cc @@ -440,6 +440,8 @@ static const RecordElement RecordsConfig[] = , {RECT_CONFIG, "proxy.config.http.post.check.content_length.enabled", RECD_INT, "1", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL} , + {RECT_CONFIG, "proxy.config.http.strict_uri_parsing", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL} + , // # Send http11 requests // # // # 0 - Never diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc index 31fc92e..526362b 100644 --- a/proxy/hdrs/HTTP.cc +++ b/proxy/hdrs/HTTP.cc @@ -874,7 +874,7 @@ http_parser_clear(HTTPParser *parser) MIMEParseResult http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end, - bool must_copy_strings, bool eof) + bool must_copy_strings, bool eof, bool strict_uri_parsing) { if (parser->m_parsing_http) { MIMEScanner *scanner = &parser->m_mime_parser.m_scanner; @@ -939,7 +939,7 @@ http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const ink_assert(hh->u.req.m_url_impl != NULL); url = hh->u.req.m_url_impl; url_start = &(cur[4]); - err = ::url_parse(heap, url, &url_start, &(end[-11]), must_copy_strings); + err = ::url_parse(heap, url, &url_start, &(end[-11]), must_copy_strings, strict_uri_parsing); if (err < 0) return err; http_hdr_version_set(hh, version); @@ -1073,7 +1073,7 @@ http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const ink_assert(hh->u.req.m_url_impl != NULL); url = hh->u.req.m_url_impl; - err = ::url_parse(heap, url, &url_start, url_end, must_copy_strings); + err = ::url_parse(heap, url, &url_start, url_end, must_copy_strings, strict_uri_parsing); if (err < 0) return err; diff --git a/proxy/hdrs/HTTP.h b/proxy/hdrs/HTTP.h index ae20b48..b32d3ef 100644 --- a/proxy/hdrs/HTTP.h +++ b/proxy/hdrs/HTTP.h @@ -441,7 +441,7 @@ const char *http_hdr_reason_lookup(unsigned status); void http_parser_init(HTTPParser *parser); void http_parser_clear(HTTPParser *parser); MIMEParseResult http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end, - bool must_copy_strings, bool eof); + bool must_copy_strings, bool eof, bool strict_uri_parsing); MIMEParseResult validate_hdr_host(HTTPHdrImpl *hh); MIMEParseResult http_parser_parse_resp(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end, bool must_copy_strings, bool eof); @@ -618,10 +618,10 @@ public: const char *reason_get(int *length); void reason_set(const char *value, int length); - MIMEParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof); + MIMEParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, bool strict_uri_parsing = false); MIMEParseResult parse_resp(HTTPParser *parser, const char **start, const char *end, bool eof); - MIMEParseResult parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof); + MIMEParseResult parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, bool strict_uri_parsing = false); MIMEParseResult parse_resp(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof); public: @@ -1231,12 +1231,12 @@ HTTPHdr::reason_set(const char *value, int length) -------------------------------------------------------------------------*/ inline MIMEParseResult -HTTPHdr::parse_req(HTTPParser *parser, const char **start, const char *end, bool eof) +HTTPHdr::parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, bool strict_uri_parsing) { ink_assert(valid()); ink_assert(m_http->m_polarity == HTTP_TYPE_REQUEST); - return http_parser_parse_req(parser, m_heap, m_http, start, end, true, eof); + return http_parser_parse_req(parser, m_heap, m_http, start, end, true, eof, strict_uri_parsing); } /*------------------------------------------------------------------------- diff --git a/proxy/hdrs/HdrTSOnly.cc b/proxy/hdrs/HdrTSOnly.cc index c6aef49..6a59f7c 100644 --- a/proxy/hdrs/HdrTSOnly.cc +++ b/proxy/hdrs/HdrTSOnly.cc @@ -45,7 +45,7 @@ -------------------------------------------------------------------------*/ MIMEParseResult -HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof) +HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, bool strict_uri_parsing) { const char *start; const char *tmp; @@ -71,7 +71,7 @@ HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool int heap_slot = m_heap->attach_block(r->get_current_block(), start); m_heap->lock_ronly_str_heap(heap_slot); - state = http_parser_parse_req(parser, m_heap, m_http, &tmp, end, false, eof); + state = http_parser_parse_req(parser, m_heap, m_http, &tmp, end, false, eof, strict_uri_parsing); m_heap->set_ronly_str_heap_end(heap_slot, tmp); m_heap->unlock_ronly_str_heap(heap_slot); diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc index c343b42..0aff2c1 100644 --- a/proxy/hdrs/URL.cc +++ b/proxy/hdrs/URL.cc @@ -1156,9 +1156,28 @@ url_parse_scheme(HdrHeap *heap, URLImpl *url, const char **start, const char *en return PARSE_ERROR; // no non-whitespace found } +/** + * This method will return TRUE if the uri is strictly compliant with + * RFC 3986 and it will return FALSE if not. + */ +static bool +url_is_strictly_compliant(const char *start, const char *end) +{ + for (const char *i = start; i < end; ++i) { + if (!ParseRules::is_uri(*i)) { + Debug("http", "Non-RFC compliant character [0x%.2X] found in URL", (unsigned char)*i); + return false; + } + } + return true; +} + MIMEParseResult -url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p) +url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, bool strict_uri_parsing) { + if (strict_uri_parsing && !url_is_strictly_compliant(*start, end)) + return PARSE_ERROR; + MIMEParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p); return PARSE_CONT == zret ? url_parse_http(heap, url, start, end, copy_strings_p) : zret; } @@ -1798,4 +1817,42 @@ REGRESSION_TEST(VALIDATE_HDR_FIELD)(RegressionTest *t, int /* level ATS_UNUSED * } } + +REGRESSION_TEST(ParseRules_strict_URI)(RegressionTest *t, int /* level ATS_UNUSED */, int *pstatus) +{ + const struct { + const char *const uri; + bool valid; + } http_strict_uri_parsing_test_case[] = {{"/home", true}, + {"/path/data?key=value#id", true}, + {"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true}, + {"/abcdefghijklmnopqrstuvwxyz", true}, + {"/0123456789", true}, + {":/?#[]@", true}, + {"!$&'()*+,;=", true}, + {"-._~", true}, + {"%", true}, + {"\n", false}, + {"\"", false}, + {"<", false}, + {">", false}, + {"\\", false}, + {"^", false}, + {"`", false}, + {"{", false}, + {"|", false}, + {"}", false}, + {"é", false}}; + + TestBox box(t, pstatus); + box = REGRESSION_TEST_PASSED; + + for (unsigned int i = 0; i < sizeof(http_strict_uri_parsing_test_case) / sizeof(http_strict_uri_parsing_test_case[0]); ++i) { + const char *const uri = http_strict_uri_parsing_test_case[i].uri; + box.check(url_is_strictly_compliant(uri, uri + strlen(uri)) == http_strict_uri_parsing_test_case[i].valid, + "Strictly parse URI: \"%s\", expected %s, but not", uri, + (http_strict_uri_parsing_test_case[i].valid ? "true" : "false")); + } +} + #endif // TS_HAS_TESTS diff --git a/proxy/hdrs/URL.h b/proxy/hdrs/URL.h index bb7741a..a68edca 100644 --- a/proxy/hdrs/URL.h +++ b/proxy/hdrs/URL.h @@ -236,7 +236,7 @@ void url_params_set(HdrHeap *heap, URLImpl *url, const char *value, int length, void url_query_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string); void url_fragment_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string); -MIMEParseResult url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings); +MIMEParseResult url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings, bool strict_uri_parsing = false); MIMEParseResult url_parse_no_path_component_breakdown(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings); MIMEParseResult url_parse_internet(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings); diff --git a/proxy/http/HttpConfig.cc b/proxy/http/HttpConfig.cc index 4d6a2c6..3ab4a88 100644 --- a/proxy/http/HttpConfig.cc +++ b/proxy/http/HttpConfig.cc @@ -922,6 +922,7 @@ HttpConfig::startup() HttpEstablishStaticConfigLongLong(c.oride.flow_high_water_mark, "proxy.config.http.flow_control.high_water"); HttpEstablishStaticConfigLongLong(c.oride.flow_low_water_mark, "proxy.config.http.flow_control.low_water"); HttpEstablishStaticConfigByte(c.oride.post_check_content_length_enabled, "proxy.config.http.post.check.content_length.enabled"); + HttpEstablishStaticConfigByte(c.strict_uri_parsing, "proxy.config.http.strict_uri_parsing"); // [amc] This is a bit of a mess, need to figure out to make this cleaner. RecRegisterConfigUpdateCb("proxy.config.http.server_session_sharing.match", &http_server_session_sharing_cb, &c); @@ -1345,6 +1346,8 @@ HttpConfig::reconfigure() params->referer_filter_enabled = INT_TO_BOOL(m_master.referer_filter_enabled); params->referer_format_redirect = INT_TO_BOOL(m_master.referer_format_redirect); + params->strict_uri_parsing = INT_TO_BOOL(m_master.strict_uri_parsing); + params->oride.accept_encoding_filter_enabled = INT_TO_BOOL(m_master.oride.accept_encoding_filter_enabled); params->oride.down_server_timeout = m_master.oride.down_server_timeout; diff --git a/proxy/http/HttpConfig.h b/proxy/http/HttpConfig.h index 6db1078..e43796e 100644 --- a/proxy/http/HttpConfig.h +++ b/proxy/http/HttpConfig.h @@ -712,6 +712,8 @@ public: MgmtByte referer_filter_enabled; MgmtByte referer_format_redirect; + MgmtByte strict_uri_parsing; + /////////////////// // reverse proxy // /////////////////// @@ -856,7 +858,7 @@ inline HttpConfigParams::HttpConfigParams() parent_connect_attempts(4), per_parent_connect_attempts(2), parent_connect_timeout(30), anonymize_other_header_list(NULL), enable_http_stats(1), icp_enabled(0), stale_icp_enabled(0), cache_vary_default_text(NULL), cache_vary_default_images(NULL), cache_vary_default_other(NULL), cache_enable_default_vary_headers(0), cache_post_method(0), connect_ports_string(NULL), - connect_ports(NULL), push_method_enabled(0), referer_filter_enabled(0), referer_format_redirect(0), reverse_proxy_enabled(0), + connect_ports(NULL), push_method_enabled(0), referer_filter_enabled(0), referer_format_redirect(0), strict_uri_parsing(0), reverse_proxy_enabled(0), url_remap_required(1), record_cop_page(0), errors_log_error_pages(1), enable_http_info(0), cluster_time_delta(0), redirection_host_no_port(1), post_copy_size(2048), ignore_accept_mismatch(0), ignore_accept_language_mismatch(0), ignore_accept_encoding_mismatch(0), ignore_accept_charset_mismatch(0), send_100_continue_response(0), diff --git a/proxy/http/HttpSM.cc b/proxy/http/HttpSM.cc index 06e811d..e5b5c05 100644 --- a/proxy/http/HttpSM.cc +++ b/proxy/http/HttpSM.cc @@ -624,7 +624,8 @@ HttpSM::state_read_client_request_header(int event, void *data) // tokenize header // ///////////////////// - MIMEParseResult state = t_state.hdr_info.client_request.parse_req(&http_parser, ua_buffer_reader, &bytes_used, ua_entry->eos); + MIMEParseResult state = t_state.hdr_info.client_request.parse_req(&http_parser, ua_buffer_reader, &bytes_used, ua_entry->eos, + t_state.http_config_param->strict_uri_parsing); client_request_hdr_bytes += bytes_used; -- To stop receiving notification emails like this one, please contact ['"commits@trafficserver.apache.org" <commits@trafficserver.apache.org>'].