This is an automated email from the ASF dual-hosted git repository. bcall pushed a commit to branch 8.0.x in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/8.0.x by this push: new 6affb92 Fixed how we handle uknown schemes 6affb92 is described below commit 6affb92d8c85896e8b60abb9f684af8109280a0a Author: Bryan Call <bc...@apache.org> AuthorDate: Thu Jan 30 10:42:00 2020 -0800 Fixed how we handle uknown schemes (cherry picked from commit c20a369fa93b22b492e6aed8bee1da5fa4af1cab) --- proxy/hdrs/HdrTest.cc | 101 +++++++++++++++++++++++++++++++++++++++----------- proxy/hdrs/URL.cc | 49 +++++++++++++----------- 2 files changed, 108 insertions(+), 42 deletions(-) diff --git a/proxy/hdrs/HdrTest.cc b/proxy/hdrs/HdrTest.cc index 49107ca..8046e69 100644 --- a/proxy/hdrs/HdrTest.cc +++ b/proxy/hdrs/HdrTest.cc @@ -343,44 +343,100 @@ HdrTest::test_url() // Start with an easy one... "http://trafficserver.apache.org/index.html", - // "cheese://bogosity", This fails, but it's not clear it should work... - - "some.place", "some.place/", "http://some.place", "http://some.place/", "http://some.place/path", - "http://some.place/path;params", "http://some.place/path;params?query", "http://some.place/path;params?query#fragment", - "http://some.place/path?query#fragment", "http://some.place/path#fragment", + "cheese://bogosity", + + "some.place", + "some.place/", + "http://some.place", + "http://some.place/", + "http://some.place/path", + "http://some.place/path;params", + "http://some.place/path;params?query", + "http://some.place/path;params?query#fragment", + "http://some.place/path?query#fragment", + "http://some.place/path#fragment", - "some.place:80", "some.place:80/", "http://some.place:80", "http://some.place:80/", + "some.place:80", + "some.place:80/", + "http://some.place:80", + "http://some.place:80/", - "foo@some.place:80", "foo@some.place:80/", "http://foo@some.place:80", "http://foo@some.place:80/", + "foo@some.place:80", + "foo@some.place:80/", + "http://foo@some.place:80", + "http://foo@some.place:80/", - "foo:bar@some.place:80", "foo:bar@some.place:80/", "http://foo:bar@some.place:80", "http://foo:bar@some.place:80/", + "foo:bar@some.place:80", + "foo:bar@some.place:80/", + "http://foo:bar@some.place:80", + "http://foo:bar@some.place:80/", // Some address stuff - "http://172.16.28.101", "http://172.16.28.101:8080", "http://[::]", "http://[::1]", "http://[fc01:172:16:28::101]", - "http://[fc01:172:16:28::101]:80", "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]", - "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080", "http://172.16.28.101/some/path", "http://172.16.28.101:8080/some/path", - "http://[::1]/some/path", "http://[fc01:172:16:28::101]/some/path", "http://[fc01:172:16:28::101]:80/some/path", - "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]/some/path", "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/some/path", - "http://172.16.28.101/", "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/", - - "foo:bar@some.place", "foo:bar@some.place/", "http://foo:bar@some.place", "http://foo:bar@some.place/", - "http://foo:bar@[::1]:8080/", "http://foo@[::1]", - - "mms://sm02.tsqa.example.com/0102rally.asf", "pnm://foo:bar@some.place:80/path;params?query#fragment", - "rtsp://foo:bar@some.place:80/path;params?query#fragment", "rtspu://foo:bar@some.place:80/path;params?query#fragment", + "http://172.16.28.101", + "http://172.16.28.101:8080", + "http://[::]", + "http://[::1]", + "http://[fc01:172:16:28::101]", + "http://[fc01:172:16:28::101]:80", + "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]", + "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080", + "http://172.16.28.101/some/path", + "http://172.16.28.101:8080/some/path", + "http://[::1]/some/path", + "http://[fc01:172:16:28::101]/some/path", + "http://[fc01:172:16:28::101]:80/some/path", + "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]/some/path", + "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/some/path", + "http://172.16.28.101/", + "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/", + + // "foo:@some.place", TODO - foo:@some.place is change to foo@some.place in the test + "foo:bar@some.place", + "foo:bar@some.place/", + "http://foo:bar@some.place", + "http://foo:bar@some.place/", + "http://foo:bar@[::1]:8080/", + "http://foo@[::1]", + + "mms://sm02.tsqa.example.com/0102rally.asf", + "pnm://foo:bar@some.place:80/path;params?query#fragment", + "rtsp://foo:bar@some.place:80/path;params?query#fragment", + "rtspu://foo:bar@some.place:80/path;params?query#fragment", "/finance/external/cbsm/*http://cbs.marketwatch.com/archive/19990713/news/current/net.htx?source=blq/yhoo&dist=yhoo", - "http://a.b.com/xx.jpg?newpath=http://bob.dave.com"}; + "http://a.b.com/xx.jpg?newpath=http://bob.dave.com", + + "ht-tp://a.b.com", + "ht+tp://a.b.com", + "ht.tp://a.b.com", + + "h1ttp://a.b.com", + "http1://a.b.com", + }; static const char *bad[] = { "http://[1:2:3:4:5:6:7:8:9]", "http://1:2:3:4:5:6:7:8:A:B", "http://bob.com[::1]", "http://[::1].com", + "http://foo:bar:b...@bob.com/", "http://foo:bar:baz@[::1]:8080/", + "http://]", "http://:", + "http:/", + "http:/foo.bar.com/", + "~http://invalid.char.in.scheme/foo", + "http~://invalid.char.in.scheme/foo", + "ht~tp://invalid.char.in.scheme/foo", + "1http://first.char.not.alpha", + "some.domain.com/http://invalid.domain/foo", + ":", + "://", + + // maybe this should be a valid URL + "a.b.com/xx.jpg?newpath=http://bob.dave.com", }; int err, failed; @@ -400,6 +456,7 @@ HdrTest::test_url() url.create(nullptr); err = url.parse(&start, end); if (err < 0) { + printf("Failed to parse url '%s'\n", start); failed = 1; break; } @@ -448,6 +505,8 @@ HdrTest::test_url() failed = 1; printf("Successfully parsed invalid url '%s'", x); break; + } else { + printf(" bad URL - PARSE FAILED: '%s'\n", bad[i]); } } diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc index 8f94329..d38a1f5 100644 --- a/proxy/hdrs/URL.cc +++ b/proxy/hdrs/URL.cc @@ -1120,34 +1120,41 @@ url_parse_scheme(HdrHeap *heap, URLImpl *url, const char **start, const char *en const char *scheme_end = nullptr; int scheme_wks_idx; + // Skip over spaces while (' ' == *cur && ++cur < end) { - ; } + if (cur < end) { scheme_start = scheme_end = cur; - // special case 'http:' for performance - if ((end - cur >= 5) && (((cur[0] ^ 'h') | (cur[1] ^ 't') | (cur[2] ^ 't') | (cur[3] ^ 'p') | (cur[4] ^ ':')) == 0)) { - scheme_end = cur + 4; // point to colon - url_scheme_set(heap, url, scheme_start, URL_WKSIDX_HTTP, 4, copy_strings_p); - } else if ('/' != *cur) { - // For forward transparent mode, the URL for the method can just be a path, - // so don't scan that for a scheme, as we could find a false positive if there - // is a URL in the parameters (which is legal). + + // If the URL is more complex then a path, parse to see if there is a scheme + if ('/' != *cur) { + // Search for a : it could be part of a scheme or a username:password while (':' != *cur && ++cur < end) { - ; } - if (cur < end) { // found a colon - scheme_wks_idx = hdrtoken_tokenize(scheme_start, cur - scheme_start, &scheme_wks); - - /* Distinguish between a scheme only and a username by looking past the colon. If it is missing - or it's a slash, presume scheme. Otherwise it's a username with a password. - */ - if ((scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME) || // known scheme - (cur >= end - 1 || cur[1] == '/')) // no more data or slash past colon - { - scheme_end = cur; - url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p); + + // If there is a :// then there is a scheme + if (cur + 2 < end && cur[1] == '/' && cur[2] == '/') { // found "://" + scheme_end = cur; + scheme_wks_idx = hdrtoken_tokenize(scheme_start, scheme_end - scheme_start, &scheme_wks); + + if (!(scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME)) { + // Unknown scheme, validate the scheme + + // RFC 3986 Section 3.1 + // These are the valid characters in a scheme: + // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + // return an error if there is another character in the scheme + if (!ParseRules::is_alpha(*scheme_start)) { + return PARSE_RESULT_ERROR; + } + for (cur = scheme_start + 1; cur < scheme_end; ++cur) { + if (!(ParseRules::is_alnum(*cur) != 0 || *cur == '+' || *cur == '-' || *cur == '.')) { + return PARSE_RESULT_ERROR; + } + } } + url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p); } } *start = scheme_end;