# HG changeset patch
# User Maxim Dounin <[email protected]>
# Date 1755535197 -10800
# Mon Aug 18 19:39:57 2025 +0300
# Node ID 097c394fa0fd14169bbe054c469886213cbaabad
# Parent 5cc30c1b91dba2a4358a74093300697ecda9ec39
Updated request line parsing to allow uncommon chars in host.
Previously, only ALPHA, DIGIT, ".", and "-" were allowed in the host
component of the request line (if it's not an IP literal). On the other
hand, RFC 3986 allows the following:
    reg-name    = *( unreserved / pct-encoded / sub-delims )
    unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
    sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
                / "*" / "+" / "," / ";" / "="
    pct-encoded = "%" HEXDIG HEXDIG
Notably, the "_" character is used in practice but was not allowed in the
request line. At the same time, this and other characters do actually work
in practice, as they are accepted in the Host header field, which uses more
relaxed parsing.
With this change, all characters which are valid in the host name per
RFC 3986 are also allowed in the request line.
diff --git a/src/http/ngx_http_parse.c b/src/http/ngx_http_parse.c
--- a/src/http/ngx_http_parse.c
+++ b/src/http/ngx_http_parse.c
@@ -392,7 +392,16 @@ ngx_http_parse_request_line(ngx_http_req
break;
}
- if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') {
+ if (ch >= '0' && ch <= '9') {
+ break;
+ }
+
+ if (ch == '.' || ch == '-' || ch == '_' || ch == '~'
+ || ch == '!' || ch == '$' || ch == '&' || ch == '\''
+ || ch == '(' || ch == ')' || ch == '*' || ch == '+'
+ || ch == ',' || ch == ';' || ch == '=' || ch == '%')
+ {
+ /* unreserved, sub-delims, pct-encoded */
break;
}