details: http://freenginx.org/hg/nginx/rev/4ae00e4104c2 branches: changeset: 9417:4ae00e4104c2 user: Maxim Dounin <[email protected]> date: Thu Aug 21 23:49:46 2025 +0300 description: Improved ngx_http_validate_host() to do better syntax validation.
With this change, syntax validation in ngx_http_validate_host() is mostly identical to that used for request line parsing, which now follows RFC 3986. In particular, as a result of this change, non-ASCII characters and invalid port numbers are no longer allowed in the Host header. This also fixes "." not being stripped when the port contained ".", since such requests are now rejected by the syntax checks. Prodded by Anton Mironenko, https://github.com/freenginx/nginx/issues/11 diffstat: src/http/ngx_http_request.c | 151 ++++++++++++++++++++++++++++++++----------- 1 files changed, 112 insertions(+), 39 deletions(-) diffs (181 lines): diff --git a/src/http/ngx_http_request.c b/src/http/ngx_http_request.c --- a/src/http/ngx_http_request.c +++ b/src/http/ngx_http_request.c @@ -2155,9 +2155,11 @@ ngx_http_validate_host(ngx_str_t *host, size_t i, dot_pos, host_len; enum { - sw_usual = 0, + sw_start = 0, + sw_host, + sw_host_end, sw_literal, - sw_rest + sw_port } state; dot_pos = host->len; @@ -2165,55 +2167,126 @@ ngx_http_validate_host(ngx_str_t *host, h = host->data; - state = sw_usual; + state = sw_start; for (i = 0; i < host->len; i++) { ch = h[i]; - switch (ch) { - - case '.': - if (dot_pos == i - 1) { - return NGX_DECLINED; - } - dot_pos = i; - break; - - case ':': - if (state == sw_usual) { - host_len = i; - state = sw_rest; - } - break; - - case '[': - if (i == 0) { + switch (state) { + + case sw_start: + + if (ch == '[') { + host_len = 0; state = sw_literal; - } - break; - - case ']': - if (state == sw_literal) { - host_len = i + 1; - state = sw_rest; + break; } - break; - - default: - - if (ngx_path_separator(ch)) { - return NGX_DECLINED; - } - - if (ch <= 0x20 || ch == 0x7f) { - return NGX_DECLINED; + + state = sw_host; + + /* fall through */ + + case sw_host: + + if (ch >= 'a' && ch <= 'z') { + break; } if (ch >= 'A' && ch <= 'Z') { alloc = 1; + break; } - break; + if (ch >= '0' && ch <= '9') { + break; + } + + if (ch == '.') { + if (dot_pos == i - 1) { + return 
NGX_DECLINED; + } + dot_pos = i; + break; + } + + if (ch == '-' || ch == '_' || ch == '~' + || ch == '!' || ch == '$' || ch == '&' || ch == '\'' + || ch == '(' || ch == ')' || ch == '*' || ch == '+' + || ch == ',' || ch == ';' || ch == '=' || ch == '%') + { + /* unreserved, sub-delims, pct-encoded */ + break; + } + + /* fall through */ + + case sw_host_end: + + host_len = i; + + if (ch == ':') { + state = sw_port; + break; + } + + /* notably, "/" and "\" are rejected */ + + return NGX_DECLINED; + + case sw_literal: + + if (ch >= '0' && ch <= '9') { + break; + } + + if (ch >= 'a' && ch <= 'z') { + break; + } + + if (ch >= 'A' && ch <= 'Z') { + alloc = 1; + break; + } + + if (ch == ':') { + break; + } + + if (ch == '.') { + if (dot_pos == i - 1) { + return NGX_DECLINED; + } + dot_pos = i; + break; + } + + if (ch == '-' || ch == '_' || ch == '~' + || ch == '!' || ch == '$' || ch == '&' || ch == '\'' + || ch == '(' || ch == ')' || ch == '*' || ch == '+' + || ch == ',' || ch == ';' || ch == '=' || ch == '%') + { + /* unreserved, sub-delims, pct-encoded */ + break; + } + + if (ch == ']') { + host_len = i + 1; + state = sw_host_end; + break; + } + + /* notably, "/" and "\" are rejected */ + + return NGX_DECLINED; + + case sw_port: + + if (ch >= '0' && ch <= '9') { + break; + } + + return NGX_DECLINED; + } }
