On Thu, Jun 11, 2020 at 8:52 AM jean-frederic clere <[email protected]> wrote:
>
> Should I commit my first proposal (it is easily backportable to 2.4.x)
> and later work on the next one?
How about something like the attached patch?
It's a new single ap_normalize_path() helper with options (like
AP_NORMALIZE_MERGE_SLASHES and AP_NORMALIZE_PATH_PARAMETERS), which we
can use in multiple places to replace ap_getparents(). The patch is
without the mod_rewrite bits (for now), and uses the "mapping=servlet"
syntax for the new proxypass parameter (which I found more
extensible).
The issue with re-calling ap_getparents() in ap_proxy_trans_match()
(or ap_normalize_path() in my patch still) is that it happens after
%-decoding, so bets are off.
For instance "/docs/..%3Bfoo=bar/%3Bfoo=bar/test/index.jsp" is not the
same as "http://localhost:8000/docs/..;foo=bar/;foo=bar/test/index.jsp"
and shouldn't be matched the same by mapping=servlet.
We need a way to forward non %-decoded URLs upto mod_proxy (reverse)
if we want to normalize a second time..
Regards;
Yann.
Index: include/httpd.h
===================================================================
--- include/httpd.h (revision 1878328)
+++ include/httpd.h (working copy)
@@ -1779,8 +1779,21 @@ AP_DECLARE(void) ap_no2slash(char *name)
AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path)
AP_FN_ATTR_NONNULL_ALL;
+#define AP_NORMALIZE_NOT_ABOVE_ROOT (1u << 0)
+#define AP_NORMALIZE_MERGE_SLASHES (1u << 1)
+#define AP_NORMALIZE_PATH_PARAMETERS (1u << 2)
/**
+ * Remove all ////, /./ and /xx/../ substrings from a path, and more
+ * depending on passed in flags.
+ * @param path The path to normalize
+ * @param flags bitmask of AP_NORMALIZE_* flags
+ * @return non-zero on success
+ */
+AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags)
+ AP_FN_ATTR_NONNULL((1));
+
+/**
* Remove all ./ and xx/../ substrings from a file name. Also remove
* any leading ../ or /../ substrings.
* @param name the file name to parse
Index: modules/dav/main/util.c
===================================================================
--- modules/dav/main/util.c (revision 1878328)
+++ modules/dav/main/util.c (working copy)
@@ -664,7 +664,11 @@ static dav_error * dav_process_if_header(request_r
/* note that parsed_uri.path is allocated; we can trash it */
/* clean up the URI a bit */
- ap_getparents(parsed_uri.path);
+ if (!ap_normalize_path(parsed_uri.path, 0)) {
+ return dav_new_error(r->pool, HTTP_BAD_REQUEST,
+ DAV_ERR_IF_TAGGED, rv,
+ "Invalid URI path tagged If-header.");
+ }
/* the resources we will compare to have unencoded paths */
if (ap_unescape_url(parsed_uri.path) != OK) {
Index: modules/generators/mod_autoindex.c
===================================================================
--- modules/generators/mod_autoindex.c (revision 1878328)
+++ modules/generators/mod_autoindex.c (working copy)
@@ -1266,8 +1266,7 @@ static struct ent *make_parent_entry(apr_int32_t a
if (!(p->name = ap_make_full_path(r->pool, r->uri, "../"))) {
return (NULL);
}
- ap_getparents(p->name);
- if (!*p->name) {
+ if (!ap_normalize_path(p->name, 0)) {
return (NULL);
}
Index: modules/proxy/mod_proxy.c
===================================================================
--- modules/proxy/mod_proxy.c (revision 1878328)
+++ modules/proxy/mod_proxy.c (working copy)
@@ -753,6 +753,25 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
mismatch = 1;
use_uri = r->uri;
}
+ else if (!nocanon && (ent->flags & PROXYPASS_MAPPING_SERVLET)) {
+ char *uri = apr_pstrdup(r->pool, r->uri);
+
+ /* check against the backend servlet url */
+ if (!ap_normalize_path(uri, AP_NORMALIZE_MERGE_SLASHES |
+ AP_NORMALIZE_PATH_PARAMETERS)) {
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
+ "servlet normalization failed for '%s'", r->uri);
+ return HTTP_INTERNAL_SERVER_ERROR;
+ }
+
+ if (!alias_match(uri, ent->fake)) {
+ ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO()
+ "servlet normalization for '%s' mismatch; "
+ "declining '%s'", r->uri, uri);
+ return DECLINED;
+ }
+ }
+
found = apr_pstrcat(r->pool, "proxy:", real, use_uri + len, NULL);
}
}
@@ -1806,6 +1825,9 @@ static const char *
else if (!strcasecmp(word,"noquery")) {
flags |= PROXYPASS_NOQUERY;
}
+ else if (!strcasecmp(word,"mapping=servlet")) {
+ flags |= PROXYPASS_MAPPING_SERVLET;
+ }
else {
char *val = strchr(word, '=');
if (!val) {
Index: modules/proxy/mod_proxy.h
===================================================================
--- modules/proxy/mod_proxy.h (revision 1878328)
+++ modules/proxy/mod_proxy.h (working copy)
@@ -123,6 +123,7 @@ struct proxy_remote {
#define PROXYPASS_NOCANON 0x01
#define PROXYPASS_INTERPOLATE 0x02
#define PROXYPASS_NOQUERY 0x04
+#define PROXYPASS_MAPPING_SERVLET 0x08
struct proxy_alias {
const char *real;
const char *fake;
Index: server/protocol.c
===================================================================
--- server/protocol.c (revision 1878328)
+++ server/protocol.c (working copy)
@@ -627,6 +627,14 @@ AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r,
}
else {
status = apr_uri_parse(r->pool, uri, &r->parsed_uri);
+ if (status == APR_SUCCESS
+ && (r->parsed_uri.path != NULL)
+ && (r->parsed_uri.path[0] != '/')
+ && (r->method_number != M_OPTIONS
+ || strcmp(r->parsed_uri.path, "*") != 0)) {
+ /* Invalid request-target per rfc7230#section-5.3 */
+ status = APR_EINVAL;
+ }
}
if (status == APR_SUCCESS) {
@@ -640,8 +648,15 @@ AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r,
}
r->args = r->parsed_uri.query;
- r->uri = r->parsed_uri.path ? r->parsed_uri.path
- : apr_pstrdup(r->pool, "/");
+ if (r->parsed_uri.path) {
+ r->uri = r->parsed_uri.path;
+ }
+ else if (r->method_number == M_OPTIONS) {
+ r->uri = apr_pstrdup(r->pool, "*");
+ }
+ else {
+ r->uri = apr_pstrdup(r->pool, "/");
+ }
#if defined(OS2) || defined(WIN32)
/* Handle path translations for OS/2 and plug security hole.
Index: server/request.c
===================================================================
--- server/request.c (revision 1878328)
+++ server/request.c (working copy)
@@ -169,12 +169,14 @@ AP_DECLARE(int) ap_process_request_internal(reques
core_dir_config *d;
core_server_config *sconf =
ap_get_core_module_config(r->server->module_config);
+ unsigned int normalize_flags = 0;
- /* Ignore embedded %2F's in path for proxy requests */
+ /* Ignore URL unescaping for proxy requests */
if (!r->proxyreq && r->parsed_uri.path) {
d = ap_get_core_module_config(r->per_dir_config);
if (d->allow_encoded_slashes) {
- access_status = ap_unescape_url_keep2f(r->parsed_uri.path, d->decode_encoded_slashes);
+ access_status = ap_unescape_url_keep2f(r->parsed_uri.path,
+ d->decode_encoded_slashes);
}
else {
access_status = ap_unescape_url(r->parsed_uri.path);
@@ -185,7 +187,7 @@ AP_DECLARE(int) ap_process_request_internal(reques
ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00026)
"found %%2f (encoded '/') in URI "
"(decoded='%s'), returning 404",
- r->parsed_uri.path);
+ r->uri);
}
}
return access_status;
@@ -192,14 +194,21 @@ AP_DECLARE(int) ap_process_request_internal(reques
}
}
- ap_getparents(r->uri); /* OK --- shrinking transformations... */
- if (sconf->merge_slashes != AP_CORE_CONFIG_OFF) {
- ap_no2slash(r->uri);
- if (r->parsed_uri.path) {
- ap_no2slash(r->parsed_uri.path);
+ if (r->parsed_uri.path) {
+ /* OK --- shrinking transformations... */
+ if (sconf->merge_slashes != AP_CORE_CONFIG_OFF) {
+ normalize_flags |= AP_NORMALIZE_MERGE_SLASHES;
}
- }
+ if (!ap_normalize_path(r->parsed_uri.path, normalize_flags)) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO()
+ "invalid URI path (%s)", r->uri);
+ return HTTP_BAD_REQUEST;
+ }
+ /* Apply transformations */
+ r->uri = r->parsed_uri.path;
+ }
+
/* All file subrequests are a huge pain... they cannot bubble through the
* next several steps. Only file subrequests are allowed an empty uri,
* otherwise let translate_name kill the request.
Index: server/util.c
===================================================================
--- server/util.c (revision 1878328)
+++ server/util.c (working copy)
@@ -77,7 +77,7 @@
#include "test_char.h"
/* Win32/NetWare/OS2 need to check for both forward and back slashes
- * in ap_getparents() and ap_escape_url.
+ * in ap_normalize_path() and ap_escape_url().
*/
#ifdef CASE_BLIND_FILESYSTEM
#define IS_SLASH(s) ((s == '/') || (s == '\\'))
@@ -492,75 +492,114 @@ AP_DECLARE(apr_status_t) ap_pregsub_ex(apr_pool_t
return rc;
}
-/*
- * Parse .. so we don't compromise security
- */
-AP_DECLARE(void) ap_getparents(char *name)
+/* Forward declare */
+static char x2c(const char *what);
+
+#define IS_SLASH_OR_NUL(s) (s == '\0' || IS_SLASH(s))
+
+static int normalize_path(char *path, unsigned int flags)
{
- char *next;
- int l, w, first_dot;
+ apr_size_t l, w;
- /* Four paseses, as per RFC 1808 */
- /* a) remove ./ path segments */
- for (next = name; *next && (*next != '.'); next++) {
+ /* We assume path[0] == '/' below */
+ if (!IS_SLASH(path[0])) {
+ /* Besides "OPTIONS *" requests, a request-target path should start
+ * with '/' (RFC-7230 section-5.3), so anything else is invalid.
+ */
+ return (path[0] == '*' && path[1] == '\0');
}
- l = w = first_dot = next - name;
- while (name[l] != '\0') {
- if (name[l] == '.' && IS_SLASH(name[l + 1])
- && (l == 0 || IS_SLASH(name[l - 1])))
- l += 2;
- else
- name[w++] = name[l++];
- }
+ for (l = w = 1; path[l] != '\0';) {
+ if ((flags & AP_NORMALIZE_PATH_PARAMETERS) && path[l] == ';') {
+ /* Remove path parameters ;foo=bar/ from any path segment */
+ do {
+ l++;
+ } while (!IS_SLASH_OR_NUL(path[l]));
+ continue;
+ }
- /* b) remove trailing . path, segment */
- if (w == 1 && name[0] == '.')
- w--;
- else if (w > 1 && name[w - 1] == '.' && IS_SLASH(name[w - 2]))
- w--;
- name[w] = '\0';
+ /* RFC-3986 section 2.3:
+ * For consistency, percent-encoded octets in the ranges of
+ * ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
+ * period (%2E), underscore (%5F), or tilde (%7E) should [...]
+ * be decoded to their corresponding unreserved characters by
+ * URI normalizers.
+ */
+ if (path[l] == '%' && apr_isxdigit(path[l + 1])
+ && apr_isxdigit(path[l + 2])) {
+ const char c = x2c(&path[l + 1]);
+ if (apr_isalnum(c) || (c && strchr("-._~", c))) {
+ /* Replace last char and fall through as the current
+ * read position */
+ l += 2;
+ path[l] = c;
+ }
+ }
- /* c) remove all xx/../ segments. (including leading ../ and /../) */
- l = first_dot;
+ if (IS_SLASH(path[w - 1])) {
+ if ((flags & AP_NORMALIZE_MERGE_SLASHES) && IS_SLASH(path[l])) {
+ /* Collapse ///// sequences to / */
+ do {
+ l++;
+ } while (IS_SLASH(path[l]));
+ continue;
+ }
- while (name[l] != '\0') {
- if (name[l] == '.' && name[l + 1] == '.' && IS_SLASH(name[l + 2])
- && (l == 0 || IS_SLASH(name[l - 1]))) {
- int m = l + 3, n;
+ if (path[l] == '.') {
+ if (IS_SLASH_OR_NUL(path[l + 1])) {
+ /* Remove /./ segments */
+ l++;
+ if (path[l]) {
+ l++;
+ }
+ continue;
+ }
- l = l - 2;
- if (l >= 0) {
- while (l >= 0 && !IS_SLASH(name[l]))
- l--;
- l++;
+ if (path[l + 1] == '.' && IS_SLASH_OR_NUL(path[l + 2])) {
+ /* Remove /xx/../ segments */
+ if (w == 1) {
+ if (flags & AP_NORMALIZE_NOT_ABOVE_ROOT) {
+ return 0;
+ }
+ l += 2;
+ continue;
+ }
+
+ /* Wind w back to remove the previous segment */
+ do {
+ w--;
+ } while (!IS_SLASH(path[w - 1]));
+
+ /* Move l forward to the next segment */
+ l += 2;
+ if (path[l]) {
+ l++;
+ }
+ continue;
+ }
}
- else
- l = 0;
- n = l;
- while ((name[n] = name[m]))
- (++n, ++m);
}
- else
- ++l;
+
+ path[w++] = path[l++];
}
+ path[w] = '\0';
- /* d) remove trailing xx/.. segment. */
- if (l == 2 && name[0] == '.' && name[1] == '.')
- name[0] = '\0';
- else if (l > 2 && name[l - 1] == '.' && name[l - 2] == '.'
- && IS_SLASH(name[l - 3])) {
- l = l - 4;
- if (l >= 0) {
- while (l >= 0 && !IS_SLASH(name[l]))
- l--;
- l++;
- }
- else
- l = 0;
- name[l] = '\0';
- }
+ return 1;
}
+
+AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags)
+{
+ return normalize_path(path, flags);
+}
+
+/*
+ * Normalize .. so we don't compromise security
+ */
+AP_DECLARE(void) ap_getparents(char *name)
+{
+ (void)normalize_path(name, 0);
+}
+
AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path)
{