On Thu, Jun 11, 2020 at 1:22 PM Yann Ylavic <ylavic....@gmail.com> wrote:
>
> On Thu, Jun 11, 2020 at 9:57 AM Yann Ylavic <ylavic....@gmail.com> wrote:
> >
> > On Thu, Jun 11, 2020 at 9:50 AM Yann Ylavic <ylavic....@gmail.com> wrote:
> > >
> > > We need a way to forward non %-decoded URLs upto mod_proxy (reverse)
> > > if we want to normalize a second time..
> >
> > IOW, this block in ap_process_request_internal():
> [snip]
> > Should go _after_ the following:
> [snip]
>
> Or we could introduce a new pre_translate_name hook which would
> execute before %-decoding, and be used by mod_proxy when
> "ProxyPreTranslation on" is configured, and be a prerequisite for
> mapping=servlet.
>
> I find ProxyPreTranslation also useful for the non-servlet case btw.
>
> Something like this attached v2 patch.

Here is a v3 with the relevant pre_translate_name hooks only and
ap_getparents() preserved when the URI does not start with '/' (which
makes the patch read better too).

>
> Regards;
> Yann.
Index: include/http_request.h
===================================================================
--- include/http_request.h	(revision 1878328)
+++ include/http_request.h	(working copy)
@@ -365,6 +365,16 @@ AP_DECLARE_HOOK(int,create_request,(request_rec *r
 
 /**
  * This hook allow modules an opportunity to translate the URI into an
+ * actual filename, before URL decoding happens.
+ * rules will be followed.
+ * @param r The current request
+ * @return OK, DECLINED, or HTTP_...
+ * @ingroup hooks
+ */
+AP_DECLARE_HOOK(int,pre_translate_name,(request_rec *r))
+
+/**
+ * This hook allow modules an opportunity to translate the URI into an
  * actual filename.  If no modules do anything special, the server's default
  * rules will be followed.
  * @param r The current request
Index: include/httpd.h
===================================================================
--- include/httpd.h	(revision 1878328)
+++ include/httpd.h	(working copy)
@@ -1779,8 +1779,21 @@ AP_DECLARE(void) ap_no2slash(char *name)
 AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path)
                  AP_FN_ATTR_NONNULL_ALL;
 
+#define AP_NORMALIZE_NOT_ABOVE_ROOT     (1u <<  0)
+#define AP_NORMALIZE_MERGE_SLASHES      (1u <<  1)
+#define AP_NORMALIZE_PATH_PARAMETERS    (1u <<  2)
 
 /**
+ * Remove all ////, /./ and /xx/../ substrings from a path, and more
+ * depending on passed in flags.
+ * @param path The path to normalize
+ * @param flags bitmask of AP_NORMALIZE_* flags
+ * @return non-zero on success
+ */
+AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags)
+                AP_FN_ATTR_NONNULL((1));
+
+/**
  * Remove all ./ and xx/../ substrings from a file name. Also remove
  * any leading ../ or /../ substrings.
  * @param name the file name to parse
Index: modules/dav/main/util.c
===================================================================
--- modules/dav/main/util.c	(revision 1878328)
+++ modules/dav/main/util.c	(working copy)
@@ -664,7 +664,11 @@ static dav_error * dav_process_if_header(request_r
             /* note that parsed_uri.path is allocated; we can trash it */
 
             /* clean up the URI a bit */
-            ap_getparents(parsed_uri.path);
+            if (!ap_normalize_path(parsed_uri.path, 0)) {
+                return dav_new_error(r->pool, HTTP_BAD_REQUEST,
+                                     DAV_ERR_IF_TAGGED, rv,
+                                     "Invalid URI path tagged If-header.");
+            }
 
             /* the resources we will compare to have unencoded paths */
             if (ap_unescape_url(parsed_uri.path) != OK) {
Index: modules/generators/mod_autoindex.c
===================================================================
--- modules/generators/mod_autoindex.c	(revision 1878328)
+++ modules/generators/mod_autoindex.c	(working copy)
@@ -1266,8 +1266,7 @@ static struct ent *make_parent_entry(apr_int32_t a
     if (!(p->name = ap_make_full_path(r->pool, r->uri, "../"))) {
         return (NULL);
     }
-    ap_getparents(p->name);
-    if (!*p->name) {
+    if (!ap_normalize_path(p->name, 0)) {
         return (NULL);
     }
 
Index: modules/generators/mod_info.c
===================================================================
--- modules/generators/mod_info.c	(revision 1878328)
+++ modules/generators/mod_info.c	(working copy)
@@ -322,6 +322,7 @@ static const hook_lookup_t request_hooks[] = {
     {"HTTP Scheme", ap_hook_get_http_scheme},
     {"Default Port", ap_hook_get_default_port},
     {"Quick Handler", ap_hook_get_quick_handler},
+    {"Pre-Translate Name", ap_hook_get_pre_translate_name},
     {"Translate Name", ap_hook_get_translate_name},
     {"Map to Storage", ap_hook_get_map_to_storage},
     {"Check Access", ap_hook_get_access_checker_ex},
Index: modules/proxy/mod_proxy.c
===================================================================
--- modules/proxy/mod_proxy.c	(revision 1878328)
+++ modules/proxy/mod_proxy.c	(working copy)
@@ -753,6 +753,26 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
                 mismatch = 1;
                 use_uri = r->uri;
             }
+            else if (dconf->pre_trans > 0
+                     && (ent->flags & PROXYPASS_MAPPING_SERVLET)) {
+                char *uri = apr_pstrdup(r->pool, r->uri);
+
+                /* check against the backend servlet url */
+                if (!ap_normalize_path(uri, AP_NORMALIZE_MERGE_SLASHES |
+                                            AP_NORMALIZE_PATH_PARAMETERS)) {
+                    ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
+                                  "servlet normalization failed for '%s'", r->uri);
+                    return HTTP_INTERNAL_SERVER_ERROR;
+                }
+
+                if (!alias_match(uri, ent->fake)) {
+                    ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO()
+                                  "servlet normalization for '%s' mismatch; "
+                                  "declining '%s'", r->uri, uri);
+                    return DECLINED;
+                }
+            }
+
             found = apr_pstrcat(r->pool, "proxy:", real, use_uri + len, NULL);
         }
     }
@@ -795,7 +815,7 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
     return DONE;
 }
 
-static int proxy_trans(request_rec *r)
+static int proxy_trans(request_rec *r, int pre)
 {
     int i;
     struct proxy_alias *ent;
@@ -825,6 +845,10 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
 
     dconf = ap_get_module_config(r->per_dir_config, &proxy_module);
 
+    if ((pre != 0) ^ (dconf->pre_trans > 0)) {
+        return DECLINED;
+    }
+
     /* short way - this location is reverse proxied? */
     if (dconf->alias) {
         int rv = ap_proxy_trans_match(r, dconf->alias, dconf);
@@ -849,6 +873,15 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_re
     return DECLINED;
 }
 
+static int proxy_pre_translate_name(request_rec *r)
+{
+    return proxy_trans(r, 1);
+}
+static int proxy_translate_name(request_rec *r)
+{
+    return proxy_trans(r, 0);
+}
+
 static int proxy_walk(request_rec *r)
 {
     proxy_server_conf *sconf = ap_get_module_config(r->server->module_config,
@@ -1593,6 +1626,7 @@ static void *create_proxy_dir_config(apr_pool_t *p
     new->add_forwarded_headers_set = 0;
     new->forward_100_continue = 1;
     new->forward_100_continue_set = 0;
+    new->pre_trans = -1; /* unset */
 
     return (void *) new;
 }
@@ -1651,6 +1685,9 @@ static void *merge_proxy_dir_config(apr_pool_t *p,
     new->forward_100_continue_set = add->forward_100_continue_set
                                     || base->forward_100_continue_set;
     
+    new->pre_trans = (add->pre_trans == -1) ? base->pre_trans
+                                            : add->pre_trans;
+
     return new;
 }
 
@@ -1806,6 +1843,9 @@ static const char *
         else if (!strcasecmp(word,"noquery")) {
             flags |= PROXYPASS_NOQUERY;
         }
+        else if (!strcasecmp(word,"mapping=servlet")) {
+            flags |= PROXYPASS_MAPPING_SERVLET;
+        }
         else {
             char *val = strchr(word, '=');
             if (!val) {
@@ -2799,6 +2839,9 @@ static const command_rec proxy_cmds[] =
     AP_INIT_FLAG("Proxy100Continue", forward_100_continue, NULL, RSRC_CONF|ACCESS_CONF,
      "on if 100-Continue should be forwarded to the origin server, off if the "
      "proxy should handle it by itself"),
+    AP_INIT_FLAG("ProxyPreTranslate", ap_set_flag_slot_char,
+        (void*)APR_OFFSETOF(proxy_dir_conf, pre_trans),
+        RSRC_CONF|ACCESS_CONF, "Run URI translation with no URL percent decoding"),
     {NULL}
 };
 
@@ -3147,7 +3190,10 @@ static void register_hooks(apr_pool_t *p)
     /* handler */
     ap_hook_handler(proxy_handler, NULL, NULL, APR_HOOK_FIRST);
     /* filename-to-URI translation */
-    ap_hook_translate_name(proxy_trans, aszSucc, NULL, APR_HOOK_FIRST);
+    ap_hook_pre_translate_name(proxy_pre_translate_name, aszSucc, NULL,
+                               APR_HOOK_MIDDLE);
+    ap_hook_translate_name(proxy_translate_name, aszSucc, NULL,
+                           APR_HOOK_FIRST);
     /* walk <Proxy > entries and suppress default TRACE behavior */
     ap_hook_map_to_storage(proxy_map_location, NULL,NULL, APR_HOOK_FIRST);
     /* fixups */
Index: modules/proxy/mod_proxy.h
===================================================================
--- modules/proxy/mod_proxy.h	(revision 1878328)
+++ modules/proxy/mod_proxy.h	(working copy)
@@ -123,6 +123,7 @@ struct proxy_remote {
 #define PROXYPASS_NOCANON 0x01
 #define PROXYPASS_INTERPOLATE 0x02
 #define PROXYPASS_NOQUERY 0x04
+#define PROXYPASS_MAPPING_SERVLET 0x08
 struct proxy_alias {
     const char  *real;
     const char  *fake;
@@ -243,6 +244,9 @@ typedef struct {
     unsigned int forward_100_continue_set:1;
 
     apr_array_header_t *error_override_codes;
+
+    /** Whether to run translation in pre_translate_name hook */
+    int pre_trans;
 } proxy_dir_conf;
 
 /* if we interpolate env vars per-request, we'll need a per-request
Index: server/protocol.c
===================================================================
--- server/protocol.c	(revision 1878328)
+++ server/protocol.c	(working copy)
@@ -627,6 +627,14 @@ AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r,
     }
     else {
         status = apr_uri_parse(r->pool, uri, &r->parsed_uri);
+        if (status == APR_SUCCESS
+            && (r->parsed_uri.path != NULL)
+            && (r->parsed_uri.path[0] != '/')
+            && (r->method_number != M_OPTIONS
+                || strcmp(r->parsed_uri.path, "*") != 0)) {
+            /* Invalid request-target per rfc7230#section-5.3 */
+            status = APR_EINVAL;
+        }
     }
 
     if (status == APR_SUCCESS) {
@@ -640,8 +648,15 @@ AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r,
         }
 
         r->args = r->parsed_uri.query;
-        r->uri = r->parsed_uri.path ? r->parsed_uri.path
-                 : apr_pstrdup(r->pool, "/");
+        if (r->parsed_uri.path) {
+            r->uri = r->parsed_uri.path;
+        }
+        else if (r->method_number == M_OPTIONS) {
+            r->uri = apr_pstrdup(r->pool, "*");
+        }
+        else {
+            r->uri = apr_pstrdup(r->pool, "/");
+        }
 
 #if defined(OS2) || defined(WIN32)
         /* Handle path translations for OS/2 and plug security hole.
Index: server/request.c
===================================================================
--- server/request.c	(revision 1878328)
+++ server/request.c	(working copy)
@@ -59,6 +59,7 @@
 #define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX
 
 APR_HOOK_STRUCT(
+    APR_HOOK_LINK(pre_translate_name)
     APR_HOOK_LINK(translate_name)
     APR_HOOK_LINK(map_to_storage)
     APR_HOOK_LINK(check_user_id)
@@ -74,6 +75,8 @@ APR_HOOK_STRUCT(
     APR_HOOK_LINK(force_authn)
 )
 
+AP_IMPLEMENT_HOOK_RUN_FIRST(int,pre_translate_name,
+                            (request_rec *r), (r), DECLINED)
 AP_IMPLEMENT_HOOK_RUN_FIRST(int,translate_name,
                             (request_rec *r), (r), DECLINED)
 AP_IMPLEMENT_HOOK_RUN_FIRST(int,map_to_storage,
@@ -170,11 +173,35 @@ AP_DECLARE(int) ap_process_request_internal(reques
     core_server_config *sconf =
         ap_get_core_module_config(r->server->module_config);
 
-    /* Ignore embedded %2F's in path for proxy requests */
-    if (!r->proxyreq && r->parsed_uri.path) {
+    if (r->parsed_uri.path) {
+        unsigned int normalize_flags = 0;
+        if (sconf->merge_slashes != AP_CORE_CONFIG_OFF) { 
+            normalize_flags |= AP_NORMALIZE_MERGE_SLASHES;
+        }
+
+        /* Shrinking transformations */
+        if (!ap_normalize_path(r->parsed_uri.path, normalize_flags)) {
+            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO()
+                          "invalid URI path (%s)", r->uri);
+            return HTTP_BAD_REQUEST;
+        }
+
+        /* Apply transformations */
+        r->uri = r->parsed_uri.path;
+    }
+
+    /* Let pre_translate_name hooks own non-decoded URIs */
+    access_status = ap_run_pre_translate_name(r);
+    if (access_status != OK && access_status != DECLINED) {
+        return access_status;
+    }
+
+    /* Ignore URL unescaping for translated URIs already */
+    if (access_status == DECLINED && r->parsed_uri.path) {
         d = ap_get_core_module_config(r->per_dir_config);
         if (d->allow_encoded_slashes) {
-            access_status = ap_unescape_url_keep2f(r->parsed_uri.path, d->decode_encoded_slashes);
+            access_status = ap_unescape_url_keep2f(r->parsed_uri.path,
+                                                   d->decode_encoded_slashes);
         }
         else {
             access_status = ap_unescape_url(r->parsed_uri.path);
@@ -185,21 +212,16 @@ AP_DECLARE(int) ap_process_request_internal(reques
                     ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00026)
                                   "found %%2f (encoded '/') in URI "
                                   "(decoded='%s'), returning 404",
-                                  r->parsed_uri.path);
+                                  r->uri);
                 }
             }
             return access_status;
         }
+
+        /* Apply transformations */
+        r->uri = r->parsed_uri.path;
     }
 
-    ap_getparents(r->uri);     /* OK --- shrinking transformations... */
-    if (sconf->merge_slashes != AP_CORE_CONFIG_OFF) { 
-        ap_no2slash(r->uri);
-        if (r->parsed_uri.path) {
-            ap_no2slash(r->parsed_uri.path);
-        }
-     }
-
     /* All file subrequests are a huge pain... they cannot bubble through the
      * next several steps.  Only file subrequests are allowed an empty uri,
      * otherwise let translate_name kill the request.
Index: server/util.c
===================================================================
--- server/util.c	(revision 1878328)
+++ server/util.c	(working copy)
@@ -77,7 +77,7 @@
 #include "test_char.h"
 
 /* Win32/NetWare/OS2 need to check for both forward and back slashes
- * in ap_getparents() and ap_escape_url.
+ * in ap_normalize_path() and ap_escape_url().
  */
 #ifdef CASE_BLIND_FILESYSTEM
 #define IS_SLASH(s) ((s == '/') || (s == '\\'))
@@ -492,8 +492,108 @@ AP_DECLARE(apr_status_t) ap_pregsub_ex(apr_pool_t
     return rc;
 }
 
+/* Forward declare */
+static char x2c(const char *what);
+
+#define IS_SLASH_OR_NUL(s) (s == '\0' || IS_SLASH(s))
+
+static int normalize_path(char *path, unsigned int flags)
+{
+    apr_size_t l, w;
+
+    /* We assume path[0] == '/' below */
+    if (!IS_SLASH(path[0])) {
+        /* Besides "OPTIONS *" requests, a request-target path should start
+         * with '/' (RFC-7230 section-5.3), so anything else is invalid.
+         */
+        return (path[0] == '*' && path[1] == '\0');
+    }
+
+    for (l = w = 1; path[l] != '\0';) {
+        if ((flags & AP_NORMALIZE_PATH_PARAMETERS) && path[l] == ';') {
+            /* Remove path parameters ;foo=bar/ from any path segment */
+            do {
+                l++;
+            } while (!IS_SLASH_OR_NUL(path[l]));
+            continue;
+        }
+
+        /* RFC-3986 section 2.3:
+         *  For consistency, percent-encoded octets in the ranges of
+         *  ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
+         *  period (%2E), underscore (%5F), or tilde (%7E) should [...]
+         *  be decoded to their corresponding unreserved characters by
+         *  URI normalizers.
+         */
+        if (path[l] == '%' && apr_isxdigit(path[l + 1])
+                           && apr_isxdigit(path[l + 2])) {
+            const char c = x2c(&path[l + 1]);
+            if (apr_isalnum(c) || (c && strchr("-._~", c))) {
+                /* Replace last char and fall through as the current
+                 * read position */
+                l += 2;
+                path[l] = c;
+            }
+        }
+
+        if (IS_SLASH(path[w - 1])) {
+            if ((flags & AP_NORMALIZE_MERGE_SLASHES) && IS_SLASH(path[l])) {
+                /* Collapse ///// sequences to / */
+                do {
+                    l++;
+                } while (IS_SLASH(path[l]));
+                continue;
+            }
+
+            if (path[l] == '.') {
+                if (IS_SLASH_OR_NUL(path[l + 1])) {
+                    /* Remove /./ segments */
+                    l++;
+                    if (path[l]) {
+                        l++;
+                    }
+                    continue;
+                }
+
+                if (path[l + 1] == '.' && IS_SLASH_OR_NUL(path[l + 2])) {
+                    /* Remove /xx/../ segments */
+                    if (w == 1) {
+                        if (flags & AP_NORMALIZE_NOT_ABOVE_ROOT) {
+                            return 0;
+                        }
+                        l += 2;
+                        continue;
+                    }
+
+                    /* Wind w back to remove the previous segment */
+                    do {
+                        w--;
+                    } while (!IS_SLASH(path[w - 1]));
+
+                    /* Move l forward to the next segment */
+                    l += 2;
+                    if (path[l]) {
+                        l++;
+                    }
+                    continue;
+                }
+            }
+        }
+
+        path[w++] = path[l++];
+    }
+    path[w] = '\0';
+
+    return 1;
+}
+
+AP_DECLARE(int) ap_normalize_path(char *path, unsigned int flags)
+{
+    return normalize_path(path, flags);
+}
+
 /*
- * Parse .. so we don't compromise security
+ * Normalize .. so we don't compromise security
  */
 AP_DECLARE(void) ap_getparents(char *name)
 {
@@ -500,6 +600,14 @@ AP_DECLARE(void) ap_getparents(char *name)
     char *next;
     int l, w, first_dot;
 
+    /* If the path starts with '/', ap_normalize_path() is faster.
+     * Otherthise fallback to legacy four passes.
+     */
+    if (IS_SLASH(name[0])) {
+        (void)normalize_path(name, 0);
+        return;
+    }
+
     /* Four paseses, as per RFC 1808 */
     /* a) remove ./ path segments */
     for (next = name; *next && (*next != '.'); next++) {
@@ -561,6 +669,7 @@ AP_DECLARE(void) ap_getparents(char *name)
         name[l] = '\0';
     }
 }
+
 AP_DECLARE(void) ap_no2slash_ex(char *name, int is_fs_path)
 {
 

Reply via email to