Some time ago, I posted a draft fix for PR#41798:
http://www.mail-archive.com/dev@httpd.apache.org/msg37836.html

It attracted some comments here, and needed further work.

I've written and test-driven a slightly more sophisticated patch:

* ProxyPass directive accepts an optional "nocanon" keyword,
  that tells us not to canonicalise the URL.  Without "nocanon",
  behaviour is unchanged.

* proxy_trans checks nocanon.  If set, it constructs r->filename
  from r->unparsed_uri.  To deal with Rudiger's objections to
  my previous patch, it does so only after matching the ProxyPass
  to r->uri.  If there's a mismatch between the two, indicating
  path cleaning, it issues a 301 redirect, as indicated by Roy
  for when we change a URL.

* If nocanon is set, then HTTP canonicalisation skips
  ap_proxy_canonenc.  This is in line with the forward-proxy
  fix to PR#42592.

New patch attached, soliciting review.

-- 
Nick Kew

Application Development with Apache - the Apache Modules Book
http://www.apachetutor.org/
Index: modules/proxy/mod_proxy_http.c
===================================================================
--- modules/proxy/mod_proxy_http.c	(revision 587155)
+++ modules/proxy/mod_proxy_http.c	(working copy)
@@ -82,13 +82,19 @@
 
     /* process path */
     /* In a reverse proxy, our URL has been processed, so canonicalise
-     * In a forward proxy, we have and MUST NOT MANGLE the original,
-     * so just check it for disallowed chars.
+     * unless proxy-nocanon is set to say it's raw
+     * In a forward proxy, we have and MUST NOT MANGLE the original.
      */
     switch (r->proxyreq) {
     default: /* wtf are we doing here? */
     case PROXYREQ_REVERSE:
-        path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path, 0, r->proxyreq);
+        if (apr_table_get(r->notes, "proxy-nocanon")) {
+            path = url;   /* this is the raw path */
+        }
+        else {
+            path = ap_proxy_canonenc(r->pool, url, strlen(url),
+                                     enc_path, 0, r->proxyreq);
+        }
         break;
     case PROXYREQ_PROXY:
         path = url;
Index: modules/proxy/mod_proxy.c
===================================================================
--- modules/proxy/mod_proxy.c	(revision 587155)
+++ modules/proxy/mod_proxy.c	(working copy)
@@ -228,12 +228,12 @@
                 else
                     worker->status &= ~PROXY_WORKER_HOT_STANDBY;
             }
-	    else if (*v == 'I' || *v == 'i') {
-	    	if (mode)
-		    worker->status |= PROXY_WORKER_IGNORE_ERRORS;
-		else
-		    worker->status &= ~PROXY_WORKER_IGNORE_ERRORS;
-	    }
+            else if (*v == 'I' || *v == 'i') {
+                if (mode)
+                    worker->status |= PROXY_WORKER_IGNORE_ERRORS;
+                else
+                    worker->status &= ~PROXY_WORKER_IGNORE_ERRORS;
+            }
             else {
                 return "Unknown status parameter option";
             }
@@ -509,7 +509,9 @@
     const char *fake;
     const char *real;
     ap_regmatch_t regm[AP_MAX_REG_MATCH];
+    ap_regmatch_t reg1[AP_MAX_REG_MATCH];
     char *found = NULL;
+    int mismatch = 0;
 
     if (r->proxyreq) {
         /* someone has already set up the proxy, it was possibly ourselves
@@ -524,6 +526,7 @@
      */
 
     for (i = 0; i < conf->aliases->nelts; i++) {
+        const char *use_uri = ent[i].nocanon ? r->unparsed_uri : r->uri;
         if (dconf->interpolate_env == 1) {
             fake = proxy_interpolate(r, ent[i].fake);
             real = proxy_interpolate(r, ent[i].real);
@@ -537,27 +540,36 @@
                 if ((real[0] == '!') && (real[1] == '\0')) {
                     return DECLINED;
                 }
-                found = ap_pregsub(r->pool, real, r->uri, AP_MAX_REG_MATCH,
-                                   regm);
-                /* Note: The strcmp() below catches cases where there
-                 * was no regex substitution. This is so cases like:
-                 *
-                 *    ProxyPassMatch \.gif balancer://foo
-                 *
-                 * will work "as expected". The upshot is that the 2
-                 * directives below act the exact same way (ie: $1 is implied):
-                 *
-                 *    ProxyPassMatch ^(/.*\.gif)$ balancer://foo
-                 *    ProxyPassMatch ^(/.*\.gif)$ balancer://foo$1
-                 *
-                 * which may be confusing.
-                 */
-                if (found && strcmp(found, real)) {
-                    found = apr_pstrcat(r->pool, "proxy:", found, NULL);
+                /* test that we haven't reduced the URI */
+                if (ent[i].nocanon
+                    && ap_regexec(ent[i].regex, r->unparsed_uri,
+                                  AP_MAX_REG_MATCH, reg1, 0)) {
+                    mismatch = 1;
                 }
                 else {
-                    found = apr_pstrcat(r->pool, "proxy:", real, r->uri,
-                                        NULL);
+                    found = ap_pregsub(r->pool, real, use_uri,
+                                       AP_MAX_REG_MATCH,
+                                       use_uri == r->uri ? regm : reg1);
+                    /* Note: The strcmp() below catches cases where there
+                     * was no regex substitution. This is so cases like:
+                     *
+                     *    ProxyPassMatch \.gif balancer://foo
+                     *
+                     * will work "as expected". The upshot is that the 2
+                     * directives below act the exact same way (ie: $1 is implied):
+                     *
+                     *    ProxyPassMatch ^(/.*\.gif)$ balancer://foo
+                     *    ProxyPassMatch ^(/.*\.gif)$ balancer://foo$1
+                     *
+                     * which may be confusing.
+                     */
+                    if (found && strcmp(found, real)) {
+                        found = apr_pstrcat(r->pool, "proxy:", found, NULL);
+                    }
+                    else {
+                        found = apr_pstrcat(r->pool, "proxy:", real,
+                                            use_uri, NULL);
+                    }
                 }
             }
         }
@@ -568,16 +580,37 @@
                 if ((real[0] == '!') && (real[1] == '\0')) {
                     return DECLINED;
                 }
-
-                found = apr_pstrcat(r->pool, "proxy:", real,
-                                    r->uri + len, NULL);
-
+                if (ent[i].nocanon
+                    && len != alias_match(r->unparsed_uri, ent[i].fake)) {
+                    mismatch = 1;
+                }
+                else {
+                    found = apr_pstrcat(r->pool, "proxy:", real,
+                                        use_uri + len, NULL);
+                }
             }
         }
+        if (mismatch) {
+            /* We made a reducing transformation, so we can't safely use
+             * unparsed_uri.  Send a redirect to the canonicalised URI
+             * instead.  FIXME: this breaks if protocol isn't http.
+             */
+            use_uri = apr_pstrcat(r->pool, "http://";,
+                          r->hostname?r->hostname:r->server->server_hostname,
+                          ":", r->parsed_uri.port?r->parsed_uri.port_str:"80",
+                          r->uri, NULL);
+            apr_table_setn(r->err_headers_out, "Location", use_uri);
+            return HTTP_MOVED_PERMANENTLY;
+        }
+
         if (found) {
             r->filename = found;
             r->handler = "proxy-server";
             r->proxyreq = PROXYREQ_REVERSE;
+            if (ent[i].nocanon) {
+                /* mod_proxy_http needs to be told.  Different module. */
+                apr_table_set(r->notes, "proxy-nocanon", "1");
+            }
             return OK;
         }
     }
@@ -1185,6 +1218,7 @@
     const apr_table_entry_t *elts;
     int i;
     int use_regex = is_regex;
+    int nocanon = 0;
 
     while (*arg) {
         word = ap_getword_conf(cmd->pool, &arg);
@@ -1198,8 +1232,12 @@
             }
             f = word;
         }
-        else if (!r)
+        else if (!r) {
             r = word;
+        }
+        else if (!strcasecmp(word,"nocanon")) {
+            nocanon = 1;
+        }
         else {
             char *val = strchr(word, '=');
             if (!val) {
@@ -1230,6 +1268,7 @@
     new = apr_array_push(conf->aliases);
     new->fake = apr_pstrdup(cmd->pool, f);
     new->real = apr_pstrdup(cmd->pool, r);
+    new->nocanon = nocanon;
     if (use_regex) {
         new->regex = ap_pregcomp(cmd->pool, f, AP_REG_EXTENDED);
         if (new->regex == NULL)
Index: modules/proxy/mod_proxy.h
===================================================================
--- modules/proxy/mod_proxy.h	(revision 587155)
+++ modules/proxy/mod_proxy.h	(working copy)
@@ -113,6 +113,7 @@
     const char  *real;
     const char  *fake;
     ap_regex_t  *regex;
+    int nocanon;
 };
 
 struct dirconn_entry {

Reply via email to