Some time ago, I posted a draft fix for PR#41798: http://www.mail-archive.com/dev@httpd.apache.org/msg37836.html
It attracted some comments here, and needed further work. I've written and test-driven a slightly more sophisticated patch: * ProxyPass directive accepts an optional "nocanon" keyword, that tells us not to canonicalise the URL. Without "nocanon", behaviour is unchanged. * proxy_trans checks nocanon. If set, it constructs r->filename from r->unparsed_uri. To deal with Rudiger's objections to my previous patch, it does so only after matching the ProxyPass to r->uri. If there's a mismatch between the two, indicating path cleaning, it issues a 301 redirect, as indicated by Roy for when we change a URL. * If nocanon is set, then HTTP canonicalisation skips ap_proxy_canonenc. This is in line with the forward-proxy fix to PR#42592. New patch attached, soliciting review. -- Nick Kew Application Development with Apache - the Apache Modules Book http://www.apachetutor.org/
Index: modules/proxy/mod_proxy_http.c =================================================================== --- modules/proxy/mod_proxy_http.c (revision 587155) +++ modules/proxy/mod_proxy_http.c (working copy) @@ -82,13 +82,19 @@ /* process path */ /* In a reverse proxy, our URL has been processed, so canonicalise - * In a forward proxy, we have and MUST NOT MANGLE the original, - * so just check it for disallowed chars. + * unless proxy-nocanon is set to say it's raw + * In a forward proxy, we have and MUST NOT MANGLE the original. */ switch (r->proxyreq) { default: /* wtf are we doing here? */ case PROXYREQ_REVERSE: - path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path, 0, r->proxyreq); + if (apr_table_get(r->notes, "proxy-nocanon")) { + path = url; /* this is the raw path */ + } + else { + path = ap_proxy_canonenc(r->pool, url, strlen(url), + enc_path, 0, r->proxyreq); + } break; case PROXYREQ_PROXY: path = url; Index: modules/proxy/mod_proxy.c =================================================================== --- modules/proxy/mod_proxy.c (revision 587155) +++ modules/proxy/mod_proxy.c (working copy) @@ -228,12 +228,12 @@ else worker->status &= ~PROXY_WORKER_HOT_STANDBY; } - else if (*v == 'I' || *v == 'i') { - if (mode) - worker->status |= PROXY_WORKER_IGNORE_ERRORS; - else - worker->status &= ~PROXY_WORKER_IGNORE_ERRORS; - } + else if (*v == 'I' || *v == 'i') { + if (mode) + worker->status |= PROXY_WORKER_IGNORE_ERRORS; + else + worker->status &= ~PROXY_WORKER_IGNORE_ERRORS; + } else { return "Unknown status parameter option"; } @@ -509,7 +509,9 @@ const char *fake; const char *real; ap_regmatch_t regm[AP_MAX_REG_MATCH]; + ap_regmatch_t reg1[AP_MAX_REG_MATCH]; char *found = NULL; + int mismatch = 0; if (r->proxyreq) { /* someone has already set up the proxy, it was possibly ourselves @@ -524,6 +526,7 @@ */ for (i = 0; i < conf->aliases->nelts; i++) { + const char *use_uri = ent[i].nocanon ? r->unparsed_uri : r->uri; if (dconf->interpolate_env == 1) { fake = proxy_interpolate(r, ent[i].fake); real = proxy_interpolate(r, ent[i].real); @@ -537,27 +540,36 @@ if ((real[0] == '!') && (real[1] == '\0')) { return DECLINED; } - found = ap_pregsub(r->pool, real, r->uri, AP_MAX_REG_MATCH, - regm); - /* Note: The strcmp() below catches cases where there - * was no regex substitution. This is so cases like: - * - * ProxyPassMatch \.gif balancer://foo - * - * will work "as expected". The upshot is that the 2 - * directives below act the exact same way (ie: $1 is implied): - * - * ProxyPassMatch ^(/.*\.gif)$ balancer://foo - * ProxyPassMatch ^(/.*\.gif)$ balancer://foo$1 - * - * which may be confusing. - */ - if (found && strcmp(found, real)) { - found = apr_pstrcat(r->pool, "proxy:", found, NULL); + /* test that we haven't reduced the URI */ + if (ent[i].nocanon + && ap_regexec(ent[i].regex, r->unparsed_uri, + AP_MAX_REG_MATCH, reg1, 0)) { + mismatch = 1; } else { - found = apr_pstrcat(r->pool, "proxy:", real, r->uri, - NULL); + found = ap_pregsub(r->pool, real, use_uri, + AP_MAX_REG_MATCH, + use_uri == r->uri ? regm : reg1); + /* Note: The strcmp() below catches cases where there + * was no regex substitution. This is so cases like: + * + * ProxyPassMatch \.gif balancer://foo + * + * will work "as expected". The upshot is that the 2 + * directives below act the exact same way (ie: $1 is implied): + * + * ProxyPassMatch ^(/.*\.gif)$ balancer://foo + * ProxyPassMatch ^(/.*\.gif)$ balancer://foo$1 + * + * which may be confusing. + */ + if (found && strcmp(found, real)) { + found = apr_pstrcat(r->pool, "proxy:", found, NULL); + } + else { + found = apr_pstrcat(r->pool, "proxy:", real, + use_uri, NULL); + } } } } @@ -568,16 +580,37 @@ if ((real[0] == '!') && (real[1] == '\0')) { return DECLINED; } - - found = apr_pstrcat(r->pool, "proxy:", real, - r->uri + len, NULL); - + if (ent[i].nocanon + && len != alias_match(r->unparsed_uri, ent[i].fake)) { + mismatch = 1; + } + else { + found = apr_pstrcat(r->pool, "proxy:", real, + use_uri + len, NULL); + } } } + if (mismatch) { + /* We made a reducing transformation, so we can't safely use + * unparsed_uri. Send a redirect to the canonicalised URI + * instead. FIXME: this breaks if protocol isn't http. + */ + use_uri = apr_pstrcat(r->pool, "http://", + r->hostname?r->hostname:r->server->server_hostname, + ":", r->parsed_uri.port?r->parsed_uri.port_str:"80", + r->uri, NULL); + apr_table_setn(r->err_headers_out, "Location", use_uri); + return HTTP_MOVED_PERMANENTLY; + } + if (found) { r->filename = found; r->handler = "proxy-server"; r->proxyreq = PROXYREQ_REVERSE; + if (ent[i].nocanon) { + /* mod_proxy_http needs to be told. Different module. */ + apr_table_set(r->notes, "proxy-nocanon", "1"); + } return OK; } } @@ -1185,6 +1218,7 @@ const apr_table_entry_t *elts; int i; int use_regex = is_regex; + int nocanon = 0; while (*arg) { word = ap_getword_conf(cmd->pool, &arg); @@ -1198,8 +1232,12 @@ } f = word; } - else if (!r) + else if (!r) { r = word; + } + else if (!strcasecmp(word,"nocanon")) { + nocanon = 1; + } else { char *val = strchr(word, '='); if (!val) { @@ -1230,6 +1268,7 @@ new = apr_array_push(conf->aliases); new->fake = apr_pstrdup(cmd->pool, f); new->real = apr_pstrdup(cmd->pool, r); + new->nocanon = nocanon; if (use_regex) { new->regex = ap_pregcomp(cmd->pool, f, AP_REG_EXTENDED); if (new->regex == NULL) Index: modules/proxy/mod_proxy.h =================================================================== --- modules/proxy/mod_proxy.h (revision 587155) +++ modules/proxy/mod_proxy.h (working copy) @@ -113,6 +113,7 @@ const char *real; const char *fake; ap_regex_t *regex; + int nocanon; }; struct dirconn_entry {