Some time ago, I posted a draft fix for PR#41798:
http://www.mail-archive.com/[email protected]/msg37836.html
It attracted some comments here, and needed further work.
I've written and test-driven a slightly more sophisticated patch:
* ProxyPass directive accepts an optional "nocanon" keyword,
that tells us not to canonicalise the URL. Without "nocanon",
behaviour is unchanged.
* proxy_trans checks nocanon. If set, it constructs r->filename
from r->unparsed_uri. To deal with Rudiger's objections to
my previous patch, it does so only after matching the ProxyPass
to r->uri. If there's a mismatch between the two, indicating
path cleaning, it issues a 301 redirect, as indicated by Roy
for when we change a URL.
* If nocanon is set, then HTTP canonicalisation skips
ap_proxy_canonenc. This is in line with the forward-proxy
fix to PR#42592.
New patch attached, soliciting review.
--
Nick Kew
Application Development with Apache - the Apache Modules Book
http://www.apachetutor.org/
Index: modules/proxy/mod_proxy_http.c
===================================================================
--- modules/proxy/mod_proxy_http.c (revision 587155)
+++ modules/proxy/mod_proxy_http.c (working copy)
@@ -82,13 +82,19 @@
/* process path */
/* In a reverse proxy, our URL has been processed, so canonicalise
- * In a forward proxy, we have and MUST NOT MANGLE the original,
- * so just check it for disallowed chars.
+ * unless proxy-nocanon is set to say it's raw
+ * In a forward proxy, we have and MUST NOT MANGLE the original.
*/
switch (r->proxyreq) {
default: /* wtf are we doing here? */
case PROXYREQ_REVERSE:
- path = ap_proxy_canonenc(r->pool, url, strlen(url), enc_path, 0, r->proxyreq);
+ if (apr_table_get(r->notes, "proxy-nocanon")) {
+ path = url; /* this is the raw path */
+ }
+ else {
+ path = ap_proxy_canonenc(r->pool, url, strlen(url),
+ enc_path, 0, r->proxyreq);
+ }
break;
case PROXYREQ_PROXY:
path = url;
Index: modules/proxy/mod_proxy.c
===================================================================
--- modules/proxy/mod_proxy.c (revision 587155)
+++ modules/proxy/mod_proxy.c (working copy)
@@ -228,12 +228,12 @@
else
worker->status &= ~PROXY_WORKER_HOT_STANDBY;
}
- else if (*v == 'I' || *v == 'i') {
- if (mode)
- worker->status |= PROXY_WORKER_IGNORE_ERRORS;
- else
- worker->status &= ~PROXY_WORKER_IGNORE_ERRORS;
- }
+ else if (*v == 'I' || *v == 'i') {
+ if (mode)
+ worker->status |= PROXY_WORKER_IGNORE_ERRORS;
+ else
+ worker->status &= ~PROXY_WORKER_IGNORE_ERRORS;
+ }
else {
return "Unknown status parameter option";
}
@@ -509,7 +509,9 @@
const char *fake;
const char *real;
ap_regmatch_t regm[AP_MAX_REG_MATCH];
+ ap_regmatch_t reg1[AP_MAX_REG_MATCH];
char *found = NULL;
+ int mismatch = 0;
if (r->proxyreq) {
/* someone has already set up the proxy, it was possibly ourselves
@@ -524,6 +526,7 @@
*/
for (i = 0; i < conf->aliases->nelts; i++) {
+ const char *use_uri = ent[i].nocanon ? r->unparsed_uri : r->uri;
if (dconf->interpolate_env == 1) {
fake = proxy_interpolate(r, ent[i].fake);
real = proxy_interpolate(r, ent[i].real);
@@ -537,27 +540,36 @@
if ((real[0] == '!') && (real[1] == '\0')) {
return DECLINED;
}
- found = ap_pregsub(r->pool, real, r->uri, AP_MAX_REG_MATCH,
- regm);
- /* Note: The strcmp() below catches cases where there
- * was no regex substitution. This is so cases like:
- *
- * ProxyPassMatch \.gif balancer://foo
- *
- * will work "as expected". The upshot is that the 2
- * directives below act the exact same way (ie: $1 is implied):
- *
- * ProxyPassMatch ^(/.*\.gif)$ balancer://foo
- * ProxyPassMatch ^(/.*\.gif)$ balancer://foo$1
- *
- * which may be confusing.
- */
- if (found && strcmp(found, real)) {
- found = apr_pstrcat(r->pool, "proxy:", found, NULL);
+ /* test that we haven't reduced the URI */
+ if (ent[i].nocanon
+ && ap_regexec(ent[i].regex, r->unparsed_uri,
+ AP_MAX_REG_MATCH, reg1, 0)) {
+ mismatch = 1;
}
else {
- found = apr_pstrcat(r->pool, "proxy:", real, r->uri,
- NULL);
+ found = ap_pregsub(r->pool, real, use_uri,
+ AP_MAX_REG_MATCH,
+ use_uri == r->uri ? regm : reg1);
+ /* Note: The strcmp() below catches cases where there
+ * was no regex substitution. This is so cases like:
+ *
+ * ProxyPassMatch \.gif balancer://foo
+ *
+ * will work "as expected". The upshot is that the 2
+ * directives below act the exact same way (ie: $1 is implied):
+ *
+ * ProxyPassMatch ^(/.*\.gif)$ balancer://foo
+ * ProxyPassMatch ^(/.*\.gif)$ balancer://foo$1
+ *
+ * which may be confusing.
+ */
+ if (found && strcmp(found, real)) {
+ found = apr_pstrcat(r->pool, "proxy:", found, NULL);
+ }
+ else {
+ found = apr_pstrcat(r->pool, "proxy:", real,
+ use_uri, NULL);
+ }
}
}
}
@@ -568,16 +580,37 @@
if ((real[0] == '!') && (real[1] == '\0')) {
return DECLINED;
}
-
- found = apr_pstrcat(r->pool, "proxy:", real,
- r->uri + len, NULL);
-
+ if (ent[i].nocanon
+ && len != alias_match(r->unparsed_uri, ent[i].fake)) {
+ mismatch = 1;
+ }
+ else {
+ found = apr_pstrcat(r->pool, "proxy:", real,
+ use_uri + len, NULL);
+ }
}
}
+ if (mismatch) {
+ /* We made a reducing transformation, so we can't safely use
+ * unparsed_uri. Send a redirect to the canonicalised URI
+ * instead. FIXME: this breaks if protocol isn't http.
+ */
+ use_uri = apr_pstrcat(r->pool, "http://",
+ r->hostname?r->hostname:r->server->server_hostname,
+ ":", r->parsed_uri.port?r->parsed_uri.port_str:"80",
+ r->uri, NULL);
+ apr_table_setn(r->err_headers_out, "Location", use_uri);
+ return HTTP_MOVED_PERMANENTLY;
+ }
+
if (found) {
r->filename = found;
r->handler = "proxy-server";
r->proxyreq = PROXYREQ_REVERSE;
+ if (ent[i].nocanon) {
+ /* mod_proxy_http needs to be told. Different module. */
+ apr_table_set(r->notes, "proxy-nocanon", "1");
+ }
return OK;
}
}
@@ -1185,6 +1218,7 @@
const apr_table_entry_t *elts;
int i;
int use_regex = is_regex;
+ int nocanon = 0;
while (*arg) {
word = ap_getword_conf(cmd->pool, &arg);
@@ -1198,8 +1232,12 @@
}
f = word;
}
- else if (!r)
+ else if (!r) {
r = word;
+ }
+ else if (!strcasecmp(word,"nocanon")) {
+ nocanon = 1;
+ }
else {
char *val = strchr(word, '=');
if (!val) {
@@ -1230,6 +1268,7 @@
new = apr_array_push(conf->aliases);
new->fake = apr_pstrdup(cmd->pool, f);
new->real = apr_pstrdup(cmd->pool, r);
+ new->nocanon = nocanon;
if (use_regex) {
new->regex = ap_pregcomp(cmd->pool, f, AP_REG_EXTENDED);
if (new->regex == NULL)
Index: modules/proxy/mod_proxy.h
===================================================================
--- modules/proxy/mod_proxy.h (revision 587155)
+++ modules/proxy/mod_proxy.h (working copy)
@@ -113,6 +113,7 @@
const char *real;
const char *fake;
ap_regex_t *regex;
+ int nocanon;
};
struct dirconn_entry {