On Tue, Dec 17, 2019 at 06:08:56AM +0100, Willy Tarreau wrote:
> But now I'm starting to suspect that most of the problem comes from the
> fact that people who used to rely on regex in the past will not as easily
> perform their rewrites using set-path as they would using a replace rule
> which is very similar to the old set. So probably we'd need to introduce
> a "replace-path" action and suggest it in the warning emitted for reqrep.
> 
> I think it is important that we properly address such needs and am
> willing to backport anything like this to 2.1 to ease the transition if
> that's the best solution.

What about this ? It does exactly what's needed for me. It's self-contained
enough that we could get it backported to 2.1 and maybe even to 2.0 (though
it would require some adaptations to legacy mode there).

Willy
>From 290a8473a9a8d65685a6a3c4922a581eaa7c3377 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w...@1wt.eu>
Date: Tue, 17 Dec 2019 06:52:51 +0100
Subject: MINOR: http: add a new "replace-path" action

This action is very similar to "replace-uri" except that it only acts on the
path component. This is assumed to better match users' expectations when they
used to rely on "replace-uri" in HTTP/1 because mostly origin forms were used
in H1 while mostly absolute URI form is used in H2, and their rules very often
start with a '/', and as such do not match.

It could help users to get this backported to 2.0 and 2.1.
---
 doc/configuration.txt | 27 ++++++++++++++++++++++++++-
 src/cfgparse-listen.c |  2 +-
 src/http_act.c        | 21 ++++++++++++++++-----
 3 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index fdcdb04fae..3ba340c38d 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -4463,6 +4463,30 @@ http-request replace-header <name> <match-regex> 
<replace-fmt>
     # outputs:
     User-Agent: foo
 
+http-request replace-path <match-regex> <replace-fmt>
+                           [ { if | unless } <condition> ]
+
+  This works like "replace-header" except that it works on the request's path
+  component instead of a header. The path component starts at the first '/'
+  after an optional scheme+authority. It does contain the query string if any
+  is present. The replacement does not modify the scheme nor authority.
+
+  It is worth noting that regular expressions may be more expensive to evaluate
+  than certain ACLs, so rare replacements may benefit from a condition to avoid
+  performing the evaluation at all if it does not match.
+
+  Example:
+    # prefix /foo : turn /bar?q=1 into /foo/bar?q=1 :
+    http-request replace-path (.*) /foo\1
+
+    # suffix /foo : turn /bar?q=1 into /bar/foo?q=1 :
+    http-request replace-path ([^?]*)(\?(.*))? \1/foo\2
+
+    # strip /foo : turn /foo/bar?q=1 into /bar?q=1
+    http-request replace-path /foo/(.*) /\1
+    # or more efficient if only some requests match :
+    http-request replace-path /foo/(.*) /\1 if { url_beg /foo/ }
+
 http-request replace-uri <match-regex> <replace-fmt>
                            [ { if | unless } <condition> ]
 
@@ -4484,7 +4508,8 @@ http-request replace-uri <match-regex> <replace-fmt>
   with HTTP/2, clients are encouraged to send absolute URIs only, which look
   like the ones HTTP/1 clients use to talk to proxies. Such partial replace-uri
   rules may then fail in HTTP/2 when they work in HTTP/1. Either the rules need
-  to be adapted to optionally match a scheme and authority.
+  to be adapted to optionally match a scheme and authority, or replace-path
+  should be used.
 
   Example:
     # rewrite all "http" absolute requests to "https":
diff --git a/src/cfgparse-listen.c b/src/cfgparse-listen.c
index 507e071734..9975e46876 100644
--- a/src/cfgparse-listen.c
+++ b/src/cfgparse-listen.c
@@ -3694,7 +3694,7 @@ stats_error_parsing:
        }
        else if (!strcmp(args[0], "cliexp") || !strcmp(args[0], "reqrep")) {  
/* replace request header from a regex */
                ha_alert("parsing [%s:%d] : The '%s' directive is not supported 
anymore since HAProxy 2.1. "
-                        "Use 'http-request replace-uri' and 'http-request 
replace-header' instead.\n",
+                        "Use 'http-request replace-path', 'http-request 
replace-uri' or 'http-request replace-header' instead.\n",
                         file, linenum, args[0]);
                err_code |= ERR_ALERT | ERR_FATAL;
                goto out;
diff --git a/src/http_act.c b/src/http_act.c
index d6015d3624..61200ba368 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -142,6 +142,7 @@ static enum act_return http_action_replace_uri(struct 
act_rule *rule, struct pro
        enum act_return ret = ACT_RET_ERR;
        struct buffer *replace, *output;
        struct ist uri;
+       int action;
        int len;
 
        replace = alloc_trash_chunk();
@@ -149,6 +150,13 @@ static enum act_return http_action_replace_uri(struct 
act_rule *rule, struct pro
        if (!replace || !output)
                goto leave;
        uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
+       action = 3; // replace uri
+
+       if (rule->arg.act.p[0] == (void *)1) {
+               uri = http_get_path(uri);
+               action = 1; // replace path
+       }
+
        if (!regex_exec_match2(rule->arg.act.p[1], uri.ptr, uri.len, MAX_MATCH, 
pmatch, 0))
                goto leave;
 
@@ -161,8 +169,7 @@ static enum act_return http_action_replace_uri(struct 
act_rule *rule, struct pro
        if (len == -1)
                goto leave;
 
-       /* 3 is the set-uri action */
-       http_req_replace_stline(3, output->area, len, px, s);
+       http_req_replace_stline(action, output->area, len, px, s);
 
        ret = ACT_RET_CONT;
 
@@ -172,9 +179,9 @@ leave:
        return ret;
 }
 
-/* parse a "replace-uri" http-request action.
+/* parse a "replace-uri" or "replace-path" http-request action.
  * This action takes 2 arguments (a regex and a replacement format string).
- * The resulting rule makes use of arg->act.p[0] to store the action (0 for 
now),
+ * The resulting rule makes use of arg->act.p[0] to store the action (0/1 for 
now),
  * p[1] to store the compiled regex, and arg->act.p[2..3] to store the 
log-format
  * list head. It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
  */
@@ -185,7 +192,10 @@ static enum act_parse_ret parse_replace_uri(const char 
**args, int *orig_arg, st
        char *error = NULL;
 
        rule->action = ACT_CUSTOM;
-       rule->arg.act.p[0] = (void *)0; // replace-uri
+       if (strcmp(args[cur_arg-1], "replace-path") == 0)
+               rule->arg.act.p[0] = (void *)1; // replace-path
+       else
+               rule->arg.act.p[0] = (void *)0; // replace-uri
        rule->action_ptr = http_action_replace_uri;
 
        if (!*args[cur_arg] || !*args[cur_arg+1] ||
@@ -691,6 +701,7 @@ static struct action_kw_list http_req_actions = {
                { "capture",    parse_http_req_capture },
                { "reject",     parse_http_action_reject },
                { "disable-l7-retry", parse_http_req_disable_l7_retry },
+               { "replace-path", parse_replace_uri },
                { "replace-uri", parse_replace_uri },
                { "set-method", parse_set_req_line },
                { "set-path",   parse_set_req_line },
-- 
2.20.1

Reply via email to