Willy,
Christopher,

something simple for a start. This one adds the `http-request normalize-uri`
action and a very simple normalizer to test whether it works. Turns out it
does :-)

You can see the new `ist` helpers in action already. I'm pretty happy that
I was able to implement this completely with the new `ist` API.

Best regards
Tim Düsterhus

Apply with `git am --scissors` to automatically cut the commit message.

-- >8 --
This patch adds the `http-request normalize-uri` action that was requested in
GitHub issue #714.

Currently only a `merge-slashes` normalizer is implemented. This normalizer
merges adjacent slashes into a single slash, thus removing empty path segments.
---
 doc/configuration.txt                  |  12 +++
 include/haproxy/action-t.h             |   4 +
 include/haproxy/uri_normalizer.h       |   4 +
 reg-tests/http-rules/normalize_uri.vtc |  87 +++++++++++++++++++++
 src/http_act.c                         | 101 +++++++++++++++++++++++++
 src/uri_normalizer.c                   |  29 +++++++
 6 files changed, 237 insertions(+)
 create mode 100644 reg-tests/http-rules/normalize_uri.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 01a01eccc..d3030b478 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -6011,6 +6011,18 @@ http-request early-hint <name> <fmt> [ { if | unless } 
<condition> ]
 
   See RFC 8297 for more information.
 
+http-request normalize-uri <normalizer> [ { if | unless } <condition> ]
+
+  Performs normalization of the request's URI. The following normalizers are
+  available:
+
+  - merge-slashes: Merges adjacent slashes within the "path" component into a
+      single slash.
+
+      Example:
+      - //        -> /
+      - /foo//bar -> /foo/bar
+
 http-request redirect <rule> [ { if | unless } <condition> ]
 
   This performs an HTTP redirection based on a redirect rule. This is exactly
diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h
index 9009e4aae..4a3e3f8bd 100644
--- a/include/haproxy/action-t.h
+++ b/include/haproxy/action-t.h
@@ -101,6 +101,10 @@ enum act_timeout_name {
        ACT_TIMEOUT_TUNNEL,
 };
 
+enum act_normalize_uri {
+       ACT_NORMALIZE_URI_MERGE_SLASHES,
+};
+
 /* NOTE: if <.action_ptr> is defined, the referenced function will always be
  *       called regardless the action type. */
 struct act_rule {
diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h
index 82ef97324..b6e15e281 100644
--- a/include/haproxy/uri_normalizer.h
+++ b/include/haproxy/uri_normalizer.h
@@ -14,6 +14,10 @@
 #ifndef _HAPROXY_URI_NORMALIZER_H
 #define _HAPROXY_URI_NORMALIZER_H
 
+#include <import/ist.h>
+
+struct ist uri_normalizer_path_merge_slashes(const struct ist path, char 
*trash, size_t len);
+
 #endif /* _HAPROXY_URI_NORMALIZER_H */
 
 /*
diff --git a/reg-tests/http-rules/normalize_uri.vtc 
b/reg-tests/http-rules/normalize_uri.vtc
new file mode 100644
index 000000000..3303760d4
--- /dev/null
+++ b/reg-tests/http-rules/normalize_uri.vtc
@@ -0,0 +1,87 @@
+varnishtest "normalize-uri tests"
+#REQUIRE_VERSION=2.4
+
+# This reg-test tests the http-request normalize-uri action.
+
+feature ignore_unknown_macro
+
+server s1 {
+    rxreq
+    txresp
+} -repeat 10 -start
+
+haproxy h1 -conf {
+    defaults
+        mode http
+        timeout connect 1s
+        timeout client  1s
+        timeout server  1s
+
+    frontend fe_merge_slashes
+        bind "fd@${fe_merge_slashes}"
+
+        http-request set-var(txn.before) url
+        http-request normalize-uri merge-slashes
+        http-request set-var(txn.after) url
+
+        http-response add-header before  %[var(txn.before)]
+        http-response add-header after  %[var(txn.after)]
+
+        default_backend be
+
+    backend be
+        server s1 ${s1_addr}:${s1_port}
+
+} -start
+
+client c1 -connect ${h1_fe_merge_slashes_sock} {
+    txreq -url "/foo/bar"
+    rxresp
+    expect resp.http.before == "/foo/bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "/foo//bar"
+    rxresp
+    expect resp.http.before == "/foo//bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "/foo///bar"
+    rxresp
+    expect resp.http.before == "/foo///bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "///foo///bar"
+    rxresp
+    expect resp.http.before == "///foo///bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "///foo/bar"
+    rxresp
+    expect resp.http.before == "///foo/bar"
+    expect resp.http.after == "/foo/bar"
+
+    txreq -url "///foo///bar///"
+    rxresp
+    expect resp.http.before == "///foo///bar///"
+    expect resp.http.after == "/foo/bar/"
+
+    txreq -url "///"
+    rxresp
+    expect resp.http.before == "///"
+    expect resp.http.after == "/"
+
+    txreq -url "/foo?bar=///"
+    rxresp
+    expect resp.http.before == "/foo?bar=///"
+    expect resp.http.after == "/foo?bar=///"
+
+    txreq -url "//foo?bar=///"
+    rxresp
+    expect resp.http.before == "//foo?bar=///"
+    expect resp.http.after == "/foo?bar=///"
+
+    txreq -req OPTIONS -url "*"
+    rxresp
+    expect resp.http.before == "*"
+    expect resp.http.after == "*"
+} -run
diff --git a/src/http_act.c b/src/http_act.c
index c699671a3..347c9c731 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -36,6 +36,7 @@
 #include <haproxy/stream_interface.h>
 #include <haproxy/tools.h>
 #include <haproxy/uri_auth-t.h>
+#include <haproxy/uri_normalizer.h>
 #include <haproxy/version.h>
 
 
@@ -194,6 +195,105 @@ static enum act_parse_ret parse_set_req_line(const char 
**args, int *orig_arg, s
        return ACT_RET_PRS_OK;
 }
 
+/* This function executes the http-request normalize-uri action.
+ * `rule->action` is expected to be a value from `enum act_normalize_uri`.
+ *
+ * On success, it returns ACT_RET_CONT. If an error
+ * occurs while soft rewrites are enabled, the action is canceled, but the rule
+ * processing continues. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_normalize_uri(struct act_rule *rule, struct 
proxy *px,
+                                                 struct session *sess, struct 
stream *s, int flags)
+{
+       enum act_return ret = ACT_RET_CONT;
+       struct htx *htx = htxbuf(&s->req.buf);
+       const struct ist uri = htx_sl_req_uri(http_get_stline(htx));
+       struct buffer *replace = alloc_trash_chunk();
+
+       if (!replace)
+               goto fail_alloc;
+
+       switch ((enum act_normalize_uri) rule->action) {
+               case ACT_NORMALIZE_URI_MERGE_SLASHES: {
+                       struct ist path = http_get_path(uri);
+                       struct ist newpath;
+
+                       if (!isttest(path))
+                               goto leave;
+
+                       path = iststop(path, '?');
+
+                       newpath = uri_normalizer_path_merge_slashes(path, 
replace->area, replace->size);
+
+                       if (!isttest(newpath))
+                               goto fail_rewrite;
+
+                       if (!http_replace_req_path(htx, newpath, 0))
+                               goto fail_rewrite;
+
+                       break;
+               }
+       }
+
+  leave:
+       free_trash_chunk(replace);
+       return ret;
+
+  fail_alloc:
+       if (!(s->flags & SF_ERR_MASK))
+               s->flags |= SF_ERR_RESOURCE;
+       ret = ACT_RET_ERR;
+       goto leave;
+ 
+  fail_rewrite:
+       _HA_ATOMIC_ADD(&sess->fe->fe_counters.failed_rewrites, 1);
+       if (s->flags & SF_BE_ASSIGNED)
+               _HA_ATOMIC_ADD(&s->be->be_counters.failed_rewrites, 1);
+       if (sess->listener && sess->listener->counters)
+               _HA_ATOMIC_ADD(&sess->listener->counters->failed_rewrites, 1);
+       if (objt_server(s->target))
+               
_HA_ATOMIC_ADD(&__objt_server(s->target)->counters.failed_rewrites, 1);
+
+       if (!(s->txn->req.flags & HTTP_MSGF_SOFT_RW)) {
+               ret = ACT_RET_ERR;
+               if (!(s->flags & SF_ERR_MASK))
+                       s->flags |= SF_ERR_PRXCOND;
+       }
+       goto leave;
+}
+
+/* Parses the http-request normalize-uri action. It expects a single 
<normalizer>
+ * argument, corresponding to a value in `enum act_normalize_uri`.
+ *
+ * It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_normalize_uri(const char **args, int 
*orig_arg, struct proxy *px,
+                                                   struct act_rule *rule, char 
**err)
+{
+       int cur_arg = *orig_arg;
+
+       rule->action_ptr = http_action_normalize_uri;
+       rule->release_ptr = NULL;
+
+       if (!*args[cur_arg] ||
+           (*args[cur_arg + 1] && strcmp(args[cur_arg + 1], "if") != 0 && 
strcmp(args[cur_arg + 1], "unless") != 0)) {
+               memprintf(err, "expects exactly 1 argument <normalizer>");
+               return ACT_RET_PRS_ERR;
+       }
+
+       if (strcmp(args[cur_arg], "merge-slashes") == 0) {
+               rule->action = ACT_NORMALIZE_URI_MERGE_SLASHES;
+       }
+       else {
+               memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
+               return ACT_RET_PRS_ERR;
+       }
+       cur_arg++;
+
+       *orig_arg = cur_arg;
+       return ACT_RET_PRS_OK;
+}
+
 /* This function executes a replace-uri action. It finds its arguments in
  * <rule>.arg.http. It builds a string in the trash from the format string
  * previously filled by function parse_replace_uri() and will execute the regex
@@ -2194,6 +2294,7 @@ static struct action_kw_list http_req_actions = {
                { "deny",             parse_http_deny,                 0 },
                { "disable-l7-retry", parse_http_req_disable_l7_retry, 0 },
                { "early-hint",       parse_http_set_header,           0 },
+               { "normalize-uri",    parse_http_normalize_uri,        0 },
                { "redirect",         parse_http_redirect,             0 },
                { "reject",           parse_http_action_reject,        0 },
                { "replace-header",   parse_http_replace_header,       0 },
diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c
index 7db47d198..681a1b0cd 100644
--- a/src/uri_normalizer.c
+++ b/src/uri_normalizer.c
@@ -10,9 +10,38 @@
  *
  */
 
+#include <import/ist.h>
+
 #include <haproxy/api.h>
 #include <haproxy/uri_normalizer.h>
 
+/* Merges adjacent slashes in the given path. Returns an ist containing the 
new path
+ * and backed by `trash`, or IST_NULL if `len` is not sufficiently large to 
store
+ * the resulting path.
+ */
+struct ist uri_normalizer_path_merge_slashes(const struct ist path, char 
*trash, size_t len)
+{
+       struct ist scanner = path;
+       struct ist newpath = ist2(trash, 0);
+
+       if (len < istlen(path))
+               return IST_NULL;
+
+       while (istlen(scanner) > 0) {
+               char current = istshift(&scanner);
+
+               if (current == '/') {
+                       while (istlen(scanner) > 0 && *istptr(scanner) == '/')
+                               scanner = istnext(scanner);
+               }
+
+               newpath = __istappend(newpath, current);
+       }
+
+       return newpath;
+}
+
+
 /*
  * Local variables:
  *  c-indent-level: 8
-- 
2.31.1


Reply via email to