Willy,
Christopher,

and this final one adds a normalizer to turn the hex digits of percent
encoding into uppercase. Uppercase is the variant preferred by the URI RFC, so
this is what we do.

Best regards
Tim Düsterhus

Apply with `git am --scissors` to automatically cut the commit message.

-- >8 --
This normalizer uppercases the hexadecimal characters used in percent-encoding.

See GitHub Issue #714.
---
 doc/configuration.txt                  | 14 ++++++
 include/haproxy/action-t.h             |  2 +
 include/haproxy/uri_normalizer.h       |  1 +
 reg-tests/http-rules/normalize_uri.vtc | 65 +++++++++++++++++++++++++-
 src/http_act.c                         | 33 +++++++++++++
 src/uri_normalizer.c                   | 56 ++++++++++++++++++++++
 6 files changed, 170 insertions(+), 1 deletion(-)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 3422d3aa6..7d073cae6 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -6014,6 +6014,7 @@ http-request early-hint <name> <fmt> [ { if | unless } 
<condition> ]
 http-request normalize-uri <normalizer> [ { if | unless } <condition> ]
 http-request normalize-uri dotdot [ full ] [ { if | unless } <condition> ]
 http-request normalize-uri merge-slashes [ { if | unless } <condition> ]
+http-request normalize-uri percent-upper [ strict ] [ { if | unless } 
<condition> ]
 http-request normalize-uri sort-query [ { if | unless } <condition> ]
 
   Performs normalization of the request's URI. The following normalizers are
@@ -6046,6 +6047,19 @@ http-request normalize-uri sort-query [ { if | unless } 
<condition> ]
       - //        -> /
       - /foo//bar -> /foo/bar
 
+  - percent-upper: Uppercases letters within percent-encoded sequences
+      (RFC 3986#6.2.2.1).
+
+      Example:
+      - /%6f -> /%6F
+      - /%zz -> /%zz
+
+      If the "strict" option is specified then invalid sequences will result
+      in an HTTP 400 Bad Request being returned.
+
+      Example:
+      - /%zz -> HTTP 400
+
   - sort-query: Sorts the query string parameters by parameter name.
       Parameters are assumed to be delimited by '&'. Shorter names sort before
       longer names and identical parameter names maintain their relative order.
diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h
index ae43a936d..cce2a2e23 100644
--- a/include/haproxy/action-t.h
+++ b/include/haproxy/action-t.h
@@ -106,6 +106,8 @@ enum act_normalize_uri {
        ACT_NORMALIZE_URI_DOTDOT,
        ACT_NORMALIZE_URI_DOTDOT_FULL,
        ACT_NORMALIZE_URI_SORT_QUERY,
+       ACT_NORMALIZE_URI_PERCENT_UPPER,
+       ACT_NORMALIZE_URI_PERCENT_UPPER_STRICT,
 };
 
 /* NOTE: if <.action_ptr> is defined, the referenced function will always be
diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h
index b8bb62525..dc732daec 100644
--- a/include/haproxy/uri_normalizer.h
+++ b/include/haproxy/uri_normalizer.h
@@ -18,6 +18,7 @@
 
 #include <haproxy/uri_normalizer-t.h>
 
+struct ist uri_normalizer_percent_upper(const struct ist input, int strict, 
char *trash, size_t len, enum uri_normalizer_err *err);
 struct ist uri_normalizer_path_dotdot(const struct ist path, int full, char 
*trash, size_t len, enum uri_normalizer_err *err);
 struct ist uri_normalizer_path_merge_slashes(const struct ist path, char 
*trash, size_t len, enum uri_normalizer_err *err);
 struct ist uri_normalizer_query_sort(const struct ist query, const char delim, 
char *trash, size_t len, enum uri_normalizer_err *err);
diff --git a/reg-tests/http-rules/normalize_uri.vtc 
b/reg-tests/http-rules/normalize_uri.vtc
index cb3fa2f63..e900677e9 100644
--- a/reg-tests/http-rules/normalize_uri.vtc
+++ b/reg-tests/http-rules/normalize_uri.vtc
@@ -8,7 +8,7 @@ feature ignore_unknown_macro
 server s1 {
     rxreq
     txresp
-} -repeat 34 -start
+} -repeat 43 -start
 
 haproxy h1 -conf {
     defaults
@@ -58,6 +58,30 @@ haproxy h1 -conf {
 
         default_backend be
 
+    frontend fe_percent_upper
+        bind "fd@${fe_percent_upper}"
+
+        http-request set-var(txn.before) url
+        http-request normalize-uri percent-upper
+        http-request set-var(txn.after) url
+
+        http-response add-header before  %[var(txn.before)]
+        http-response add-header after  %[var(txn.after)]
+
+        default_backend be
+
+    frontend fe_percent_upper_strict
+        bind "fd@${fe_percent_upper_strict}"
+
+        http-request set-var(txn.before) url
+        http-request normalize-uri percent-upper strict
+        http-request set-var(txn.after) url
+
+        http-response add-header before  %[var(txn.before)]
+        http-response add-header after  %[var(txn.after)]
+
+        default_backend be
+
     backend be
         server s1 ${s1_addr}:${s1_port}
 
@@ -249,3 +273,42 @@ client c3 -connect ${h1_fe_sort_query_sock} {
     expect resp.http.before == "*"
     expect resp.http.after == "*"
 } -run
+
+client c4 -connect ${h1_fe_percent_upper_sock} {
+    txreq -url "/a?a=a"
+    rxresp
+    expect resp.http.before == "/a?a=a"
+    expect resp.http.after == "/a?a=a"
+
+    txreq -url "/%aa?a=%aa"
+    rxresp
+    expect resp.http.before == "/%aa?a=%aa"
+    expect resp.http.after == "/%AA?a=%AA"
+
+    txreq -url "/%zz?a=%zz"
+    rxresp
+    expect resp.status == 200
+    expect resp.http.before == "/%zz?a=%zz"
+    expect resp.http.after == "/%zz?a=%zz"
+
+    txreq -req OPTIONS -url "*"
+    rxresp
+    expect resp.http.before == "*"
+    expect resp.http.after == "*"
+} -run
+
+client c5 -connect ${h1_fe_percent_upper_strict_sock} {
+    txreq -url "/a?a=a"
+    rxresp
+    expect resp.http.before == "/a?a=a"
+    expect resp.http.after == "/a?a=a"
+
+    txreq -url "/%aa?a=%aa"
+    rxresp
+    expect resp.http.before == "/%aa?a=%aa"
+    expect resp.http.after == "/%AA?a=%AA"
+
+    txreq -url "/%zz?a=%zz"
+    rxresp
+    expect resp.status == 400
+} -run
diff --git a/src/http_act.c b/src/http_act.c
index f6b0901c4..c9c696518 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -274,6 +274,24 @@ static enum act_return http_action_normalize_uri(struct 
act_rule *rule, struct p
 
                        break;
                }
+               case ACT_NORMALIZE_URI_PERCENT_UPPER:
+               case ACT_NORMALIZE_URI_PERCENT_UPPER_STRICT: {
+                       struct ist path = http_get_path(uri);
+                       struct ist newpath;
+
+                       if (!isttest(path))
+                               goto leave;
+
+                       newpath = uri_normalizer_percent_upper(path, 
rule->action == ACT_NORMALIZE_URI_PERCENT_UPPER_STRICT, replace->area, 
replace->size, &err);
+
+                       if (!isttest(newpath))
+                               goto err;
+
+                       if (!http_replace_req_path(htx, newpath, 1))
+                               goto fail_rewrite;
+
+                       break;
+               }
        }
 
   leave:
@@ -364,6 +382,21 @@ static enum act_parse_ret parse_http_normalize_uri(const 
char **args, int *orig_
 
                rule->action = ACT_NORMALIZE_URI_SORT_QUERY;
        }
+       else if (strcmp(args[cur_arg], "percent-upper") == 0) {
+               cur_arg++;
+               /* "strict" is optional; otherwise only end of line or an if/unless condition may follow */
+               if (strcmp(args[cur_arg], "strict") == 0) {
+                       cur_arg++;
+                       rule->action = ACT_NORMALIZE_URI_PERCENT_UPPER_STRICT;
+               }
+               else if (!*args[cur_arg] || strcmp(args[cur_arg], "if") == 0 || strcmp(args[cur_arg], "unless") == 0) {
+                       rule->action = ACT_NORMALIZE_URI_PERCENT_UPPER;
+               }
+               else {
+                       memprintf(err, "unknown argument '%s' for 'percent-upper' normalizer", args[cur_arg]);
+                       return ACT_RET_PRS_ERR;
+               }
+       }
        else {
                memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
                return ACT_RET_PRS_ERR;
diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c
index c3d7924c8..d5eea079b 100644
--- a/src/uri_normalizer.c
+++ b/src/uri_normalizer.c
@@ -13,8 +13,64 @@
 #include <import/ist.h>
 
 #include <haproxy/api.h>
+#include <haproxy/tools.h>
 #include <haproxy/uri_normalizer.h>
 
+/* Uppercases the hex digits of percent-encoded sequences. Returns an ist
+ * containing the new uri and backed by `trash` or IST_NULL if normalizing
+ * failed. In this case the `err` argument will contain the error that occurred.
+ *
+ * If `strict` is set to 0 then a percent character that is not followed by two
+ * hexadecimal digits is copied as-is and scanning continues with the character
+ * after it. If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT`
+ * is returned for such invalid sequences.
+ */
+struct ist uri_normalizer_percent_upper(const struct ist uri, int strict, char 
*trash, size_t len, enum uri_normalizer_err *err)
+{
+       struct ist scanner = uri;
+       struct ist newuri = ist2(trash, 0);
+
+       if (len < istlen(uri)) { /* output is never longer than the input */
+               *err = URI_NORMALIZER_ERR_TRASH;
+               goto out;
+       }
+
+       while (istlen(scanner)) {
+               const char current = istshift(&scanner);
+
+               if (current == '%') {
+                       if (istlen(scanner) >= 2) { /* both digits must be present */
+                               if (ishex(istptr(scanner)[0]) && 
ishex(istptr(scanner)[1])) {
+                                       newuri = __istappend(newuri, current);
+                                       newuri = __istappend(newuri, 
toupper(istshift(&scanner)));
+                                       newuri = __istappend(newuri, 
toupper(istshift(&scanner)));
+                                       continue;
+                               }
+                       }
+
+                       if (strict) {
+                               *err = URI_NORMALIZER_ERR_INVALID_INPUT;
+                               goto out;
+                       }
+                       else {
+                               newuri = __istappend(newuri, current);
+                       }
+               }
+               else {
+                       newuri = __istappend(newuri, current);
+               }
+       }
+
+       *err = URI_NORMALIZER_ERR_NONE;
+
+  out:
+
+       if (*err == URI_NORMALIZER_ERR_NONE)
+               return newuri;
+       else
+               return IST_NULL;
+}
+
 /* Merges `/../` with preceding path segments. Returns an ist containing the 
new path
  * and backed by `trash` or IST_NULL if normalizing failed. In this case the 
`err`
  * argument will contain the error that occurred.
-- 
2.31.1


Reply via email to