Willy,
Christopher,

this is in prepatation for the next normalizer which normalizes character case
of the percent encoding.

The resources I found are not clear on whether a percent that is not followed
by two hex digits is valid or not. Most browsers and servers appear to support
it, so I opted to leave the user a choice whether to reject it or not.

To support this I need to differ between "normalizing failed for internal
reasons" and "normalizing failed, because the user sent garbage".

Overall I am not happy with this patch, because the control flow is ugly. I
also wasn't sure in which cases I need to increase server counters (e.g.
failed_rewrites) or not. I'd be happy if you could give some advice!

Best regards
Tim Düsterhus

Apply with `git am --scissors` to automatically cut the commit message.

-- >8 --
Specific normalizations might not be possible, because the input is completely
bogus. Support returning a HTTP 400 in those cases, with regular rewriting
failures remaining a 500.
---
 include/haproxy/uri_normalizer-t.h | 32 ++++++++++++
 include/haproxy/uri_normalizer.h   |  8 +--
 src/http_act.c                     | 33 +++++++++---
 src/uri_normalizer.c               | 81 ++++++++++++++++++++----------
 4 files changed, 118 insertions(+), 36 deletions(-)
 create mode 100644 include/haproxy/uri_normalizer-t.h

diff --git a/include/haproxy/uri_normalizer-t.h 
b/include/haproxy/uri_normalizer-t.h
new file mode 100644
index 000000000..d98ded976
--- /dev/null
+++ b/include/haproxy/uri_normalizer-t.h
@@ -0,0 +1,32 @@
+/*
+ * include/haproxy/uri_normalizer.h
+ * HTTP request URI normalization.
+ *
+ * Copyright 2021 Tim Duesterhus <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef _HAPROXY_URI_NORMALIZER_T_H
+#define _HAPROXY_URI_NORMALIZER_T_H
+
+enum uri_normalizer_err {
+       URI_NORMALIZER_ERR_NONE = 0,
+       URI_NORMALIZER_ERR_TRASH,
+       URI_NORMALIZER_ERR_ALLOC,
+       URI_NORMALIZER_ERR_INVALID_INPUT,
+       URI_NORMALIZER_ERR_BUG = 0xdead,
+};
+
+#endif /* _HAPROXY_URI_NORMALIZER_T_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h
index 52bb004db..b8bb62525 100644
--- a/include/haproxy/uri_normalizer.h
+++ b/include/haproxy/uri_normalizer.h
@@ -16,9 +16,11 @@
 
 #include <import/ist.h>
 
-struct ist uri_normalizer_path_dotdot(const struct ist path, int full, char 
*trash, size_t len);
-struct ist uri_normalizer_path_merge_slashes(const struct ist path, char 
*trash, size_t len);
-struct ist uri_normalizer_query_sort(const struct ist query, const char delim, 
char *trash, size_t len);
+#include <haproxy/uri_normalizer-t.h>
+
+struct ist uri_normalizer_path_dotdot(const struct ist path, int full, char 
*trash, size_t len, enum uri_normalizer_err *err);
+struct ist uri_normalizer_path_merge_slashes(const struct ist path, char 
*trash, size_t len, enum uri_normalizer_err *err);
+struct ist uri_normalizer_query_sort(const struct ist query, const char delim, 
char *trash, size_t len, enum uri_normalizer_err *err);
 
 #endif /* _HAPROXY_URI_NORMALIZER_H */
 
diff --git a/src/http_act.c b/src/http_act.c
index 2fa11dbdf..f6b0901c4 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -209,6 +209,7 @@ static enum act_return http_action_normalize_uri(struct 
act_rule *rule, struct p
        struct htx *htx = htxbuf(&s->req.buf);
        const struct ist uri = htx_sl_req_uri(http_get_stline(htx));
        struct buffer *replace = alloc_trash_chunk();
+       enum uri_normalizer_err err;
 
        if (!replace)
                goto fail_alloc;
@@ -223,10 +224,10 @@ static enum act_return http_action_normalize_uri(struct 
act_rule *rule, struct p
 
                        path = iststop(path, '?');
 
-                       newpath = uri_normalizer_path_merge_slashes(path, 
replace->area, replace->size);
+                       newpath = uri_normalizer_path_merge_slashes(path, 
replace->area, replace->size, &err);
 
                        if (!isttest(newpath))
-                               goto fail_rewrite;
+                               goto err;
 
                        if (!http_replace_req_path(htx, newpath, 0))
                                goto fail_rewrite;
@@ -243,10 +244,10 @@ static enum act_return http_action_normalize_uri(struct 
act_rule *rule, struct p
 
                        path = iststop(path, '?');
 
-                       newpath = uri_normalizer_path_dotdot(path, rule->action 
== ACT_NORMALIZE_URI_DOTDOT_FULL, replace->area, replace->size);
+                       newpath = uri_normalizer_path_dotdot(path, rule->action 
== ACT_NORMALIZE_URI_DOTDOT_FULL, replace->area, replace->size, &err);
 
                        if (!isttest(newpath))
-                               goto fail_rewrite;
+                               goto err;
 
                        if (!http_replace_req_path(htx, newpath, 0))
                                goto fail_rewrite;
@@ -263,10 +264,10 @@ static enum act_return http_action_normalize_uri(struct 
act_rule *rule, struct p
 
                        query = istfind(path, '?');
 
-                       newquery = uri_normalizer_query_sort(query, '&', 
replace->area, replace->size);
+                       newquery = uri_normalizer_query_sort(query, '&', 
replace->area, replace->size, &err);
 
                        if (!isttest(newquery))
-                               goto fail_rewrite;
+                               goto err;
 
                        if (!http_replace_req_query(htx, newquery))
                                goto fail_rewrite;
@@ -284,7 +285,7 @@ static enum act_return http_action_normalize_uri(struct 
act_rule *rule, struct p
                s->flags |= SF_ERR_RESOURCE;
        ret = ACT_RET_ERR;
        goto leave;
- 
+
   fail_rewrite:
        _HA_ATOMIC_ADD(&sess->fe->fe_counters.failed_rewrites, 1);
        if (s->flags & SF_BE_ASSIGNED)
@@ -300,6 +301,24 @@ static enum act_return http_action_normalize_uri(struct 
act_rule *rule, struct p
                        s->flags |= SF_ERR_PRXCOND;
        }
        goto leave;
+
+  err:
+       switch (err) {
+       case URI_NORMALIZER_ERR_NONE:
+       case URI_NORMALIZER_ERR_BUG:
+       case URI_NORMALIZER_ERR_TRASH:
+               /* These must not happen. */
+               BUG_ON("Logic error during URI normalization.");
+               ret = ACT_RET_ERR;
+               goto leave;
+       case URI_NORMALIZER_ERR_ALLOC:
+               goto fail_alloc;
+       case URI_NORMALIZER_ERR_INVALID_INPUT:
+               ret = ACT_RET_INV;
+               goto leave;
+       }
+
+       my_unreachable();
 }
 
 /* Parses the http-request normalize-uri action. It expects a single 
<normalizer>
diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c
index 7482a3c97..c3d7924c8 100644
--- a/src/uri_normalizer.c
+++ b/src/uri_normalizer.c
@@ -16,22 +16,25 @@
 #include <haproxy/uri_normalizer.h>
 
 /* Merges `/../` with preceding path segments. Returns an ist containing the 
new path
- * and backed by `trash` or IST_NULL if the `len` not sufficiently large to 
store
- * the resulting path.
+ * and backed by `trash` or IST_NULL if normalizing failed. In this case the 
`err`
+ * argument will contain the error that occurred.
  * 
  * If `full` is set to `0` then `/../` will be printed at the start of the 
resulting
  * path if the number of `/../` exceeds the number of other segments. If 
`full` is
  * set to `1` these will not be printed.
  */
-struct ist uri_normalizer_path_dotdot(const struct ist path, int full, char 
*trash, size_t len)
+struct ist uri_normalizer_path_dotdot(const struct ist path, int full, char 
*trash, size_t len, enum uri_normalizer_err *err)
 {
+       struct ist newpath;
        ssize_t offset = istlen(path) - 1;
        char *tail = trash + len;
        char *head = tail;
        int up = 0;
 
-       if (len < istlen(path))
-               return IST_NULL;
+       if (len < istlen(path)) {
+               *err = URI_NORMALIZER_ERR_TRASH;
+               goto out;
+       }
 
        /* Handle `/..` at the end of the path without a trailing slash. */
        if (offset >= 2 && istmatch(istadv(path, offset - 2), ist("/.."))) {
@@ -89,20 +92,31 @@ struct ist uri_normalizer_path_dotdot(const struct ist 
path, int full, char *tra
                }
        }
 
-       return ist2(head, tail - head);
+       newpath = ist2(head, tail - head);
+
+       *err = URI_NORMALIZER_ERR_NONE;
+
+  out:
+
+       if (*err == URI_NORMALIZER_ERR_NONE)
+               return newpath;
+       else
+               return IST_NULL;
 }
 
 /* Merges adjacent slashes in the given path. Returns an ist containing the 
new path
- * and backed by `trash` or IST_NULL if the `len` not sufficiently large to 
store
- * the resulting path.
+ * and backed by `trash` or IST_NULL if normalizing failed. In this case the 
`err`
+ * argument will contain the error that occurred.
  */
-struct ist uri_normalizer_path_merge_slashes(const struct ist path, char 
*trash, size_t len)
+struct ist uri_normalizer_path_merge_slashes(const struct ist path, char 
*trash, size_t len, enum uri_normalizer_err *err)
 {
        struct ist scanner = path;
        struct ist newpath = ist2(trash, 0);
 
-       if (len < istlen(path))
-               return IST_NULL;
+       if (len < istlen(path)) {
+               *err = URI_NORMALIZER_ERR_TRASH;
+               goto out;
+       }
 
        while (istlen(scanner) > 0) {
                char current = istshift(&scanner);
@@ -115,7 +129,14 @@ struct ist uri_normalizer_path_merge_slashes(const struct 
ist path, char *trash,
                newpath = __istappend(newpath, current);
        }
 
-       return newpath;
+       *err = URI_NORMALIZER_ERR_NONE;
+
+  out:
+
+       if (*err == URI_NORMALIZER_ERR_NONE)
+               return newpath;
+       else
+               return IST_NULL;
 }
 
 /* Compares two query parameters by name. Query parameters are ordered
@@ -151,10 +172,10 @@ static int query_param_cmp(const void *a, const void *b)
 }
 
 /* Sorts the parameters within the given query string. Returns an ist 
containing
- * the new path and backed by `trash` or IST_NULL if the `len` not sufficiently
- * large to store the resulting path.
+ * the new path and backed by `trash` or IST_NULL if normalizing failed. In 
this
+ * case the `err` argument will contain the error that occurred.
  */
-struct ist uri_normalizer_query_sort(const struct ist query, const char delim, 
char *trash, size_t len)
+struct ist uri_normalizer_query_sort(const struct ist query, const char delim, 
char *trash, size_t len, enum uri_normalizer_err *err)
 {
        struct ist scanner = istadv(query, 1);
        struct ist *params = NULL;
@@ -163,8 +184,10 @@ struct ist uri_normalizer_query_sort(const struct ist 
query, const char delim, c
        size_t param_count = 0;
        size_t i;
 
-       if (len < istlen(query))
-               goto fail;
+       if (len < istlen(query)) {
+               *err = URI_NORMALIZER_ERR_TRASH;
+               goto out;
+       }
 
        while (istlen(scanner) > 0) {
                const struct ist param = istsplit(&scanner, delim);
@@ -174,8 +197,10 @@ struct ist uri_normalizer_query_sort(const struct ist 
query, const char delim, c
 
                        param_size *= 2;
 
-                       if (!(realloc = reallocarray(params, param_size, 
sizeof(*realloc))))
-                               goto fail;
+                       if (!(realloc = reallocarray(params, param_size, 
sizeof(*realloc)))) {
+                               *err = URI_NORMALIZER_ERR_ALLOC;
+                               goto out;
+                       }
 
                        params = realloc;
                }
@@ -191,18 +216,22 @@ struct ist uri_normalizer_query_sort(const struct ist 
query, const char delim, c
                if (i > 0)
                        newquery = __istappend(newquery, '&');
 
-               if (istcat(&newquery, params[i], len) < 0)
-                       goto fail;
+               if (istcat(&newquery, params[i], len) < 0) {
+                       /* We verified the size of the trash buffer, thus this 
cannot happen. */
+                       *err = URI_NORMALIZER_ERR_BUG;
+                       goto out;
+               }
        }
 
-       free(params);
-
-       return newquery;
+       *err = URI_NORMALIZER_ERR_NONE;
 
-  fail:
+  out:
        free(params);
 
-       return IST_NULL;
+       if (*err == URI_NORMALIZER_ERR_NONE)
+               return newquery;
+       else
+               return IST_NULL;
 }
 
 /*
-- 
2.31.1


Reply via email to