Willy, Christopher, this is in prepatation for the next normalizer which normalizes character case of the percent encoding.
The resources I found are not clear on whether a percent that is not followed by two hex digits is valid or not. Most browsers and servers appear to support it, so I opted to leave the user a choice whether to reject it or not. To support this I need to differ between "normalizing failed for internal reasons" and "normalizing failed, because the user sent garbage". Overall I am not happy with this patch, because the control flow is ugly. I also wasn't sure in which cases I need to increase server counters (e.g. failed_rewrites) or not. I'd be happy if you could give some advice! Best regards Tim Düsterhus Apply with `git am --scissors` to automatically cut the commit message. -- >8 -- Specific normalizations might not be possible, because the input is completely bogus. Support returning a HTTP 400 in those cases, with regular rewriting failures remaining a 500. --- include/haproxy/uri_normalizer-t.h | 32 ++++++++++++ include/haproxy/uri_normalizer.h | 8 +-- src/http_act.c | 33 +++++++++--- src/uri_normalizer.c | 81 ++++++++++++++++++++---------- 4 files changed, 118 insertions(+), 36 deletions(-) create mode 100644 include/haproxy/uri_normalizer-t.h diff --git a/include/haproxy/uri_normalizer-t.h b/include/haproxy/uri_normalizer-t.h new file mode 100644 index 000000000..d98ded976 --- /dev/null +++ b/include/haproxy/uri_normalizer-t.h @@ -0,0 +1,32 @@ +/* + * include/haproxy/uri_normalizer.h + * HTTP request URI normalization. + * + * Copyright 2021 Tim Duesterhus <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _HAPROXY_URI_NORMALIZER_T_H +#define _HAPROXY_URI_NORMALIZER_T_H + +enum uri_normalizer_err { + URI_NORMALIZER_ERR_NONE = 0, + URI_NORMALIZER_ERR_TRASH, + URI_NORMALIZER_ERR_ALLOC, + URI_NORMALIZER_ERR_INVALID_INPUT, + URI_NORMALIZER_ERR_BUG = 0xdead, +}; + +#endif /* _HAPROXY_URI_NORMALIZER_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h index 52bb004db..b8bb62525 100644 --- a/include/haproxy/uri_normalizer.h +++ b/include/haproxy/uri_normalizer.h @@ -16,9 +16,11 @@ #include <import/ist.h> -struct ist uri_normalizer_path_dotdot(const struct ist path, int full, char *trash, size_t len); -struct ist uri_normalizer_path_merge_slashes(const struct ist path, char *trash, size_t len); -struct ist uri_normalizer_query_sort(const struct ist query, const char delim, char *trash, size_t len); +#include <haproxy/uri_normalizer-t.h> + +struct ist uri_normalizer_path_dotdot(const struct ist path, int full, char *trash, size_t len, enum uri_normalizer_err *err); +struct ist uri_normalizer_path_merge_slashes(const struct ist path, char *trash, size_t len, enum uri_normalizer_err *err); +struct ist uri_normalizer_query_sort(const struct ist query, const char delim, char *trash, size_t len, enum uri_normalizer_err *err); #endif /* _HAPROXY_URI_NORMALIZER_H */ diff --git a/src/http_act.c b/src/http_act.c index 2fa11dbdf..f6b0901c4 100644 --- a/src/http_act.c +++ b/src/http_act.c @@ -209,6 +209,7 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p struct htx *htx = htxbuf(&s->req.buf); const struct ist uri = htx_sl_req_uri(http_get_stline(htx)); struct buffer *replace = alloc_trash_chunk(); + enum uri_normalizer_err err; if (!replace) goto fail_alloc; @@ -223,10 +224,10 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p path = iststop(path, '?'); - newpath = uri_normalizer_path_merge_slashes(path, replace->area, replace->size); + newpath = uri_normalizer_path_merge_slashes(path, replace->area, replace->size, &err); if (!isttest(newpath)) - goto fail_rewrite; + goto err; if (!http_replace_req_path(htx, newpath, 0)) goto fail_rewrite; @@ -243,10 +244,10 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p path = iststop(path, '?'); - newpath = uri_normalizer_path_dotdot(path, rule->action == ACT_NORMALIZE_URI_DOTDOT_FULL, replace->area, replace->size); + newpath = uri_normalizer_path_dotdot(path, rule->action == ACT_NORMALIZE_URI_DOTDOT_FULL, replace->area, replace->size, &err); if (!isttest(newpath)) - goto fail_rewrite; + goto err; if (!http_replace_req_path(htx, newpath, 0)) goto fail_rewrite; @@ -263,10 +264,10 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p query = istfind(path, '?'); - newquery = uri_normalizer_query_sort(query, '&', replace->area, replace->size); + newquery = uri_normalizer_query_sort(query, '&', replace->area, replace->size, &err); if (!isttest(newquery)) - goto fail_rewrite; + goto err; if (!http_replace_req_query(htx, newquery)) goto fail_rewrite; @@ -284,7 +285,7 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p s->flags |= SF_ERR_RESOURCE; ret = ACT_RET_ERR; goto leave; - + fail_rewrite: _HA_ATOMIC_ADD(&sess->fe->fe_counters.failed_rewrites, 1); if (s->flags & SF_BE_ASSIGNED) @@ -300,6 +301,24 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p s->flags |= SF_ERR_PRXCOND; } goto leave; + + err: + switch (err) { + case URI_NORMALIZER_ERR_NONE: + case URI_NORMALIZER_ERR_BUG: + case URI_NORMALIZER_ERR_TRASH: + /* These must not happen. */ + BUG_ON("Logic error during URI normalization."); + ret = ACT_RET_ERR; + goto leave; + case URI_NORMALIZER_ERR_ALLOC: + goto fail_alloc; + case URI_NORMALIZER_ERR_INVALID_INPUT: + ret = ACT_RET_INV; + goto leave; + } + + my_unreachable(); } /* Parses the http-request normalize-uri action. It expects a single <normalizer> diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c index 7482a3c97..c3d7924c8 100644 --- a/src/uri_normalizer.c +++ b/src/uri_normalizer.c @@ -16,22 +16,25 @@ #include <haproxy/uri_normalizer.h> /* Merges `/../` with preceding path segments. Returns an ist containing the new path - * and backed by `trash` or IST_NULL if the `len` not sufficiently large to store - * the resulting path. + * and backed by `trash` or IST_NULL if normalizing failed. In this case the `err` + * argument will contain the error that occurred. * * If `full` is set to `0` then `/../` will be printed at the start of the resulting * path if the number of `/../` exceeds the number of other segments. If `full` is * set to `1` these will not be printed. */ -struct ist uri_normalizer_path_dotdot(const struct ist path, int full, char *trash, size_t len) +struct ist uri_normalizer_path_dotdot(const struct ist path, int full, char *trash, size_t len, enum uri_normalizer_err *err) { + struct ist newpath; ssize_t offset = istlen(path) - 1; char *tail = trash + len; char *head = tail; int up = 0; - if (len < istlen(path)) - return IST_NULL; + if (len < istlen(path)) { + *err = URI_NORMALIZER_ERR_TRASH; + goto out; + } /* Handle `/..` at the end of the path without a trailing slash. */ if (offset >= 2 && istmatch(istadv(path, offset - 2), ist("/.."))) { @@ -89,20 +92,31 @@ struct ist uri_normalizer_path_dotdot(const struct ist path, int full, char *tra } } - return ist2(head, tail - head); + newpath = ist2(head, tail - head); + + *err = URI_NORMALIZER_ERR_NONE; + + out: + + if (*err == URI_NORMALIZER_ERR_NONE) + return newpath; + else + return IST_NULL; } /* Merges adjacent slashes in the given path. Returns an ist containing the new path - * and backed by `trash` or IST_NULL if the `len` not sufficiently large to store - * the resulting path. + * and backed by `trash` or IST_NULL if normalizing failed. In this case the `err` + * argument will contain the error that occurred. */ -struct ist uri_normalizer_path_merge_slashes(const struct ist path, char *trash, size_t len) +struct ist uri_normalizer_path_merge_slashes(const struct ist path, char *trash, size_t len, enum uri_normalizer_err *err) { struct ist scanner = path; struct ist newpath = ist2(trash, 0); - if (len < istlen(path)) - return IST_NULL; + if (len < istlen(path)) { + *err = URI_NORMALIZER_ERR_TRASH; + goto out; + } while (istlen(scanner) > 0) { char current = istshift(&scanner); @@ -115,7 +129,14 @@ struct ist uri_normalizer_path_merge_slashes(const struct ist path, char *trash, newpath = __istappend(newpath, current); } - return newpath; + *err = URI_NORMALIZER_ERR_NONE; + + out: + + if (*err == URI_NORMALIZER_ERR_NONE) + return newpath; + else + return IST_NULL; } /* Compares two query parameters by name. Query parameters are ordered @@ -151,10 +172,10 @@ static int query_param_cmp(const void *a, const void *b) } /* Sorts the parameters within the given query string. Returns an ist containing - * the new path and backed by `trash` or IST_NULL if the `len` not sufficiently - * large to store the resulting path. + * the new path and backed by `trash` or IST_NULL if normalizing failed. In this + * case the `err` argument will contain the error that occurred. */ -struct ist uri_normalizer_query_sort(const struct ist query, const char delim, char *trash, size_t len) +struct ist uri_normalizer_query_sort(const struct ist query, const char delim, char *trash, size_t len, enum uri_normalizer_err *err) { struct ist scanner = istadv(query, 1); struct ist *params = NULL; @@ -163,8 +184,10 @@ struct ist uri_normalizer_query_sort(const struct ist query, const char delim, c size_t param_count = 0; size_t i; - if (len < istlen(query)) - goto fail; + if (len < istlen(query)) { + *err = URI_NORMALIZER_ERR_TRASH; + goto out; + } while (istlen(scanner) > 0) { const struct ist param = istsplit(&scanner, delim); @@ -174,8 +197,10 @@ struct ist uri_normalizer_query_sort(const struct ist query, const char delim, c param_size *= 2; - if (!(realloc = reallocarray(params, param_size, sizeof(*realloc)))) - goto fail; + if (!(realloc = reallocarray(params, param_size, sizeof(*realloc)))) { + *err = URI_NORMALIZER_ERR_ALLOC; + goto out; + } params = realloc; } @@ -191,18 +216,22 @@ struct ist uri_normalizer_query_sort(const struct ist query, const char delim, c if (i > 0) newquery = __istappend(newquery, '&'); - if (istcat(&newquery, params[i], len) < 0) - goto fail; + if (istcat(&newquery, params[i], len) < 0) { + /* We verified the size of the trash buffer, thus this cannot happen. */ + *err = URI_NORMALIZER_ERR_BUG; + goto out; + } } - free(params); - - return newquery; + *err = URI_NORMALIZER_ERR_NONE; - fail: + out: free(params); - return IST_NULL; + if (*err == URI_NORMALIZER_ERR_NONE) + return newquery; + else + return IST_NULL; } /* -- 2.31.1

