Christopher, here I am using `istdiff` and a trash buffer instead of dynamic allocation.
Best regards Tim Düsterhus Apply with `git am --scissors` to automatically cut the commit message. -- >8 -- This normalizer sorts the `&` delimited query parameters by parameter name. See GitHub Issue #714. --- doc/configuration.txt | 10 +++ include/haproxy/action-t.h | 1 + include/haproxy/uri_normalizer.h | 1 + reg-tests/http-rules/normalize_uri.vtc | 81 ++++++++++++++++++++++- src/http_act.c | 22 +++++++ src/uri_normalizer.c | 89 ++++++++++++++++++++++++++ 6 files changed, 203 insertions(+), 1 deletion(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index a073da5fe..862427e51 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -6014,6 +6014,7 @@ http-request early-hint <name> <fmt> [ { if | unless } <condition> ] http-request normalize-uri <normalizer> [ { if | unless } <condition> ] http-request normalize-uri dotdot [ full ] [ { if | unless } <condition> ] http-request normalize-uri merge-slashes [ { if | unless } <condition> ] +http-request normalize-uri sort-query [ { if | unless } <condition> ] Performs normalization of the request's URI. The following normalizers are available: @@ -6045,6 +6046,15 @@ http-request normalize-uri merge-slashes [ { if | unless } <condition> ] - // -> / - /foo//bar -> /foo/bar + - sort-query: Sorts the query string parameters by parameter name. + Parameters are assumed to be delimited by '&'. Shorter names sort before + longer names and identical parameter names maintain their relative order. + + Example: + - /?c=3&a=1&b=2 -> /?a=1&b=2&c=3 + - /?aaa=3&a=1&aa=2 -> /?a=1&aa=2&aaa=3 + - /?a=3&b=4&a=1&b=5&a=2 -> /?a=3&a=1&a=2&b=4&b=5 + http-request redirect <rule> [ { if | unless } <condition> ] This performs an HTTP redirection based on a redirect rule. This is exactly diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h index 5a8155929..ae43a936d 100644 --- a/include/haproxy/action-t.h +++ b/include/haproxy/action-t.h @@ -105,6 +105,7 @@ enum act_normalize_uri { ACT_NORMALIZE_URI_MERGE_SLASHES, ACT_NORMALIZE_URI_DOTDOT, ACT_NORMALIZE_URI_DOTDOT_FULL, + ACT_NORMALIZE_URI_SORT_QUERY, }; /* NOTE: if <.action_ptr> is defined, the referenced function will always be diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h index 811a7ebb6..c16dd3ffa 100644 --- a/include/haproxy/uri_normalizer.h +++ b/include/haproxy/uri_normalizer.h @@ -20,6 +20,7 @@ enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst); enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist path, struct ist *dst); +enum uri_normalizer_err uri_normalizer_query_sort(const struct ist query, const char delim, struct ist *dst); #endif /* _HAPROXY_URI_NORMALIZER_H */ diff --git a/reg-tests/http-rules/normalize_uri.vtc b/reg-tests/http-rules/normalize_uri.vtc index 5ee73a308..cb3fa2f63 100644 --- a/reg-tests/http-rules/normalize_uri.vtc +++ b/reg-tests/http-rules/normalize_uri.vtc @@ -8,7 +8,7 @@ feature ignore_unknown_macro server s1 { rxreq txresp -} -repeat 21 -start +} -repeat 34 -start haproxy h1 -conf { defaults @@ -46,6 +46,18 @@ haproxy h1 -conf { default_backend be + frontend fe_sort_query + bind "fd@${fe_sort_query}" + + http-request set-var(txn.before) url + http-request normalize-uri sort-query + http-request set-var(txn.after) url + + http-response add-header before %[var(txn.before)] + http-response add-header after %[var(txn.after)] + + default_backend be + backend be server s1 ${s1_addr}:${s1_port} @@ -170,3 +182,70 @@ client c2 -connect ${h1_fe_dotdot_sock} { expect resp.http.after == "*" expect resp.http.after-full == "*" } -run + +client c3 -connect ${h1_fe_sort_query_sock} { + txreq -url "/?a=a" + rxresp + expect resp.http.before == "/?a=a" + expect resp.http.after == "/?a=a" + + txreq -url "/?a=a&z=z" + rxresp + expect resp.http.before == "/?a=a&z=z" + expect resp.http.after == "/?a=a&z=z" + + txreq -url "/?z=z&a=a" + rxresp + expect resp.http.before == "/?z=z&a=a" + expect resp.http.after == "/?a=a&z=z" + + txreq -url "/?a=z&z=a" + rxresp + expect resp.http.before == "/?a=z&z=a" + expect resp.http.after == "/?a=z&z=a" + + txreq -url "/?z=a&a=z" + rxresp + expect resp.http.before == "/?z=a&a=z" + expect resp.http.after == "/?a=z&z=a" + + txreq -url "/?c&b&a&z&x&y" + rxresp + expect resp.http.before == "/?c&b&a&z&x&y" + expect resp.http.after == "/?a&b&c&x&y&z" + + txreq -url "/?a=&aa=&aaa=&aaaa=" + rxresp + expect resp.http.before == "/?a=&aa=&aaa=&aaaa=" + expect resp.http.after == "/?a=&aa=&aaa=&aaaa=" + + txreq -url "/?aaaa=&a=&aa=&aaa=" + rxresp + expect resp.http.before == "/?aaaa=&a=&aa=&aaa=" + expect resp.http.after == "/?a=&aa=&aaa=&aaaa=" + + txreq -url "/?a=5&a=3&a=1&a=2&a=4" + rxresp + expect resp.http.before == "/?a=5&a=3&a=1&a=2&a=4" + expect resp.http.after == "/?a=5&a=3&a=1&a=2&a=4" + + txreq -url "/?a=5&b=3&a=1&a=2&b=4" + rxresp + expect resp.http.before == "/?a=5&b=3&a=1&a=2&b=4" + expect resp.http.after == "/?a=5&a=1&a=2&b=3&b=4" + + txreq -url "/" + rxresp + expect resp.http.before == "/" + expect resp.http.after == "/" + + txreq -url "/?" + rxresp + expect resp.http.before == "/?" + expect resp.http.after == "/?" + + txreq -req OPTIONS -url "*" + rxresp + expect resp.http.before == "*" + expect resp.http.after == "*" +} -run diff --git a/src/http_act.c b/src/http_act.c index 3751b002f..1480563c9 100644 --- a/src/http_act.c +++ b/src/http_act.c @@ -250,6 +250,23 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p break; } + case ACT_NORMALIZE_URI_SORT_QUERY: { + const struct ist path = http_get_path(uri); + struct ist newquery = ist2(replace->area, replace->size); + + if (!isttest(path)) + goto leave; + + err = uri_normalizer_query_sort(istfind(path, '?'), '&', &newquery); + + if (err != URI_NORMALIZER_ERR_NONE) + break; + + if (!http_replace_req_query(htx, newquery)) + goto fail_rewrite; + + break; + } } switch (err) { @@ -330,6 +347,11 @@ static enum act_parse_ret parse_http_normalize_uri(const char **args, int *orig_ return ACT_RET_PRS_ERR; } } + else if (strcmp(args[cur_arg], "sort-query") == 0) { + cur_arg++; + + rule->action = ACT_NORMALIZE_URI_SORT_QUERY; + } else { memprintf(err, "unknown normalizer '%s'", args[cur_arg]); return ACT_RET_PRS_ERR; diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c index 53a3321d4..bd67542e8 100644 --- a/src/uri_normalizer.c +++ b/src/uri_normalizer.c @@ -13,6 +13,8 @@ #include <import/ist.h> #include <haproxy/api.h> +#include <haproxy/buf.h> +#include <haproxy/chunk.h> #include <haproxy/uri_normalizer.h> /* Merges `/../` with preceding path segments. @@ -140,6 +142,93 @@ enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist path, return err; } +/* Compares two query parameters by name. Query parameters are ordered + * as with memcmp. Shorter parameter names are ordered lower. Identical + * parameter names are compared by their pointer to maintain a stable + * sort. + */ +static int query_param_cmp(const void *a, const void *b) +{ + const struct ist param_a = *(struct ist*)a; + const struct ist param_b = *(struct ist*)b; + const struct ist param_a_name = iststop(param_a, '='); + const struct ist param_b_name = iststop(param_b, '='); + + int cmp = istdiff(param_a_name, param_b_name); + + if (cmp != 0) + return cmp; + + /* The contents are identical: Compare the pointer. */ + if (istptr(param_a) < istptr(param_b)) + return -1; + + if (istptr(param_a) > istptr(param_b)) + return 1; + + return 0; +} + +/* Sorts the parameters within the given query string. */ +enum uri_normalizer_err uri_normalizer_query_sort(const struct ist query, const char delim, struct ist *dst) +{ + enum uri_normalizer_err err; + + const size_t size = istclear(dst); + struct ist newquery = *dst; + + struct ist scanner = query; + + const struct buffer *trash = get_trash_chunk(); + struct ist *params = (struct ist *)b_orig(trash); + const size_t max_param = b_size(trash) / sizeof(*params); + size_t param_count = 0; + + size_t i; + + /* The query will have the same length. */ + if (size < istlen(query)) { + err = URI_NORMALIZER_ERR_ALLOC; + goto fail; + } + + /* Handle the leading '?'. */ + newquery = __istappend(newquery, istshift(&scanner)); + + while (istlen(scanner) > 0) { + const struct ist param = istsplit(&scanner, delim); + + if (param_count + 1 > max_param) { + err = URI_NORMALIZER_ERR_ALLOC; + goto fail; + } + + params[param_count] = param; + param_count++; + } + + qsort(params, param_count, sizeof(*params), query_param_cmp); + + for (i = 0; i < param_count; i++) { + if (i > 0) + newquery = __istappend(newquery, '&'); + + if (istcat(&newquery, params[i], size) < 0) { + /* This is impossible, because we checked the size of the destination buffer. */ + my_unreachable(); + err = URI_NORMALIZER_ERR_INTERNAL_ERROR; + goto fail; + } + } + + *dst = newquery; + + return URI_NORMALIZER_ERR_NONE; + + fail: + + return err; +} /* * Local variables: -- 2.31.1