In domain-based routing and policy rules, suffix matching on hostnames is
often easier to express as a prefix match on reversed labels. A dedicated
converter makes this convenient with existing fetches and matchers.

This also has a performance benefit for large maps. Prefix string matches use
the prefix-tree index (PAT_MATCH_BEG with pat_idx_tree_pfx), while end matches
use the string-list index (PAT_MATCH_END with pat_idx_list_str), so
reversed-label lookups can avoid linear suffix scans.

This patch adds "reverse_dom", a string converter that reverses domain labels,
ignores one optional trailing dot on input, and rejects empty labels. Its
output never ends with a dot: appending one is intentionally left to the
caller so configurations can choose between exact matches, subdomain-only
matches, or an explicit dotted form built with "concat(.)" for prefix lookups.

Examples:
  example.com      -> com.example
  mail.example.com -> com.example.mail

The documentation is updated and a reg-test covers the converter itself, the
explicit dotted form for "map_beg()", and the subdomain-only "-m beg" case.
---
 doc/configuration.txt               | 33 ++++++++++
 reg-tests/converter/reverse_dom.map |  2 +
 reg-tests/converter/reverse_dom.vtc | 94 +++++++++++++++++++++++++++++
 src/sample.c                        | 65 +++++++++++++++++++-
 4 files changed, 191 insertions(+), 3 deletions(-)
 create mode 100644 reg-tests/converter/reverse_dom.map
 create mode 100644 reg-tests/converter/reverse_dom.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 9601e4564..0945ed1e2 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -21075,6 +21075,7 @@ param(name[,delim])                                
string       string
 port_only                                          string       integer
 protobuf(field_number[,field_type])                binary       binary
 regsub(regex,subst[,flags])                        string       string
+reverse_dom                                        string       string
 rfc7239_field(field)                               string       string
 rfc7239_is_valid                                   string       boolean
 rfc7239_n2nn                                       string       address / str
@@ -22558,6 +22559,38 @@ regsub(<regex>,<subst>[,<flags>])
      http-request redirect location 
%[url,'regsub("(foo|bar)([0-9]+)?","\2\1",i)']
      http-request redirect location 
%[url,regsub(\"(foo|bar)([0-9]+)?\",\"\2\1\",i)]
 
+reverse_dom
+  Converts a string containing an FQDN-like hostname into its reversed-label
+  form. A single trailing dot on the input is ignored. Empty labels cause the
+  converter to fail.
+
+  This converter does not lowercase its input and does not strip any port.
+  It is meant to be combined with existing converters such as "lower" or
+  "host_only" when needed.
+
+  The output never ends with a dot: appending one (e.g. with "concat(.)") is
+  intentionally left to the caller. This allows callers to decide whether they
+  want to match the apex too or only subdomains.
+
+  The reversed-label form is useful for large domain maps because it turns
+  domain suffix lookups into prefix lookups, allowing the use of indexed prefix
+  matchers such as "map_beg".
+
+  Examples:
+    "example.com" -> "com.example"
+    "mail.example.com" -> "com.example.mail"
+    "example.com." -> "com.example"
+
+    # match only subdomains of example.net, not the apex
+    acl example_net_sub req.hdr(Host),host_only,reverse_dom -m beg net.example.
+
+    # match only the apex
+    acl example_net_apex req.hdr(Host),host_only,reverse_dom -i net.example
+
+    # exact-or-subdomain prefix lookup using an explicit dotted form
+    http-request set-var(txn.rev_host) req.hdr(Host),host_only,reverse_dom,concat(.)
+    use_backend %[var(txn.rev_host),map_beg(/etc/haproxy/domains.map)]
+
 rfc7239_field(<field>)
   Extracts a single field/parameter from RFC 7239 compliant header value input.
 
diff --git a/reg-tests/converter/reverse_dom.map 
b/reg-tests/converter/reverse_dom.map
new file mode 100644
index 000000000..6c438bbc0
--- /dev/null
+++ b/reg-tests/converter/reverse_dom.map
@@ -0,0 +1,2 @@
+com.example. example
+com.example.mail. mail
diff --git a/reg-tests/converter/reverse_dom.vtc 
b/reg-tests/converter/reverse_dom.vtc
new file mode 100644
index 000000000..4248bb64a
--- /dev/null
+++ b/reg-tests/converter/reverse_dom.vtc
@@ -0,0 +1,94 @@
+varnishtest "reverse_dom converter test"
+
+feature ignore_unknown_macro
+
+server s1 {
+       rxreq
+       txresp -hdr "Connection: close"
+} -repeat 8 -start
+
+haproxy h1 -conf {
+    global
+    .if feature(THREAD)
+        thread-groups 1
+    .endif
+
+    defaults
+       mode http
+       timeout connect "${HAPROXY_TEST_TIMEOUT-5s}"
+       timeout client  "${HAPROXY_TEST_TIMEOUT-5s}"
+       timeout server  "${HAPROXY_TEST_TIMEOUT-5s}"
+
+    frontend fe
+       bind "fd@${fe}"
+
+       http-request set-var(txn.rev_const) str(MaIl.EXAMPLE.com),reverse_dom
+       http-request set-var(txn.rev_host) req.hdr(Host),host_only,reverse_dom 
if { req.hdr(Host) -m found }
+       http-request set-var(txn.rev_host_dot) var(txn.rev_host),concat(.) if { 
var(txn.rev_host) -m found }
+       http-request set-var(txn.route) 
var(txn.rev_host_dot),map_beg(${testdir}/reverse_dom.map,miss) if { 
var(txn.rev_host_dot) -m found }
+       http-request set-var(txn.sub_only) str(no)
+       http-request set-var(txn.sub_only) str(yes) if { var(txn.rev_host) -m 
beg com.example. }
+
+       http-request return status 200 hdr X-Rev-Const "%[var(txn.rev_const)]" 
hdr X-Rev-Host "%[var(txn.rev_host)]" hdr X-Route "%[var(txn.route)]" hdr 
X-Sub-Only "%[var(txn.sub_only)]"
+
+       default_backend be
+
+    backend be
+       server s1 ${s1_addr}:${s1_port}
+} -start
+
+client c1 -connect ${h1_fe_sock} {
+       txreq -url "/" -hdr "Host: example.com"
+       rxresp
+       expect resp.status == 200
+       expect resp.http.x-rev-const == "com.EXAMPLE.MaIl"
+       expect resp.http.x-rev-host == "com.example"
+       expect resp.http.x-route == "example"
+       expect resp.http.x-sub-only == "no"
+
+       txreq -url "/" -hdr "Host: mail.example.com"
+       rxresp
+       expect resp.status == 200
+       expect resp.http.x-rev-host == "com.example.mail"
+       expect resp.http.x-route == "mail"
+       expect resp.http.x-sub-only == "yes"
+
+       txreq -url "/" -hdr "Host: example.com."
+       rxresp
+       expect resp.status == 200
+       expect resp.http.x-rev-host == "com.example"
+       expect resp.http.x-route == "example"
+       expect resp.http.x-sub-only == "no"
+
+       txreq -url "/" -hdr "Host: localhost"
+       rxresp
+       expect resp.status == 200
+       expect resp.http.x-rev-host == "localhost"
+       expect resp.http.x-route == "miss"
+       expect resp.http.x-sub-only == "no"
+
+       txreq -url "/" -hdr "Host: badexample.com"
+       rxresp
+       expect resp.status == 200
+       expect resp.http.x-rev-host == "com.badexample"
+       expect resp.http.x-route == "miss"
+       expect resp.http.x-sub-only == "no"
+
+       txreq -url "/" -hdr "Host: foo..bar"
+       rxresp
+       expect resp.status == 200
+       expect resp.http.x-rev-host == "<undef>"
+       expect resp.http.x-route == "<undef>"
+
+       txreq -url "/" -hdr "Host: .example.com"
+       rxresp
+       expect resp.status == 200
+       expect resp.http.x-rev-host == "<undef>"
+       expect resp.http.x-route == "<undef>"
+
+       txreq -url "/" -hdr "Host: ."
+       rxresp
+       expect resp.status == 200
+       expect resp.http.x-rev-host == "<undef>"
+       expect resp.http.x-route == "<undef>"
+} -run
diff --git a/src/sample.c b/src/sample.c
index 24891d457..f3f4769cb 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -2311,6 +2311,64 @@ static int sample_conv_str2upper(const struct arg 
*arg_p, struct sample *smp, vo
        return 1;
 }
 
+/* Reverses the order of the dot-separated labels of the input string into a
+ * trash chunk. One trailing dot on input is ignored. Returns 1 on success, or
+ * 0 if the input is empty, has an empty label, or no chunk can be obtained. */
+static int sample_conv_reverse_dom(const struct arg *arg_p, struct sample 
*smp, void *private)
+{
+       const char *input = smp->data.u.str.area;
+       struct buffer *trash;
+       int input_len = smp->data.u.str.data;
+       int out = 0;
+       int label_end;
+       int label_start;
+       int label_len;
+
+       if (!input_len)
+               return 0;
+
+       if (input[input_len - 1] == '.') { /* ignore one trailing dot */
+               input_len--;
+               if (!input_len)
+                       return 0; /* input was only "." */
+       }
+
+       if (input[0] == '.') /* leading empty label */
+               return 0;
+
+       trash = get_trash_chunk_sz(input_len + 1); /* +1 for the trailing zero */
+       if (!trash)
+               return 0;
+
+       label_end = input_len;
+       while (label_end > 0) { /* walk labels from last to first */
+               label_start = label_end - 1;
+               while (label_start >= 0 && input[label_start] != '.')
+                       label_start--;
+               label_start++; /* first byte of the current label */
+
+               if (label_start == label_end) /* empty label */
+                       return 0;
+
+               label_len = label_end - label_start;
+               memcpy(trash->area + out, input + label_start, label_len);
+               out += label_len;
+
+               if (label_start == 0) /* first input label copied: done */
+                       break;
+
+               trash->area[out++] = '.';
+               label_end = label_start - 1; /* skip the dot before this label */
+       }
+
+       trash->area[out] = 0;
+       trash->data = out;
+       smp->data.u.str = *trash;
+       smp->data.type = SMP_T_STR;
+       smp->flags &= ~SMP_F_CONST;
+       return 1;
+}
+
 /* takes the IPv4 mask in args[0] and an optional IPv6 mask in args[1] */
 static int sample_conv_ipmask(const struct arg *args, struct sample *smp, void 
*private)
 {
@@ -5774,9 +5832,10 @@ static struct sample_conv_kw_list sample_conv_kws = 
{ILH, {
        { "param",   sample_conv_param,        ARG2(1,STR,STR),       
sample_conv_param_check,  SMP_T_STR,  SMP_T_STR  },
        { "regsub",  sample_conv_regsub,       ARG3(2,REG,STR,STR),   
sample_conv_regsub_check, SMP_T_STR,  SMP_T_STR  },
        { "sha1",    sample_conv_sha1,         0,                     NULL,     
                SMP_T_BIN,  SMP_T_BIN  },
-       { "strcmp",  sample_conv_strcmp,       ARG1(1,STR),           
smp_check_strcmp,         SMP_T_STR,  SMP_T_SINT },
-       { "host_only", sample_conv_host_only,  0,                     NULL,     
                SMP_T_STR,  SMP_T_STR  },
-       { "port_only", sample_conv_port_only,  0,                     NULL,     
                SMP_T_STR,  SMP_T_SINT },
+       { "strcmp",      sample_conv_strcmp,      ARG1(1,STR),        
smp_check_strcmp,         SMP_T_STR,  SMP_T_SINT },
+       { "host_only",   sample_conv_host_only,   0,                  NULL,     
                SMP_T_STR,  SMP_T_STR  },
+       { "port_only",   sample_conv_port_only,   0,                  NULL,     
                SMP_T_STR,  SMP_T_SINT },
+       { "reverse_dom", sample_conv_reverse_dom, 0,                  NULL,     
                SMP_T_STR,  SMP_T_STR  },
 
        /* gRPC converters. */
        { "ungrpc", sample_conv_ungrpc,    ARG2(1,PBUF_FNUM,STR), 
sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN  },
-- 
2.54.0

Reply via email to