Re: [PATCH v2] *_dom matching header functions now also split on :

2011-09-10 Thread Finn Arne Gangstad
On Fri, Sep 09, 2011 at 09:03:15PM +0200, Willy Tarreau wrote:
 Hi Finn,
 
 Yes, thanks for the update, I noticed this too and fixed it during
 tests. I'm OK with this method. I have just replaced the macros with
 inline functions (which I verified produced the same code) because
 the result is more readable. And I removed the typedef, as I consider
 that typedefs for scalars just add obfuscation, but I know it's a
 matter of taste.
 
 I'm about to merge the attached patch, unless you raise your hand in
 a very short time ;-)

Thanks, it's much nicer like this! The typedef was only there to
support the two different macro versions since they needed different
types in the delimiter. 

- Finn Arne



Re: [PATCH v2] *_dom matching header functions now also split on :

2011-09-10 Thread Willy Tarreau
On Sat, Sep 10, 2011 at 09:59:46AM +0200, Finn Arne Gangstad wrote:
 On Fri, Sep 09, 2011 at 09:03:15PM +0200, Willy Tarreau wrote:
  Hi Finn,
  
  Yes, thanks for the update, I noticed this too and fixed it during
  tests. I'm OK with this method. I have just replaced the macros with
  inline functions (which I verified produced the same code) because
  the result is more readable. And I removed the typedef, as I consider
  that typedefs for scalars just add obfuscation, but I know it's a
  matter of taste.
  
  I'm about to merge the attached patch, unless you raise your hand in
  a very short time ;-)
 
 Thanks, it's much nicer like this! The typedef was only there to
 support the two different macro versions since they needed different
 types in the delimiter. 

Perfect. I've merged it.

Thanks for the quick feedback,
Willy




Re: [PATCH v2] *_dom matching header functions now also split on :

2011-09-09 Thread Willy Tarreau
Hi Finn,

On Thu, Sep 08, 2011 at 07:13:44PM +0200, Finn Arne Gangstad wrote:
 Heh. Too many different versions of this now, so faulty version was
 sent. Stripping delimiters at the end of the pattern was broken.
 
 The following needs to be applied on top:

Yes, thanks for the update, I noticed this too and fixed it during
tests. I'm OK with this method. I have just replaced the macros with
inline functions (which I verified produced the same code) because
the result is more readable. And I removed the typedef, as I consider
that typedefs for scalars just add obfuscation, but I know it's a
matter of taste.

I'm about to merge the attached patch, unless you raise your hand in
a very short time ;-)

BTW, the hasZeroByte function made me realize that I had a bug in its
equivalent in halog : byte 0x80 was unexpectedly accepted despite the
efforts I made for it not to happen (I stupidly xored with the negation
of the 80808080 mask). I obviously did something wrong! So I took the
opportunity to fix the algo there too.

Thanks!
Willy

From e8c7ecc2ddecab4366a46cd949dc8a56d425fabc Mon Sep 17 00:00:00 2001
From: Finn Arne Gangstad fin...@pvv.org
Date: Fri, 9 Sep 2011 16:09:50 +0200
Subject: [MINOR] http: *_dom matching header functions now also split on :

*_dom is mostly used for matching Host headers, and host headers may
include port numbers. To avoid having to create multiple rules with
and without :port-number in hdr_dom rules, change the *_dom
matching functions to also handle : as a delimiter.

Typically there are rules like this in haproxy.cfg:

  acl is_foo  hdr_dom(host) www.foo.com

Most clients send Host: www.foo.com in their HTTP header, but some
send Host: www.foo.com:80 (which is allowed), and the above
rule will now work for those clients as well.

[Note: patch was edited before merge, any unexpected bug is mine /willy]
---
 src/acl.c |   53 ++---
 1 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/src/acl.c b/src/acl.c
index 9d9a746..33f00ed 100644
--- a/src/acl.c
+++ b/src/acl.c
@@ -546,12 +546,35 @@ int acl_match_sub(struct acl_test *test, struct 
acl_pattern *pattern)
return ACL_PAT_FAIL;
 }
 
+/* Background: Fast way to find a zero byte in a word
+ * http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ * hasZeroByte = (v - 0x01010101UL)  ~v  0x80808080UL;
+ *
+ * To look for 4 different byte values, xor the word with those bytes and
+ * then check for zero bytes:
+ *
+ * v = (((unsigned char)c * 0x1010101U) ^ delimiter)
+ * where delimiter is the 4 byte values to look for (as an uint)
+ * and c is the character that is being tested
+ */
+static inline unsigned int is_delimiter(unsigned char c, unsigned int mask)
+{
+   mask ^= (c * 0x01010101); /* propagate the char to all 4 bytes */
+   return (mask - 0x01010101)  ~mask  0x80808080U;
+}
+
+static inline unsigned int make_4delim(unsigned char d1, unsigned char d2, 
unsigned char d3, unsigned char d4)
+{
+   return d1  24 | d2  16 | d3  8 | d4;
+}
+
 /* This one is used by other real functions. It checks that the pattern is
  * included inside the tested string, but enclosed between the specified
- * delimitor, or a '/' or a '?' or at the beginning or end of the string.
- * The delimitor is stripped at the beginning or end of the pattern.
+ * delimiters or at the beginning or end of the string. The delimiters are
+ * provided as an unsigned int made by make_4delim() and match up to 4 
different
+ * delimiters. Delimiters are stripped at the beginning and end of the pattern.
  */
-static int match_word(struct acl_test *test, struct acl_pattern *pattern, char 
delim)
+static int match_word(struct acl_test *test, struct acl_pattern *pattern, 
unsigned int delimiters)
 {
int may_match, icase;
char *c, *end;
@@ -560,13 +583,13 @@ static int match_word(struct acl_test *test, struct 
acl_pattern *pattern, char d
 
pl = pattern-len;
ps = pattern-ptr.str;
-   while (pl  0  (*ps == delim || *ps == '/' || *ps == '?')) {
+
+   while (pl  0  is_delimiter(*ps, delimiters)) {
pl--;
ps++;
}
 
-   while (pl  0 
-  (ps[pl - 1] == delim || ps[pl - 1] == '/' || ps[pl - 1] == '?'))
+   while (pl  0  is_delimiter(ps[pl - 1], delimiters))
pl--;
 
if (pl  test-len)
@@ -576,7 +599,7 @@ static int match_word(struct acl_test *test, struct 
acl_pattern *pattern, char d
icase = pattern-flags  ACL_PAT_F_IGNORE_CASE;
end = test-ptr + test-len - pl;
for (c = test-ptr; c = end; c++) {
-   if (*c == '/' || *c == delim || *c == '?') {
+   if (is_delimiter(*c, delimiters)) {
may_match = 1;
continue;
}
@@ -587,12 +610,12 @@ static int match_word(struct acl_test *test, struct 
acl_pattern *pattern, char d
if (icase) {

[PATCH v2] *_dom matching header functions now also split on :

2011-09-08 Thread Finn Arne Gangstad
*_dom is mostly used for matching Host headers, and host headers may
include port numbers. To avoid having to create multiple rules with
and without :port-number in hdr_dom rules, change the *_dom
matching functions to also handle : as a delimiter.

Typically there are rules like this in haproxy.cfg:

  acl is_foo  hdr_dom(host) www.foo.com

Most clients send Host: www.foo.com in their HTTP header, but some
send Host: www.foo.com:80 (which is allowed), and the above
rule will now work for those clients as well.
---

There are two versions here, one straightforward version which is more
or less identical to the existing code in performance, and one
optimized version which is significantly faster (at least on a 2 year
old core2 and a i7-2600, all I have to test on).



 src/acl.c |   55 ---
 1 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/src/acl.c b/src/acl.c
index 9d9a746..cb49b43 100644
--- a/src/acl.c
+++ b/src/acl.c
@@ -546,12 +546,38 @@ int acl_match_sub(struct acl_test *test, struct 
acl_pattern *pattern)
return ACL_PAT_FAIL;
 }
 
+#if 0
+/* Straightforward implementation first - resaonably quick, but
+   can do better. */
+typedef const char *delimiter_mask_t;
+#define DELIMITER_MASK(a,b,c,d) a b c d
+#define IS_DELIMITER(c) (delimiter[0] == (c) || delimiter[1] == (c) || 
delimiter[2] == (c) || delimiter[3] == (c))
+
+#else
+
+/* Fast version. Background: Fast way to find a zero byte in a word
+ * http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ * hasZeroByte = (v - 0x01010101UL)  ~v  0x80808080UL;
+ *
+ * To look for 4 different byte values, xor the word with those bytes and
+ * then check for zero bytes:
+ *
+ * v = (((unsigned char)c * 0x1010101U) ^ delimiter)
+ * where delimiter is the 4 byte values to look for (as an uint)
+ * and c is the character that is being tested
+ */
+typedef unsigned int delimiter_mask_t;
+#define DELIMITER_MASK(a,b,c,d) ((unsigned char)(*a) | (unsigned char)(*b)  
8 | (unsigned char)(*c)  16 | (unsigned char)(*d)  24)
+#define IS_DELIMITER(c) (unsigned char)c * 0x1010101U) ^ delimiter) - 
0x01010101)  ~(((unsigned char)c * 0x1010101U) ^ delimiter)  0x80808080U)
+
+#endif
+
 /* This one is used by other real functions. It checks that the pattern is
  * included inside the tested string, but enclosed between the specified
- * delimitor, or a '/' or a '?' or at the beginning or end of the string.
- * The delimitor is stripped at the beginning or end of the pattern.
+ * delimiters or at the beginning or end of the string.
+ * Delimiters are stripped at the beginning and end of the pattern.
  */
-static int match_word(struct acl_test *test, struct acl_pattern *pattern, char 
delim)
+static int match_word(struct acl_test *test, struct acl_pattern *pattern, 
delimiter_mask_t delimiter)
 {
int may_match, icase;
char *c, *end;
@@ -560,13 +586,12 @@ static int match_word(struct acl_test *test, struct 
acl_pattern *pattern, char d
 
pl = pattern-len;
ps = pattern-ptr.str;
-   while (pl  0  (*ps == delim || *ps == '/' || *ps == '?')) {
+   while (pl  0  IS_DELIMITER(*ps)) {
pl--;
ps++;
}
 
-   while (pl  0 
-  (ps[pl - 1] == delim || ps[pl - 1] == '/' || ps[pl - 1] == '?'))
+   while (pl  0  IS_DELIMITER(*ps))
pl--;
 
if (pl  test-len)
@@ -576,7 +601,7 @@ static int match_word(struct acl_test *test, struct 
acl_pattern *pattern, char d
icase = pattern-flags  ACL_PAT_F_IGNORE_CASE;
end = test-ptr + test-len - pl;
for (c = test-ptr; c = end; c++) {
-   if (*c == '/' || *c == delim || *c == '?') {
+   if (IS_DELIMITER(*c)) {
may_match = 1;
continue;
}
@@ -587,12 +612,12 @@ static int match_word(struct acl_test *test, struct 
acl_pattern *pattern, char d
if (icase) {
if ((tolower(*c) == tolower(*ps)) 
(strncasecmp(ps, c, pl) == 0) 
-   (c == end || c[pl] == '/' || c[pl] == delim || 
c[pl] == '?'))
+   (c == end || IS_DELIMITER(c[pl])))
return ACL_PAT_PASS;
} else {
if ((*c == *ps) 
(strncmp(ps, c, pl) == 0) 
-   (c == end || c[pl] == '/' || c[pl] == delim || 
c[pl] == '?'))
+   (c == end || IS_DELIMITER(c[pl])))
return ACL_PAT_PASS;
}
may_match = 0;
@@ -601,21 +626,21 @@ static int match_word(struct acl_test *test, struct 
acl_pattern *pattern, char d
 }
 
 /* Checks that the pattern is included inside the tested string, but enclosed
- * between slashes or at the beginning or end of the string. Slashes at the
- * beginning or end of