Hi,
attached patch is from branch lp:~squid/squid/characterset/
It extracts some of the work done in parser-ng to implement Alex'
Tokenizer API, bringing O(N) find-any{,-not}-of matching to SBuf.
Note: in IRC discussions, Amos mentioned find_first_of and
find_first_not_of not being naming convention compliant (that's right,
they are modeled after std::string). I'm fine with changing them as a
follow-up commit if this is the collegial decision; I'd avoid do to it
as part of this merge.
--
/kinkie
# Bazaar merge directive format 2 (Bazaar 0.90)
# revision_id: [email protected]
# target_branch: ../trunk/
# testament_sha1: 2b205c8cc59e789afd02c0f28c60e293d4cd0fab
# timestamp: 2013-12-15 12:47:44 +0100
# base_revision_id: [email protected]\
# zm096u228wthxqiw
#
# Begin patch
=== modified file 'src/Makefile.am'
--- src/Makefile.am 2013-12-05 15:26:22 +0000
+++ src/Makefile.am 2013-12-15 11:47:07 +0000
@@ -15,6 +15,7 @@
DnsLookupDetails.cc
SBUF_SOURCE= \
+ base/CharacterSet.h \
base/InstanceId.h \
MemBlob.h \
MemBlob.cc \
=== modified file 'src/SBuf.cc'
--- src/SBuf.cc 2013-11-27 15:52:03 +0000
+++ src/SBuf.cc 2013-12-15 11:47:07 +0000
@@ -688,12 +688,8 @@
}
SBuf::size_type
-SBuf::find_first_of(const SBuf &set, size_type startPos) const
+SBuf::find_first_of(const CharacterSet &set, size_type startPos) const
{
- // if set is 1 char big, use the char search. Stats updated there
- if (set.length() == 1)
- return find(set[0], startPos);
-
++stats.find;
if (startPos == npos)
@@ -702,20 +698,41 @@
if (startPos >= length())
return npos;
- if (set.length() == 0)
- return npos;
-
- debugs(24, 7, "any of '" << set << "' " << " in id " << id);
- char *cur = buf()+startPos, *end = bufEnd();
+ debugs(24, 7, "first of characterset " << set.name << " in id " << id);
+ char *cur = buf()+startPos;
+ const char *end = bufEnd();
while (cur < end) {
- if (memchr(set.buf(), *cur, set.length()))
- return (cur-buf());
+ if (set[*cur])
+ return cur-buf();
++cur;
}
debugs(24, 7, "not found");
return npos;
}
+SBuf::size_type
+SBuf::find_first_not_of(const CharacterSet &set, size_type startPos) const
+{
+ ++stats.find;
+
+ if (startPos == npos)
+ return npos;
+
+ if (startPos >= length())
+ return npos;
+
+ debugs(24, 7, "first not of characterset " << set.name << " in id " << id);
+ char *cur = buf()+startPos;
+ const char *end = bufEnd();
+ while (cur < end) {
+ if (!set[*cur])
+ return cur-buf();
+ ++cur;
+ }
+ debugs(24, 7, "not found");
+ return length();
+}
+
/*
* TODO: borrow a sscanf implementation from Linux or similar?
* we'd really need a vsnscanf(3)... ? As an alternative, a
=== modified file 'src/SBuf.h'
--- src/SBuf.h 2013-12-04 18:37:08 +0000
+++ src/SBuf.h 2013-12-15 11:47:07 +0000
@@ -29,6 +29,7 @@
#ifndef SQUID_SBUF_H
#define SQUID_SBUF_H
+#include "base/CharacterSet.h"
#include "base/InstanceId.h"
#include "MemBlob.h"
#include "SBufExceptions.h"
@@ -513,7 +514,15 @@
* \param startPos if specified, ignore any occurrences before that position
* if npos, then npos is always returned
*/
- size_type find_first_of(const SBuf &set, size_type startPos = 0) const;
+ size_type find_first_of(const CharacterSet &set, size_type startPos = 0) const;
+
+ /** Find first occurrence character NOT in character set
+ *
+ * \return length() if all characters in the SBuf are from set
+ * \param startPos if specified, ignore any occurrences before that position
+ * if npos, then npos is always returned
+ */
+ size_type find_first_not_of(const CharacterSet &set, size_type startPos = 0) const;
/** sscanf-alike
*
=== added file 'src/base/CharacterSet.h'
--- src/base/CharacterSet.h 1970-01-01 00:00:00 +0000
+++ src/base/CharacterSet.h 2013-12-15 11:47:07 +0000
@@ -0,0 +1,44 @@
+#ifndef _SQUID_SRC_PARSER_CHARACTERSET_H
+#define _SQUID_SRC_PARSER_CHARACTERSET_H
+
+#include <vector>
+
+class CharacterSet
+{
+public:
+ CharacterSet(const char *label, const char * const c) : name(label), chars_(std::vector<bool>(256,false)) {
+ size_t clen = strlen(c);
+ for (size_t i = 0; i < clen; ++i)
+ chars_[static_cast<uint8_t>(c[i])] = true;
+ }
+
+ /// whether a given character exists in the set
+ bool operator[](unsigned char c) const {return chars_[static_cast<uint8_t>(c)];}
+
+ /// add a given char to the character set. c must be >= 0.
+ CharacterSet & add(const unsigned char c) {chars_[static_cast<uint8_t>(c)] = true; return *this; }
+
+ /// add all characters from the given CharacterSet to this one
+ const CharacterSet &operator +=(const CharacterSet &src) {
+ //precondition: src.chars_.size() == chars_.size()
+ std::vector<bool>::const_iterator s = src.chars_.begin();
+ const std::vector<bool>::const_iterator e = src.chars_.end();
+ std::vector<bool>::iterator d = chars_.begin();
+ while (s != e) {
+ if (*s)
+ *d = true;
+ ++s;
+ ++d;
+ }
+ return *this;
+ }
+
+ /// name of this character set
+ const char * name;
+
+private:
+ /// characters defined in this set
+ std::vector<bool> chars_; //std::vector<bool> is optimized
+};
+
+#endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */
=== modified file 'src/base/Makefile.am'
--- src/base/Makefile.am 2013-11-26 09:43:29 +0000
+++ src/base/Makefile.am 2013-12-15 11:47:07 +0000
@@ -12,6 +12,7 @@
AsyncJobCalls.h \
AsyncCallQueue.cc \
AsyncCallQueue.h \
+ CharacterSet.h \
TidyPointer.h \
CbcPointer.h \
InstanceId.h \
=== modified file 'src/icmp/Makefile.am'
--- src/icmp/Makefile.am 2013-12-03 07:49:13 +0000
+++ src/icmp/Makefile.am 2013-12-15 11:47:07 +0000
@@ -23,6 +23,7 @@
noinst_LTLIBRARIES = libicmp-core.la libicmp.la
SBUF_SOURCE= \
+ $(top_srcdir)/src/base/CharacterSet.h \
$(top_srcdir)/src/SBuf.h \
$(top_srcdir)/src/SBuf.cc \
$(top_srcdir)/src/MemBlob.h \
=== modified file 'src/tests/SBufFindTest.cc'
--- src/tests/SBufFindTest.cc 2013-11-11 12:06:11 +0000
+++ src/tests/SBufFindTest.cc 2013-12-15 11:47:07 +0000
@@ -105,7 +105,7 @@
{
theFindString = theStringHay.find_first_of(theStringNeedle, thePos);
theBareNeedlePos = theStringHay.find_first_of(theStringNeedle);
- theFindSBuf = theSBufHay.find_first_of(theSBufNeedle, thePos);
+ theFindSBuf = theSBufHay.find_first_of(CharacterSet("cs",theSBufNeedle.c_str()), thePos);
checkResults("find_first_of");
}
=== modified file 'src/tests/testSBuf.cc'
--- src/tests/testSBuf.cc 2013-11-27 15:52:03 +0000
+++ src/tests/testSBuf.cc 2013-12-15 11:47:07 +0000
@@ -759,23 +759,47 @@
SBuf::size_type idx;
// not found
- idx=haystack.find_first_of(SBuf("ADHRWYP"));
+ idx=haystack.find_first_of(CharacterSet("t1","ADHRWYP"));
CPPUNIT_ASSERT_EQUAL(SBuf::npos,idx);
// found at beginning
- idx=haystack.find_first_of(SBuf("THANDF"));
+ idx=haystack.find_first_of(CharacterSet("t2","THANDF"));
CPPUNIT_ASSERT_EQUAL(0U,idx);
//found at end of haystack
- idx=haystack.find_first_of(SBuf("QWERYVg"));
+ idx=haystack.find_first_of(CharacterSet("t3","QWERYVg"));
CPPUNIT_ASSERT_EQUAL(haystack.length()-1,idx);
//found in the middle of haystack
- idx=haystack.find_first_of(SBuf("QWERqYV"));
+ idx=haystack.find_first_of(CharacterSet("t4","QWERqYV"));
CPPUNIT_ASSERT_EQUAL(4U,idx);
}
void
+testSBuf::testFindFirstNotOf()
+{
+ SBuf haystack(literal);
+ SBuf::size_type idx;
+
+ // all chars from the set
+ idx=haystack.find_first_not_of(CharacterSet("t1",literal.c_str()));
+ CPPUNIT_ASSERT_EQUAL(haystack.length(),idx);
+
+ // found at beginning
+ idx=haystack.find_first_not_of(CharacterSet("t2","a"));
+ CPPUNIT_ASSERT_EQUAL(0U,idx);
+
+ //found at end of haystack
+ idx=haystack.find_first_not_of(CharacterSet("t3",literal.substr(0,literal.length()-1).c_str()));
+ CPPUNIT_ASSERT_EQUAL(haystack.length()-1,idx);
+
+ //found in the middle of haystack
+ idx=haystack.find_first_not_of(CharacterSet("t4","The"));
+ CPPUNIT_ASSERT_EQUAL(3U,idx);
+}
+
+
+void
testSBuf::testAutoFind()
{
SBufFindTest test;
=== modified file 'src/tests/testSBuf.h'
--- src/tests/testSBuf.h 2013-07-26 09:20:09 +0000
+++ src/tests/testSBuf.h 2013-12-15 11:47:07 +0000
@@ -35,6 +35,7 @@
CPPUNIT_TEST( testRFindChar );
CPPUNIT_TEST( testRFindSBuf );
CPPUNIT_TEST( testFindFirstOf );
+ CPPUNIT_TEST( testFindFirstNotOf );
CPPUNIT_TEST( testPrintf );
CPPUNIT_TEST( testScanf );
CPPUNIT_TEST( testCopy );
@@ -79,6 +80,7 @@
void testStartsWith();
void testSBufStream();
void testFindFirstOf();
+ void testFindFirstNotOf();
void testAutoFind();
void testStdStringOps();
};
# Begin bundle
IyBCYXphYXIgcmV2aXNpb24gYnVuZGxlIHY0CiMKQlpoOTFBWSZTWVHCPn4ACPLfgGR0fX///39n
/66////6YA79p9PHO3AeuS9FFUClAx3zl3vek6o665N7m6mqNGWstVQkklT8Jgin6m0ZEeo9KZlG
R6BlHqbU0BtJoAAZBKmiZMRpoKPSbKhpk2SabUAAaaAGgAAA0Jk1NT1TQMRkZA000AAAAAAAABIk
RomhJmAKehPSZTY0pskyPFNpBkyaNqNBoaDakhMEzVA0NAGgNBoAAAAADagBJEE0AEACahpk0nkS
fqjJmiaANDTQGjCUSQsrBFFC+Z4kwYtufgNerflG/xlj9LTY8kj7oWIHpR3z1XHBWuc/oMtKI1/5
FHEgMB3DQ2i1DmWspjzT9/eXBWUz7aVEwj4vl0TwseXByknNDhrXgx34YWaoiIz9GOKV0z3pUy22
0AUMSLazSxsi9eK6KIQYeEaqpzlW/KbwrgYVyhqvbI06oQJSBINb/dErPKYvpUkvgSxOP6aZnaNJ
hiccSDvA0AMbGwbTbbY2hsX1Kj/cQXasFJZnGlmp6nVEKZjEVGMWTWkSY8oCAdYSk1Bh5Kyhlk6N
OLg6DQ6xUowJzQ2MpGIKjyjOzCrhCoBBLARRYKC1cPwY5RiYrAnjdtUnX0JRCaK1sbEGhNHNRNTx
QM1JAyDCnQlKLh9iImdRqpjv3ltK1NLJJB72SFFiJw/22lsBPqXK4GyKx1muFtnvzKGIibUcEjru
2YxUFtL71jNpCVRLQm6xBPZGAiM2aD/c201hfgZUKmLY/DC4QSy6UcadBzZqc4kdSuQTAr31rMyb
dxEuZ0KASkl13dqsjM8YkVqm3EM8qCq7JzcVJWWfn6+rvs18F67d3m5S2agDe4IBjTTMzM7IGJNi
gBpdKOuaXK9aDczPScKkQt6BvxQ5Uq9vgXNFiCklAYbr1tB3c8I/j1dPJhx9UyWyfjciOIlGGzBK
cNcwpkHRELfzf8rt2orBsKXFs1c1Zhkdp1M6LhuGfyeqFazu9UP1R+Abo+lc6lixcASyZBdsvjoc
9TOzw0Ijw2u02hqTN9ecrUoKDKjlGMZ5vmMCQxI7gyjsLyZiwGMAMpWbeorYeYlviHLnmEil1dSi
GuQOpQcJjrezwshipMQYtoYWE0ViZFrg7C0ALCrlOVLZxQNtjS45CKlh0mcMZIIqLLN3NXZt5nO5
COTByZKphJmCE+SaMTvmnJJLmqvv6FYVzQkOXWlwxRQ5kqUV8uESO60ILlXQPSzrOBUKi0d+UJIi
2C0siQkVFcDmQ6kzCmMlJFMJCwLOSqORfKxIrWrW54aZHcMRLzB3H9nI9nYnkTuX1uw4Bpsc2sOY
zFd9Knop+sWMQvdAvgVXHuWK0IYGo24kztzmQeIPUsBIrtkcyy2BK7VASWF1bHA539kellerGqqO
/M3mM0JGKu3MbtFoXpDJSaAay2oSLz0bJ2pCadhDQ2G8snkIoTHIwMhx49ZVmjmBziTGKTRkqITU
DYEBxAgRHiFkCeJuErFPugpTbsl4zhlXyNJQAmMpMgFYRLhrvASG823mo2FlyokLjmK3KSgqDk5/
grcloRmR9K2kNIjPGVRcTttpzxjHBiRiWZHo2JysUjFZrGthGFpXxwhMYFWPKhrUGCG6TKDA0kBq
GRxlQ2y65YVEqsxhVJaE1AR2DSoW8VEJl44ktLvcMVleJNai2QhZFxJ222a7lPENG80sN2qMq2Eo
vGOESgcVSMxZQjqObEzebByRqDt1xxe6eAztllljOlZOveTWZUMi8dIuyIxGIpbR3gzHK0wJSN5Y
VGKiYzMm6ZyTxlASJFZzIEFbrOOZcY6CKFoioRcTLDXE20iTImmBE9a6MsyWrTXhFzHKcSOcDAVS
JqRWWEogLMe+6NxrhBR6Gky9S3DlL6ZVoNkaGQ8mEM6oW0a+AbemGq0tLhy4qqtEiQiJdwtOmKdC
+cxEZRKiM9AgoakXKa5LcLroMoG7gslEkIfg5skauhMpXIbI0CsbbtSKifrevIgbhIrLPx2quksd
B2sHjBtWK2kxIpq0CUysegkZFkKy1dMTMZAgFBTrjjMiFxGkYPN+Zp0uZB6piGQg4xW0qacOCRtG
zNRWZFReSI8ZEzqYOumOett17ab6r1rl2qlCqyEovBifUckc1xLCqSLprCS2DnOCtNoem7eY0pYR
KrIcOxUJaFZAxHHrXyiRiUZfktNxXEGvKOG8gXmLkid7CLylRxJwsZK1jaXDA+QxGzTbcYF2Ai9Y
KxTNrHMiRpQwHWJsMDQ5FQ5I0qJ2x063Es3R9BmmZWiznbN8/QQTLVuO6TEY5CKoSHY5iKiwSpFk
QLPo2rjwv3SKmlkaUxjjNJFc5yMfVuTq6/YZxgxXhfcIi+mtDF3e+USSzVXJQ2DA8BDY9xAxEho7
KzEBw6Ep2LVImUBmjADdP4rnQHYHWsQQrAJmGEzMze/2klCcYvYnKYGDeEnO7b8fBVJdKZ0u8oS/
GGc4uKqaySQ45bmJaiWtZHijeleDQiorUEbH7XJ0cERSxY3hmry9OmUEuLI0w3VUZuMWIwjYWBxF
gK4LexAvwnkp18ArCifGPxdn4pa0bByvn9b6i93HmEK4vOJc8GuFpKYYMEkLRBETlgChYraTkOSY
xgmJFX6QIPXRVHnTAuNGn7DrJXkjANByDzwniKxhSUKaJyHKDTzlZXA2ERZynrid0wIEAAUlLJFg
kGBXbxtTkpOUmyqMF2rqFeeC3BbGh/VB0EtIET+IjogyZJBsiyEOkTQdBV1K7CQOm2OiUqG0TQoW
z1YSstnLw7bseOMYhISsuHgNet3XKEZwDiIIjCYyYuvV5AjHWDQmXV1C6XnzI7gHGPBeHrPpEBLm
tHPU3606iJg2EjI3jHYTIokXGw46VGEjcQN54Yst6xLxjYAH51zJILD3lC4x82lnkeDOaxgHDlyy
MLbb+T7/CAGdiVnSRcQ61llJl/BDjp/taCkYc/DASCtKr+5Q+tzYZPBQZulK5nKTIGNt1MJOCTAC
RixaS8N2kzkVHeQ6nwEXJFqRtoQvuPA94o6rUgcEYEi88TasTtKek1fmOxVl1D8OFuotJ9BGp9+z
u+OqRB0J6p4nQe7PrV33wogrIwdoVuQCL/IRr63jFBDgAWpag4iU7zqGeJkLMDjNhQey+HW5pEp4
5ETeKYHbmYdCZ7e6fLtWpA6D0ABRYMTZD8gzQXAohE5hISC+qzCU2aErZLtjxa1FW1MVXuvLuNLd
IV9EDG0L+rmq/c1/2Oeo5cW3p3MDyO/428KyYpiPRWoSMd5hi1wJLdaeuAOMSJb5v3dtYENKgvax
iBg3pIRCV/AIuEMFvUSCy8j3PjxRzyYM4/d5TkmaKEwLmSk54+o8T1neRJD0SHP+d3gbcjoXlZQ9
owjcZ9/l5OTLhjaIyG+dBgg80GNBmQ0bM5DnTj5ESvtqSXHJk0xQkbahsLJ7mLThQpbucAWLb4Hw
iI69JQRF8TowAz0MiupEhMJfegTSCdi+AHYeOYttYAQGSyDkTR0NaxCu8r65/QPHzHH7NNtajLsr
uGzOIDg2JBwwucKuwDUAGWbLrZ8vwphGPTgtH+ckBBIngMxCfucFN0MkHPdEAJ7F2+PqquwGYRna
i5MksMRmXgC5bAkxkceOyhehWiRAhD9xhGceNHIJG2JedN4fGg6ABkCzQeZ7BDIH58Ksl7PVBmTA
5kkRgEBdfhjE80jiyBmQUZB5S5N1PNNd9N3isQ7suTbOy4ipsaDtVSwrxkGfQ23urr4bRLNqyl8y
bJcgjKrOv9Ngj7LKva2JbYDOmGHYcTwO1R2wiwjmWApF8sIM5KUSf6kEkAeKLJDd7mIlMgYtHHBA
EuF3ckyPIvLEIsomQmZIZWCMJCMTDbWMeIkWkMlfBDCW7eY+0YXGpWe3MSezm6EdyU7/vnBsogLe
+gAMu2K+1DjJkw0j9qZLKYWAu4VDA5A7i3yCbt4HIGSOQjiSOwCzPLGueDItM0X1+Puxsen2dwj3
rkJFeQgKSSaC9hAgO7un5B6eJM986DDkiNEooEzlBIHBQB/k9MEUQSk2u0uIoR82v+XwOz4SJbtz
iLgAw8sMIrRBzcspwKlNIkVlpvNiGjMhDEiEDuvJr0MwMy0reVTdIkcL8nSp/Ww5KBsK5hT7XeEV
32cXmgh2bA+TQXLQVZ7gdxjX/uGXxX76xHFBrNAWv3kUG9NDaNdDoJRFbPdwAPmkv1J9VzYa9oDd
YVSZQTEU6gmUB0l1QoUNBl0PqTa7pS2GYLBe2PC1LPXp4JibEb0JNitjVElEomxpLE4TSMgHoyCR
UtdaKWqBWqtBCtKIIBTbbbbbOgQOgCoeMuqOjIFkkwHYaZtkeDLczNAQerfE5LExraZlwHKqL7Km
czUHMXDk3ogpSasFUmD2DPUnCNzh2SSLfl0EfP2AdVisjO3pxMF/VdoMMDDAwmGTEwylEB0rPWFW
CgRkn2YgHhDHzVkfeu/h2CwdckLzvBe4R4d1x9CYRx3WJjXPy65qYSOJWBcEhkFO9VRsPSnJBXbh
pHZWuKZbkgEhKoF6TQhkYshHejDr5n1HQ4gzDH5PyXKP/xdyRThQkFHCPn4=