Patch attached for just these SBuf and related unit test changes.

On 11/04/2014 10:24 a.m., Alex Rousskov wrote:
> On 04/10/2014 05:47 AM, Amos Jeffries wrote:
<snip>
> 
>> +    int cmp(const char *s, size_t n) const;
>> +    int caseCmp(const char *s, size_t n) const;
> 
> SBuf is currently using size_type, not size_t for comparison methods.
> Size_t is not used anywhere in SBuf.h AFAICT. Do we have to add size_t
> into the mix? If you start using size_type instead, please note that the
> type casting inside the methods will go away (but new type casts in the
> callers might be needed).

Done and const n.

> 
>> +    /// comparison with a C-string
>> +    int cmp(const char *s, size_t n) const;
>> +
>> +    /// case-insensitive comparison with a C-string
>> +    int caseCmp(const char *s, size_t n) const;
> 
> There is no good reason to force callers to call length(s) IMO. Please
> add a default npos value for n. This will also make the new methods more
> consistent with the existing ones.
> 

The situation for C-string is different from SBuf. Since SBuf contain
lengths and guarantee non-NULL buffers we are not using the N parameter
for any bounds safety.
 Whereas with C-string it is required for over-read bounds safety.

Done anyway.

> 
>> +    if (rv != 0)
>> +        return rv;
>> +    if (length() == n)
>> +        return 0;
>> +    if (length() > n)
>> +        return 1;
>> +    return -1;
> 
> This seemingly correct logic actually leads to wrong comparison results:
> 
>     strncmp("foo", "f", 1) is 0
> but
>     SBuf("foo").cmp("f", 1) is +1
> 
> If you adapted that code from the existing SBuf::compare(), please note
> that the similar compare() code works because it is preceded by
> truncation of _both_ strings to [up to] n characters.
> 
> To minimize code duplication, you probably want to do here what old
> compare() does for old cmp() and caseCmp().
> 
> Finally, this bug also exposes the lack of comparison test cases. The
> attached unpolished patch adds a few. If you like them, please integrate
> with your changes and polish as needed. See TODO in patch preamble for a
> better approach though.
> 

Added with a slight modification to use the existing sign() converter.
Thank you, that let me find and fix several off-by-1 bugs and the
terminator handling bugs relatively easily.


> 
>> +int
>> +SBuf::cmp(const char *s, size_t n) const
>> +{
>> +    assert(s != NULL);
> 
> It may be better to allow NULL c-strings with zero n:
> 
>     if (!n)
>         return 0;
>     assert(s);
> 
> I do not insist on this change.
> 
> Same for the other caseCmp().
> 

Combined the two now same as the SBuf variant. With the above and some
added checks for handling NULL pointers and empty strings.

> 
>> +    size_type byteCompareLen = min(n, static_cast<size_t>(length()));
> ...
>> +    int rv = memcmp(buf(), s, byteCompareLen);
> ...
>> +    size_type byteCompareLen = min(n, static_cast<size_t>(length()));
> ...
>> +    int rv = memcasecmp(buf(), s, byteCompareLen);
> 
> Make these variables const if possible.
> 
> I would also make the "n" parameter itself const but that should be done
> for other cmp() methods as well then.
> 

Done all including n.

Amos
=== modified file 'src/SBuf.cc'
--- src/SBuf.cc 2014-04-06 07:08:04 +0000
+++ src/SBuf.cc 2014-04-11 14:30:48 +0000
@@ -360,62 +360,101 @@
     store_->mem[off_+pos] = toset;
     ++stats.setChar;
 }
 
 static int
 memcasecmp(const char *b1, const char *b2, SBuf::size_type len)
 {
     int rv=0;
     while (len > 0) {
         rv = tolower(*b1)-tolower(*b2);
         if (rv != 0)
             return rv;
         ++b1;
         ++b2;
         --len;
     }
     return rv;
 }
 
 int
-SBuf::compare(const SBuf &S, SBufCaseSensitive isCaseSensitive, size_type n) 
const
+SBuf::compare(const SBuf &S, SBufCaseSensitive isCaseSensitive, const 
size_type n) const
 {
     if (n != npos)
         return substr(0,n).compare(S.substr(0,n),isCaseSensitive);
 
-    size_type byteCompareLen = min(S.length(), length());
+    const size_type byteCompareLen = min(S.length(), length());
     ++stats.compareSlow;
     int rv = 0;
     if (isCaseSensitive == caseSensitive) {
         rv = memcmp(buf(), S.buf(), byteCompareLen);
     } else {
         rv = memcasecmp(buf(), S.buf(), byteCompareLen);
     }
     if (rv != 0)
         return rv;
     if (length() == S.length())
         return 0;
     if (length() > S.length())
         return 1;
     return -1;
 }
 
+int
+SBuf::compare(const char *s, SBufCaseSensitive isCaseSensitive, const 
size_type n) const
+{
+    // 0-length comparison,
+    // or full-length compare for NULL-string
+    if (!n || (n == npos && !s)) {
+        ++stats.compareFast;
+        return 0;
+    }
+
+    // N-length compare MUST provide a non-NULL C-string pointer
+    assert(s);
+
+    // recurse after finding length if unknown (including terminator byte)
+    if (n == npos)
+        return compare(s, isCaseSensitive, strlen(s)+1);
+
+    // if this SBuf is bigger than N truncate it.
+    // guaranteeing length() <= n for the following comparison
+    if (length() > n)
+        return substr(0,n).compare(s, isCaseSensitive, n);
+
+    const size_type byteCompareLen = min(n, length());
+    ++stats.compareSlow;
+    int rv = 0;
+    if (isCaseSensitive == caseSensitive) {
+        rv = memcmp(buf(), s, byteCompareLen);
+    } else {
+        rv = memcasecmp(buf(), s, byteCompareLen);
+    }
+    if (rv != 0)
+        return rv;
+    if (length() == n)
+        return 0;
+
+    // pretend we have a terminator and check against s.
+    return s[length()] == '\0' ? 0 : -1;
+}
+
 bool
 SBuf::startsWith(const SBuf &S, SBufCaseSensitive isCaseSensitive) const
 {
     debugs(24, 8, id << " startsWith " << S.id << ", caseSensitive: " <<
            isCaseSensitive);
     if (length() < S.length()) {
         debugs(24, 8, "no, too short");
         ++stats.compareFast;
         return false;
     }
     return (compare(S, isCaseSensitive, S.length()) == 0);
 }
 
 bool
 SBuf::operator ==(const SBuf & S) const
 {
     debugs(24, 8, id << " == " << S.id);
     if (length() != S.length()) {
         debugs(24, 8, "no, different lengths");
         ++stats.compareFast;

=== modified file 'src/SBuf.h'
--- src/SBuf.h  2014-04-06 07:08:04 +0000
+++ src/SBuf.h  2014-04-11 02:18:14 +0000
@@ -238,49 +238,62 @@
     char at(size_type pos) const {checkAccessBounds(pos); return 
operator[](pos);}
 
     /** direct-access set a byte at a specified operation.
      *
      * \param pos the position to be overwritten
      * \param toset the value to be written
      * \throw OutOfBoundsException when pos is of bounds
      * \note bounds is 0 <= pos < length(); caller must pay attention to 
signedness
      * \note performs a copy-on-write if needed.
      */
     void setAt(size_type pos, char toset);
 
     /** compare to other SBuf, str(case)cmp-style
      *
      * \param isCaseSensitive one of caseSensitive or caseInsensitive
      * \param n compare up to this many bytes. if npos (default), compare 
whole SBufs
      * \retval >0 argument of the call is greater than called SBuf
      * \retval <0 argument of the call is smaller than called SBuf
      * \retval 0  argument of the call has the same contents of called SBuf
      */
-    int compare(const SBuf &S, SBufCaseSensitive isCaseSensitive, size_type n 
= npos) const;
+    int compare(const SBuf &S, SBufCaseSensitive isCaseSensitive, const 
size_type n = npos) const;
 
     /// shorthand version for compare
-    inline int cmp(const SBuf &S, size_type n = npos) const {
+    inline int cmp(const SBuf &S, const size_type n = npos) const {
         return compare(S,caseSensitive,n);
     }
 
     /// shorthand version for case-insensitive comparison
-    inline int caseCmp(const SBuf &S, size_type n = npos) const {
+    inline int caseCmp(const SBuf &S, const size_type n = npos) const {
+        return compare(S,caseInsensitive,n);
+    }
+
+    /// comparison with a C-string
+    int compare(const char *s, SBufCaseSensitive isCaseSensitive, const 
size_type n) const;
+
+    /// shorthand version for C-string compare
+    inline int cmp(const char *S, const size_type n = npos) const {
+        return compare(S,caseSensitive,n);
+    }
+
+    /// shorthand version for case-insensitive C-string comparison
+    inline int caseCmp(const char *S, const size_type n = npos) const {
         return compare(S,caseInsensitive,n);
     }
 
     /** check whether the entire supplied argument is a prefix of the SBuf.
      *  \param S the prefix to match against
      *  \param isCaseSensitive one of caseSensitive or caseInsensitive
      *  \retval true argument is a prefix of the SBuf
      */
     bool startsWith(const SBuf &S, SBufCaseSensitive isCaseSensitive = 
caseSensitive) const;
 
     bool operator ==(const SBuf & S) const;
     bool operator !=(const SBuf & S) const;
     bool operator <(const SBuf &S) const {return (cmp(S) < 0);}
     bool operator >(const SBuf &S) const {return (cmp(S) > 0);}
     bool operator <=(const SBuf &S) const {return (cmp(S) <= 0);}
     bool operator >=(const SBuf &S) const {return (cmp(S) >= 0);}
 
     /** Consume bytes at the head of the SBuf
      *
      * Consume N chars at SBuf head, or to SBuf's end,

=== modified file 'src/tests/testSBuf.cc'
--- src/tests/testSBuf.cc       2014-02-08 13:36:42 +0000
+++ src/tests/testSBuf.cc       2014-04-11 14:32:13 +0000
@@ -227,74 +227,129 @@
 // note: can't use cppunit's CPPUNIT_TEST_EXCEPTION because TextException 
asserts, and
 // so the test can't be properly completed.
 void
 testSBuf::testSubscriptOpFail()
 {
     char c;
     c=literal.at(literal.length()); //out of bounds by 1
     //notreached
     std::cout << c << std::endl;
 }
 
 static int sign(int v)
 {
     if (v < 0)
         return -1;
     if (v>0)
         return 1;
     return 0;
 }
 
+static void
+testComparisonStdFull(const char *left, const char *right)
+{
+//    std::cerr << std::endl << " cmp(char*) npos " << left << " ?= " << right;
+    CPPUNIT_ASSERT_EQUAL(sign(strcmp(left, right)), 
sign(SBuf(left).cmp(right)));
+//    std::cerr << std::endl << " cmp(SBuf) npos " << left << " ?= " << right;
+    CPPUNIT_ASSERT_EQUAL(sign(strcmp(left, right)), 
sign(SBuf(left).cmp(SBuf(right))));
+//    std::cerr << std::endl << " caseCmp(char*) npos " << left << " ?= " << 
right;
+    CPPUNIT_ASSERT_EQUAL(sign(strcasecmp(left, right)), 
sign(SBuf(left).caseCmp(right)));
+//    std::cerr << std::endl << " caseCmp(SBuf) npos " << left << " ?= " << 
right;
+    CPPUNIT_ASSERT_EQUAL(sign(strcasecmp(left, right)), 
sign(SBuf(left).caseCmp(SBuf(right))));
+}
+
+static void
+testComparisonStdN(const char *left, const char *right, const size_t n)
+{
+//    std::cerr << std::endl << " cmp(SBuf) " << n << ' ' << left << " ?= " << 
right;
+    CPPUNIT_ASSERT_EQUAL(sign(strncmp(left, right, n)), 
sign(SBuf(left).cmp(SBuf(right), n)));
+//    std::cerr << std::endl << " cmp(char*) " << n << ' ' << SBuf(left) << " 
?= " << right;
+    CPPUNIT_ASSERT_EQUAL(sign(strncmp(left, right, n)), 
sign(SBuf(left).cmp(right, n)));
+//    std::cerr << std::endl << " caseCmp(SBuf) " << n << ' ' << left << " ?= 
" << right;
+    CPPUNIT_ASSERT_EQUAL(sign(strncasecmp(left, right, n)), 
sign(SBuf(left).caseCmp(SBuf(right), n)));
+//    std::cerr << std::endl << " caseCmp(char*) " << n << ' ' << SBuf(left) 
<< " ?= " << right;
+    CPPUNIT_ASSERT_EQUAL(sign(strncasecmp(left, right, n)), 
sign(SBuf(left).caseCmp(right, n)));
+}
+
+static void
+testComparisonStdOneWay(const char *left, const char *right)
+{
+//    std::cerr << std::endl << "CHECK: npos(" << (strlen(right)+1) << ") " << 
left << " ?= " << right;
+    testComparisonStdFull(left, right);
+    const size_t maxN = 2 + min(strlen(left), strlen(right));
+    for (size_t n = 0; n <= maxN; ++n) {
+//        std::cerr << std::endl << "CHECK: " << n << '/' << maxN << ' ' << 
left << " ?= " << right;
+        testComparisonStdN(left, right, n);
+//        std::cerr << std::endl;
+    }
+}
+
+static void
+testComparisonStd(const char *s1, const char *s2)
+{
+    testComparisonStdOneWay(s1, s2);
+    testComparisonStdOneWay(s2, s1);
+}
+
 void
 testSBuf::testComparisons()
 {
     //same length
     SBuf s1("foo"),s2("foe");
     CPPUNIT_ASSERT(s1.cmp(s2)>0);
     CPPUNIT_ASSERT(s1.caseCmp(s2)>0);
     CPPUNIT_ASSERT(s2.cmp(s1)<0);
     CPPUNIT_ASSERT_EQUAL(0,s1.cmp(s2,2));
     CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2,2));
     CPPUNIT_ASSERT(s1 > s2);
     CPPUNIT_ASSERT(s2 < s1);
     CPPUNIT_ASSERT_EQUAL(sign(s1.cmp(s2)),sign(strcmp(s1.c_str(),s2.c_str())));
     //different lengths
     s1.assign("foo");
     s2.assign("foof");
     CPPUNIT_ASSERT(s1.cmp(s2)<0);
     CPPUNIT_ASSERT_EQUAL(sign(s1.cmp(s2)),sign(strcmp(s1.c_str(),s2.c_str())));
     CPPUNIT_ASSERT(s1 < s2);
     // specifying the max-length and overhanging size
     CPPUNIT_ASSERT_EQUAL(1,SBuf("foolong").caseCmp(SBuf("foo"), 5));
     // case-insensive comaprison
     s1 = "foo";
     s2 = "fOo";
     CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2));
     CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2,2));
     // \0-clenliness test
     s1.assign("f\0oo",4);
     s2.assign("f\0Oo",4);
     CPPUNIT_ASSERT(s1.cmp(s2) > 0);
     CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2));
     CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2,3));
     CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2,2));
     CPPUNIT_ASSERT_EQUAL(0,s1.cmp(s2,2));
+
+    testComparisonStd("foo", "fooz");
+    testComparisonStd("foo", "foo");
+    testComparisonStd("foo", "f");
+    testComparisonStd("foo", "bar");
+
+    testComparisonStd("foo", "FOOZ");
+    testComparisonStd("foo", "FOO");
+    testComparisonStd("foo", "F");
 }
 
 void
 testSBuf::testConsume()
 {
     SBuf s1(literal),s2,s3;
     s2=s1.consume(4);
     s3.assign("The ");
     CPPUNIT_ASSERT_EQUAL(s2,s3);
     s3.assign("quick brown fox jumped over the lazy dog");
     CPPUNIT_ASSERT_EQUAL(s1,s3);
     s1.consume(40);
     CPPUNIT_ASSERT_EQUAL(s1,SBuf());
 }
 
 void
 testSBuf::testRawContent()
 {
     SBuf s1(literal);
     SBuf s2(s1);

Reply via email to