WhiteBoxTests.cpp

Jan Holesovsky (via logerrit) Thu, 18 Jun 2020 13:43:23 -0700

Rebased ref, commits from common ancestor:
commit 0975d0bb0187e5dca76be4bedad0b48d2c35ea3c
Author:     Jan Holesovsky <ke...@collabora.com>
AuthorDate: Thu Jun 18 11:07:54 2020 +0200
Commit:     Andras Timar <andras.ti...@collabora.com>
CommitDate: Thu Jun 18 22:42:07 2020 +0200


    Sanitize the access_header.
    
    The access_header can contain a lot of nonsense, like whitespace around
    or additional \n's or \r's.  We used to sanitize that, but then
    regressed in e95413d151c3f0d9476063c8520dd477342ed235 where the
    "tokenize by any of \n\r" was by mistake replaced with "tokenize by
    string '\n\r'".
    
    Unfortunately the unit test didn't uncover that, and the further
    refactorings of the related code have hidden that even more.
    
    Change-Id: Ie2bf950d0426292770b599e40ee2401101162ff2
    Reviewed-on: https://gerrit.libreoffice.org/c/online/+/96637
    Reviewed-by: Ashod Nakashian <ashnak...@gmail.com>
    Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com>

diff --git a/common/Authorization.cpp b/common/Authorization.cpp
index abaddae26..ad4381ef5 100644
--- a/common/Authorization.cpp
+++ b/common/Authorization.cpp
@@ -52,8 +52,9 @@ void Authorization::authorizeRequest(Poco::Net::HTTPRequest& request) const
             // there might be more headers in here; like
             //   Authorization: Basic ....
             //   X-Something-Custom: Huh
-            // Regular expression evaluates and finds "\n\r" and tokenizes accordingly
-            StringVector tokens(Util::tokenize(_data, "\n\r"));
+            // Split based on \n's or \r's and trim, to avoid nonsense in the
+            // headers
+            StringVector tokens(Util::tokenizeAnyOf(_data, "\n\r"));
             for (auto it = tokens.begin(); it != tokens.end(); ++it)
             {
                 std::string token = tokens.getParam(*it);
diff --git a/common/Util.cpp b/common/Util.cpp
index 347fc4562..ee1aa19b1 100644
--- a/common/Util.cpp
+++ b/common/Util.cpp
@@ -987,6 +987,51 @@ namespace Util
         }
     #endif
 
+    StringVector tokenizeAnyOf(const std::string& s, const char* delimiters)
+    {
+        // Drop trailing spaces up front, so that the loop below never has to
+        // handle a space-only tail after the last token.
+        std::size_t length = s.length();
+        while (length > 0 && s[length - 1] == ' ')
+            --length;
+
+        if (length == 0)
+            return StringVector();
+
+        std::size_t delimitersLength = std::strlen(delimiters);
+        std::size_t start = 0;
+
+        std::vector<StringToken> tokens;
+        tokens.reserve(16);
+
+        while (start < length)
+        {
+            // skip leading spaces (only ' ' is trimmed, no other whitespace)
+            while (start < length && s[start] == ' ')
+                ++start;
+
+            // nothing but spaces was left
+            if (start == length)
+                break;
+
+            std::size_t end = s.find_first_of(delimiters, start, delimitersLength);
+            if (end == std::string::npos)
+                end = length;
+
+            // trim the trailing spaces of this token
+            std::size_t trimEnd = end;
+            while (start < trimEnd && s[trimEnd - 1] == ' ')
+                --trimEnd;
+
+            // record only non-empty tokens, as (offset, length) into s
+            if (start < trimEnd)
+                tokens.emplace_back(start, trimEnd - start);
+
+            start = end + 1; // move past the delimiter (or past the end, which stops the loop)
+        }
+
+        return StringVector(s, std::move(tokens));
+    }
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/common/Util.hpp b/common/Util.hpp
index fed1525e6..9c82b3068 100644
--- a/common/Util.hpp
+++ b/common/Util.hpp
@@ -433,29 +433,12 @@ namespace Util
         return StringVector(s, std::move(tokens));
     }
 
-    inline StringVector tokenize(const std::string& s, const char* delimiter)
-    {
-        if (s.empty())
-            return StringVector();
-
-        std::size_t start = 0;
-        std::size_t end = s.find(delimiter, start);
-
-        std::vector<StringToken> tokens;
-        tokens.reserve(16);
+    /** Tokenize based on any of the characters in 'delimiters'.
 
-        tokens.emplace_back(start, end - start);
-        start = end + std::strlen(delimiter);
-
-        while (end != std::string::npos)
-        {
-            end = s.find(delimiter, start);
-            tokens.emplace_back(start, end - start);
-            start = end + std::strlen(delimiter);
-        }
-
-        return StringVector(s, std::move(tokens));
-    }
+        I.e. when there is '\n\r' in there, any of them means a delimiter.
+        In addition, trim the values so there are no leading or trailing spaces.
+    */
+    StringVector tokenizeAnyOf(const std::string& s, const char* delimiters);
 
 #ifdef IOS
 
diff --git a/test/WhiteBoxTests.cpp b/test/WhiteBoxTests.cpp
index bb2f57e4c..a2e676198 100644
--- a/test/WhiteBoxTests.cpp
+++ b/test/WhiteBoxTests.cpp
@@ -33,6 +33,7 @@ class WhiteBoxTests : public CPPUNIT_NS::TestFixture
     CPPUNIT_TEST(testSplitting);
     CPPUNIT_TEST(testMessageAbbreviation);
     CPPUNIT_TEST(testTokenizer);
+    CPPUNIT_TEST(testTokenizerTokenizeAnyOf);
     CPPUNIT_TEST(testReplace);
     CPPUNIT_TEST(testRegexListMatcher);
     CPPUNIT_TEST(testRegexListMatcher_Init);
@@ -54,6 +55,7 @@ class WhiteBoxTests : public CPPUNIT_NS::TestFixture
     void testSplitting();
     void testMessageAbbreviation();
     void testTokenizer();
+    void testTokenizerTokenizeAnyOf();
     void testReplace();
     void testRegexListMatcher();
     void testRegexListMatcher_Init();
@@ -426,6 +428,67 @@ void WhiteBoxTests::testTokenizer()
     LOK_ASSERT_EQUAL(static_cast<size_t>(0), ints.size());
 }
 
+void WhiteBoxTests::testTokenizerTokenizeAnyOf()
+{
+    StringVector tokens;
+    const char delimiters[] = "\n\r"; // any of these delimits; and we trim whitespace
+
+    tokens = Util::tokenizeAnyOf("", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(0), tokens.size());
+
+    tokens = Util::tokenizeAnyOf("  ", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(0), tokens.size());
+
+    tokens = Util::tokenizeAnyOf("A", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(1), tokens.size());
+    LOK_ASSERT_EQUAL(std::string("A"), tokens[0]);
+
+    tokens = Util::tokenizeAnyOf("  A", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(1), tokens.size());
+    LOK_ASSERT_EQUAL(std::string("A"), tokens[0]);
+
+    tokens = Util::tokenizeAnyOf("A  ", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(1), tokens.size());
+    LOK_ASSERT_EQUAL(std::string("A"), tokens[0]);
+
+    tokens = Util::tokenizeAnyOf(" A ", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(1), tokens.size());
+    LOK_ASSERT_EQUAL(std::string("A"), tokens[0]);
+
+    tokens = Util::tokenizeAnyOf(" A  Z ", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(1), tokens.size());
+    LOK_ASSERT_EQUAL(std::string("A  Z"), tokens[0]);
+
+    tokens = Util::tokenizeAnyOf("\n", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(0), tokens.size());
+
+    tokens = Util::tokenizeAnyOf("\n\r\r\n", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(0), tokens.size());
+
+    tokens = Util::tokenizeAnyOf(" A  \nZ ", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(2), tokens.size());
+    LOK_ASSERT_EQUAL(std::string("A"), tokens[0]);
+    LOK_ASSERT_EQUAL(std::string("Z"), tokens[1]);
+
+    tokens = Util::tokenizeAnyOf(" A  Z\n ", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(1), tokens.size());
+    LOK_ASSERT_EQUAL(std::string("A  Z"), tokens[0]);
+
+    tokens = Util::tokenizeAnyOf(" A  Z  \n\r\r\n ", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(1), tokens.size());
+    LOK_ASSERT_EQUAL(std::string("A  Z"), tokens[0]);
+
+    tokens = Util::tokenizeAnyOf(" A  \n\r\r\n  \r  \n  Z  \n ", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(2), tokens.size());
+    LOK_ASSERT_EQUAL(std::string("A"), tokens[0]);
+    LOK_ASSERT_EQUAL(std::string("Z"), tokens[1]);
+
+    tokens = Util::tokenizeAnyOf("  \r A  \n  \r  \n  Z  \n ", delimiters);
+    LOK_ASSERT_EQUAL(static_cast<size_t>(2), tokens.size());
+    LOK_ASSERT_EQUAL(std::string("A"), tokens[0]);
+    LOK_ASSERT_EQUAL(std::string("Z"), tokens[1]);
+}
+
 void WhiteBoxTests::testReplace()
 {
     LOK_ASSERT_EQUAL(std::string("zesz one zwo flee"), Util::replace("test one two flee", "t", "z"));
commit 47a46ff7451b1b5f813196713d722743dc41baf9
Author:     Andras Timar <andras.ti...@collabora.com>
AuthorDate: Thu Jun 18 21:20:21 2020 +0200
Commit:     Andras Timar <andras.ti...@collabora.com>
CommitDate: Thu Jun 18 22:41:39 2020 +0200

    fix .gitreview for this branch
    
    Change-Id: Ie4e92529a73380ef13b6c4f6157cacc984a8fb79

diff --git a/.gitreview b/.gitreview
index 62b13043a..71d55941e 100644
--- a/.gitreview
+++ b/.gitreview
@@ -3,5 +3,5 @@ host=gerrit.libreoffice.org
 port=29418
 project=online
 defaultremote=logerrit
-defaultbranch=distro/collabora/co-4-2-4
+defaultbranch=distro/collabora/co-4-2
 
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

[Libreoffice-commits] online.git: Branch 'distro/collabora/co-4-2' - 2 commits - common/Authorization.cpp common/Util.cpp common/Util.hpp .gitreview test/WhiteBoxTests.cpp

Reply via email to