tools/CppunitTest_tools_test.mk    |    1 
 tools/qa/cppunit/test_Wildcard.cxx |   46 ++++++++++++++++++++++
 tools/source/fsys/wldcrd.cxx       |   77 ++++++++++++++++++++-----------------
 3 files changed, 89 insertions(+), 35 deletions(-)

New commits:
commit 2143fa31b9035c7c2cf302ccd3907d0853132e8f
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Wed Mar 30 22:41:49 2022 +0300
Commit:     Xisco Fauli <xiscofa...@libreoffice.org>
CommitDate: Thu Mar 31 19:42:49 2022 +0200

    tdf#148253: fix matching algorithm
    
    Using 'flag' as a "continue the loop" marker allowed the loop to keep
    processing the wildcard past its last character. This did not crash,
    because the wildcard was a subview of a larger string with separators,
    but it made the match fail.
    
    Change-Id: I308058b68c59d5719f3c8b5f5656998a95a9ba09
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/132336
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>
    (cherry picked from commit 50add7c97e75d604287218f49c9283aab052fdf0)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/132374
    Reviewed-by: Xisco Fauli <xiscofa...@libreoffice.org>

diff --git a/tools/CppunitTest_tools_test.mk b/tools/CppunitTest_tools_test.mk
index 096b3ddb1835..be47c09ab191 100644
--- a/tools/CppunitTest_tools_test.mk
+++ b/tools/CppunitTest_tools_test.mk
@@ -37,6 +37,7 @@ $(eval $(call gb_CppunitTest_add_exception_objects,tools_test, \
     tools/qa/cppunit/test_cpu_runtime_detection_AVX2 \
     tools/qa/cppunit/test_cpu_runtime_detection_SSE2 \
     tools/qa/cppunit/test_cpu_runtime_detection_SSSE3 \
+    tools/qa/cppunit/test_Wildcard \
 ))
 
 $(eval $(call gb_CppunitTest_add_exception_objects,tools_test,\
diff --git a/tools/qa/cppunit/test_Wildcard.cxx b/tools/qa/cppunit/test_Wildcard.cxx
new file mode 100644
index 000000000000..1760ca6932db
--- /dev/null
+++ b/tools/qa/cppunit/test_Wildcard.cxx
@@ -0,0 +1,46 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <sal/config.h>
+
+#include <sal/types.h>
+#include <cppunit/TestAssert.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <tools/wldcrd.hxx>
+
+namespace
+{
+class Test : public CppUnit::TestFixture
+{
+public:
+    void test_Wildcard();
+
+    CPPUNIT_TEST_SUITE(Test);
+    CPPUNIT_TEST(test_Wildcard);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+void Test::test_Wildcard()
+{
+    WildCard wildcard(u"*.html;*??a;*\\*abc;*\\?xyz", ';'); // tdf#148253
+    CPPUNIT_ASSERT(wildcard.Matches(u"foo.html"));
+    CPPUNIT_ASSERT(wildcard.Matches(u"foo.ht.html")); // test stepping back after partial match
+    CPPUNIT_ASSERT(wildcard.Matches(u"foo.html.html")); // test stepping back after full match
+    CPPUNIT_ASSERT(wildcard.Matches(u"??aa")); // test stepping back with question marks
+    CPPUNIT_ASSERT(wildcard.Matches(u"111*abc")); // test escaped asterisk
+    CPPUNIT_ASSERT(!wildcard.Matches(u"111-abc")); // test escaped asterisk
+    CPPUNIT_ASSERT(wildcard.Matches(u"111?xyz")); // test escaped question mark
+    CPPUNIT_ASSERT(!wildcard.Matches(u"111-xyz")); // test escaped question mark
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(Test);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/tools/source/fsys/wldcrd.cxx b/tools/source/fsys/wldcrd.cxx
index a9867c00ba51..e8769a00e047 100644
--- a/tools/source/fsys/wldcrd.cxx
+++ b/tools/source/fsys/wldcrd.cxx
@@ -27,67 +27,74 @@
  */
 bool WildCard::ImpMatch( std::u16string_view aWild, std::u16string_view aStr )
 {
-    int    pos=0;
-    int    flag=0;
+    const sal_Unicode* pPosAfterAsterisk = nullptr;
     const sal_Unicode* pWild = aWild.data();
     const sal_Unicode* pWildEnd = aWild.data() + aWild.size();
     const sal_Unicode* pStr = aStr.data();
     const sal_Unicode* pStrEnd = aStr.data() + aStr.size();
 
-    while ( pWild != pWildEnd || flag )
+    while (pWild != pWildEnd)
     {
         switch (*pWild)
         {
             case '?':
                 if ( pStr == pStrEnd )
                     return false;
-                break;
-
-            default:
-                if ( (*pWild == '\\') && (pWild + 1 != pWildEnd) && ((*(pWild+1)=='?') || (*(pWild+1) == '*')) )
+                break; // Match -> proceed to the next character
+            case '\\': // Escaping '?' and '*'; don't we need to escape '\\'?
+                if ((pWild + 1 != pWildEnd) && ((*(pWild + 1) == '?') || (*(pWild + 1) == '*')))
                     pWild++;
-                if ( *pWild != *pStr )
-                    if ( !pos )
-                        return false;
-                    else
-                        pWild += pos;
-                else
-                    break;
-                // WARNING/TODO: may cause execution of next case in some
-                // circumstances!
+                [[fallthrough]];
+            default: // No wildcard, literal match
+                if (pStr == pStrEnd)
+                    return false;
+                if (*pWild == *pStr)
+                    break; // Match -> proceed to the next character
+                if (!pPosAfterAsterisk)
+                    return false;
+                pWild = pPosAfterAsterisk;
                 [[fallthrough]];
             case '*':
                 while ( pWild != pWildEnd && *pWild == '*' )
                     pWild++;
                 if ( pWild == pWildEnd )
                     return true;
-                flag = 1;
-                pos  = 0;
+                // Consider strange things like "**?*?*"
+                while (*pWild == '?')
+                {
+                    if (pStr == pStrEnd)
+                        return false;
+                    pWild++;
+                    pStr++;
+                    while (pWild != pWildEnd && *pWild == '*')
+                        pWild++;
+                    if (pWild == pWildEnd)
+                        return true;
+                }
+                // At this point, we are past wildcards, and a literal match must follow
                 if ( pStr == pStrEnd )
                     return false;
-                while ( pStr != pStrEnd && *pStr != *pWild )
+                pPosAfterAsterisk = pWild;
+                if ((*pWild == '\\') && (pWild + 1 != pWildEnd) && ((*(pWild + 1) == '?') || (*(pWild + 1) == '*')))
+                    pWild++;
+                while (*pStr != *pWild)
                 {
-                    if ( *pWild == '?' ) {
-                        pWild++;
-                        while ( pWild != pWildEnd && *pWild == '*' )
-                            pWild++;
-                    }
                     pStr++;
                     if ( pStr == pStrEnd )
-                        return pWild == pWildEnd;
+                        return false;
                 }
-                break;
+                break; // Match -> proceed to the next character
         }
-        if ( pWild != pWildEnd )
-            pWild++;
-        if ( pStr != pStrEnd )
-            pStr++;
-        else
-            flag = 0;
-        if ( flag )
-            pos--;
+        // We arrive here when the current characters in pWild and pStr match
+        assert(pWild != pWildEnd);
+        pWild++;
+        assert(pStr != pStrEnd);
+        pStr++;
+        if (pWild == pWildEnd && pPosAfterAsterisk && pStr != pStrEnd)
+            pWild = pPosAfterAsterisk; // Try again on the rest of pStr
     }
-    return ( pStr == pStrEnd ) && ( pWild == pWildEnd );
+    assert(pWild == pWildEnd);
+    return pStr == pStrEnd;
 }
 
 bool WildCard::Matches( std::u16string_view rString ) const

Reply via email to