i18npool/source/search/textsearch.cxx |   21 ++++++++++++++-------
 sw/qa/extras/uiwriter/uiwriter.cxx    |   25 +++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 7 deletions(-)

New commits:
commit 1ec381335c7b2eef1443ce3756a35c3165b7964a
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Fri Feb 28 10:13:53 2020 +0300
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Fri Feb 28 11:12:18 2020 +0100

    tdf#130984: use RegexMatcher::region to properly limit the search
    
    This allows passing enough of the text into the matcher to provide
    context for anchors/assertions while, at the same time, limiting the
    search region correctly in cases where the end position is not at
    the end of the passed text, such as when searching only inside runs
    of text that have the specified attributes.
    
    Change-Id: I6d1ff379c61cec734c0aa2a1dd913b1a73c5b84d
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/89660
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>

diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx
index 964dc6c0b256..0efa67bcf33a 100644
--- a/i18npool/source/search/textsearch.cxx
+++ b/i18npool/source/search/textsearch.cxx
@@ -896,9 +896,15 @@ void TextSearch::RESrchPrepare( const 
css::util::SearchOptions2& rOptions)
 }
 
 
-static bool lcl_findRegex( std::unique_ptr<icu::RegexMatcher> const & 
pRegexMatcher, sal_Int32 nStartPos, UErrorCode & rIcuErr )
+static bool lcl_findRegex(std::unique_ptr<icu::RegexMatcher> const& 
pRegexMatcher,
+                          sal_Int32 nStartPos, sal_Int32 nEndPos, UErrorCode& 
rIcuErr)
 {
-    if (!pRegexMatcher->find( nStartPos, rIcuErr))
+    pRegexMatcher->region(nStartPos, nEndPos, rIcuErr);
+    pRegexMatcher->useAnchoringBounds(false); // use whole text's anchoring 
bounds, not region's
+    pRegexMatcher->useTransparentBounds(true); // take text outside of the 
region into account for
+                                               // look-ahead/behind assertions
+
+    if (!pRegexMatcher->find(rIcuErr))
     {
         /* TODO: future versions could pass the UErrorCode or translations
          * thereof to the caller, for example to inform the user of
@@ -930,7 +936,7 @@ SearchResult TextSearch::RESrchFrwrd( const OUString& 
searchStr,
     // search until there is a valid match
     for(;;)
     {
-        if (!lcl_findRegex( pRegexMatcher, startPos, nIcuErr))
+        if (!lcl_findRegex( pRegexMatcher, startPos, endPos, nIcuErr))
             return aRet;
 
         // #i118887# ignore zero-length matches e.g. "a*" in "bc"
@@ -979,9 +985,10 @@ SearchResult TextSearch::RESrchBkwrd( const OUString& 
searchStr,
     // TODO: use ICU's backward searching once it becomes available
     //       as its replacement using forward search is not as good as the 
real thing
     UErrorCode nIcuErr = U_ZERO_ERROR;
-    const IcuUniString aSearchTargetStr( reinterpret_cast<const 
UChar*>(searchStr.getStr()), startPos);
+    const IcuUniString aSearchTargetStr(reinterpret_cast<const 
UChar*>(searchStr.getStr()),
+                                        searchStr.getLength());
     pRegexMatcher->reset( aSearchTargetStr);
-    if (!lcl_findRegex( pRegexMatcher, endPos, nIcuErr))
+    if (!lcl_findRegex( pRegexMatcher, endPos, startPos, nIcuErr))
         return aRet;
 
     // find the last match
@@ -1003,7 +1010,7 @@ SearchResult TextSearch::RESrchBkwrd( const OUString& 
searchStr,
         bFirst = false;
         if( nFoundEnd == nLastPos)
             ++nFoundEnd;
-    } while( lcl_findRegex( pRegexMatcher, nFoundEnd, nIcuErr));
+    } while( lcl_findRegex( pRegexMatcher, nFoundEnd, startPos, nIcuErr));
 
     // Ignore all zero-length matches except "$" anchor on first match.
     if (nGoodPos == nGoodEnd)
@@ -1015,7 +1022,7 @@ SearchResult TextSearch::RESrchBkwrd( const OUString& 
searchStr,
     }
 
     // find last match again to get its details
-    lcl_findRegex( pRegexMatcher, nGoodPos, nIcuErr);
+    lcl_findRegex( pRegexMatcher, nGoodPos, startPos, nIcuErr);
 
     // fill in the details of the last match
     const int nGroupCount = pRegexMatcher->groupCount();
diff --git a/sw/qa/extras/uiwriter/uiwriter.cxx b/sw/qa/extras/uiwriter/uiwriter.cxx
index 669471d4ea3c..8c4123c31a6b 100644
--- a/sw/qa/extras/uiwriter/uiwriter.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter.cxx
@@ -2302,6 +2302,31 @@ void SwUiWriterTest::testTextSearch()
     uno::Reference<container::XIndexAccess> 
xIndex2(xReplace->findAll(xSearchDes));
     CPPUNIT_ASSERT_EQUAL(sal_Int32(3), xIndex2->getCount());
     // regex tests
+    xSearchDes->setPropertyValue("SearchRegularExpression", 
uno::makeAny(true));
+    // regex: test correct matching combined with attributes like BOLD
+    xSearchDes->setSearchString(".*"); // should match all bold words in the 
text
+    xIndex.set(xReplace->findAll(xSearchDes), uno::UNO_SET_THROW);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), xIndex->getCount());
+    uno::Reference<text::XTextRange> xFound(xIndex->getByIndex(0), 
uno::UNO_QUERY_THROW);
+    CPPUNIT_ASSERT_EQUAL(OUString("Hello"), xFound->getString());
+    xFound.set(xIndex->getByIndex(1), uno::UNO_QUERY_THROW);
+    CPPUNIT_ASSERT_EQUAL(OUString("This"), xFound->getString());
+    xFound.set(xIndex->getByIndex(2), uno::UNO_QUERY_THROW);
+    CPPUNIT_ASSERT_EQUAL(OUString("task"), xFound->getString());
+    // regex: test anchor combined with attributes like BOLD
+    xSearchDes->setSearchString("^.*|.*$"); // should match first and last 
words (they are bold)
+    xIndex.set(xReplace->findAll(xSearchDes), uno::UNO_SET_THROW);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), xIndex->getCount());
+    xFound.set(xIndex->getByIndex(0), uno::UNO_QUERY_THROW);
+    CPPUNIT_ASSERT_EQUAL(OUString("Hello"), xFound->getString());
+    xFound.set(xIndex->getByIndex(1), uno::UNO_QUERY_THROW);
+    CPPUNIT_ASSERT_EQUAL(OUString("task"), xFound->getString());
+    // regex: test look-ahead/look-behind assertions outside of the bold text
+    xSearchDes->setSearchString("(?<= ).*(?= )"); // should match second bold 
word
+    xIndex.set(xReplace->findAll(xSearchDes), uno::UNO_SET_THROW);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), xIndex->getCount());
+    xFound.set(xIndex->getByIndex(0), uno::UNO_QUERY_THROW);
+    CPPUNIT_ASSERT_EQUAL(OUString("This"), xFound->getString());
     xReplaceDes->setPropertyValue("SearchRegularExpression", 
uno::makeAny(true));
     // regex: test correct match of paragraph start
     xReplaceDes->setSearchString("^."); // should only match first character 
of the paragraph
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

Reply via email to