[Libreoffice-commits] core.git: Branch 'libreoffice-7-5' - editeng/source sw/qa

Mike Kaganski (via logerrit) Mon, 22 May 2023 04:02:57 -0700

 editeng/source/misc/svxacorr.cxx    |  102 ++++++++++++++++++++----------------
 sw/qa/extras/uiwriter/uiwriter6.cxx |   24 ++++++++
 2 files changed, 82 insertions(+), 44 deletions(-)


New commits:
commit 9eae9409a739c21ea27a480f55b434df8e613acd
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Fri May 19 14:01:02 2023 +0300
Commit:     Xisco Fauli <xiscofa...@libreoffice.org>
CommitDate: Mon May 22 13:02:32 2023 +0200

    tdf#155407: fix the second replacement in FnChgToEnEmDash
    
    It was broken from the beginning. The second replacement could
    look into a wrong string when checking if the characters around
    the "--" are eligible; it could use obsolete indices in the
    document, ignoring the previous replacement that changed the
    length of the text.
    
    This also replaces a use of char* to hold Unicode codepoints to
    pass to lcl_IsInAsciiArr, with an array of sal_Unicode (because
    all the checked values fit into it).
    
    Change-Id: I949630abc564fc0875be0b92228846497bb1a022
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/152002
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>
    Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/152084
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>

diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx
index 1ae3ed49dfd4..054011e2a390 100644
--- a/editeng/source/misc/svxacorr.cxx
+++ b/editeng/source/misc/svxacorr.cxx
@@ -100,11 +100,13 @@ constexpr OUStringLiteral pXMLImplWordStart_ExcptLstStr = 
u"WordExceptList.xml";
 constexpr OUStringLiteral pXMLImplCplStt_ExcptLstStr = 
u"SentenceExceptList.xml";
 constexpr OUStringLiteral pXMLImplAutocorr_ListStr = u"DocumentList.xml";
 
-const char
+// tdf#54409 check also typographical quotation marks in the case of skipped 
ASCII quotation marks
+// Curious, why these \u0083\u0084\u0089\u0091\u0092\u0093\u0094 are handled 
as "begin characters"?
+constexpr std::u16string_view
     /* also at these beginnings - Brackets and all kinds of begin characters */
-    sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
+    sImplSttSkipChars = 
u"\"'([{\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094",
     /* also at these ends - Brackets and all kinds of begin characters */
-    sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
+    sImplEndSkipChars = 
u"\"')]}\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";
 
 static OUString EncryptBlockName_Imp(std::u16string_view rName);
 
@@ -171,20 +173,12 @@ static bool lcl_IsSymbolChar( CharClass const & rCC, 
const OUString& rTxt,
     return false;
 }
 
-static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c )
+static bool lcl_IsInArr(std::u16string_view arr, const sal_uInt32 c)
 {
-    // tdf#54409 check also typographical quotation marks in the case of 
skipped ASCII quotation marks
-    if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == 
sImplEndSkipChars) )
-        return true;
-
-    bool bRet = false;
-    for( ; *pArr; ++pArr )
-        if( *pArr == c )
-        {
-            bRet = true;
-            break;
-        }
-    return bRet;
+    for (const auto c1 : arr)
+        if (c1 == c)
+            return true;
+    return false;
 }
 
 SvxAutoCorrDoc::~SvxAutoCorrDoc()
@@ -311,6 +305,8 @@ ACFlags SvxAutoCorrect::GetDefaultFlags()
 
 constexpr sal_Unicode cEmDash = 0x2014;
 constexpr sal_Unicode cEnDash = 0x2013;
+constexpr OUStringLiteral sEmDash(u"\u2014");
+constexpr OUStringLiteral sEnDash(u"\u2013");
 constexpr sal_Unicode cApostrophe = 0x2019;
 constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
 constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
@@ -484,10 +480,10 @@ bool SvxAutoCorrect::FnChgOrdinalNumber(
         CharClass& rCC = GetCharClass(eLang);
 
         for (; nSttPos < nEndPos; ++nSttPos)
-            if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos]))
+            if (!lcl_IsInArr(sImplSttSkipChars, rTxt[nSttPos]))
                 break;
         for (; nSttPos < nEndPos; --nEndPos)
-            if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
+            if (!lcl_IsInArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
                 break;
 
 
@@ -557,6 +553,7 @@ bool SvxAutoCorrect::FnChgToEnEmDash(
     // rTxt may refer to the frame text that will change in the calls to 
rDoc.Delete / rDoc.Insert;
     // keep a local copy for later use
     OUString aOrigTxt = rTxt;
+    sal_Int32 nFirstReplacementTextLengthChange = 0;
 
     // replace " - " or " --" with "enDash"
     if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
@@ -569,7 +566,7 @@ bool SvxAutoCorrect::FnChgToEnEmDash(
                 '-' == rTxt[ nSttPos+1 ])
             {
                 sal_Int32 n;
-                for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr(
+                for( n = nSttPos+2; n < nEndPos && lcl_IsInArr(
                             sImplSttSkipChars,(cCh = rTxt[ n ]));
                         ++n )
                     ;
@@ -577,7 +574,7 @@ bool SvxAutoCorrect::FnChgToEnEmDash(
                 // found: " --[<AnySttChars>][A-z0-9]
                 if( rCC.isLetterNumeric( OUString(cCh) ) )
                 {
-                    for( n = nSttPos-1; n && lcl_IsInAsciiArr(
+                    for( n = nSttPos-1; n && lcl_IsInArr(
                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
                         ;
 
@@ -585,7 +582,8 @@ bool SvxAutoCorrect::FnChgToEnEmDash(
                     if( rCC.isLetterNumeric( OUString(cCh) ))
                     {
                         rDoc.Delete( nSttPos, nSttPos + 2 );
-                        rDoc.Insert( nSttPos, bAlwaysUseEmDash ? 
OUString(cEmDash) : OUString(cEnDash) );
+                        rDoc.Insert( nSttPos, bAlwaysUseEmDash ? sEmDash : 
sEnDash );
+                        nFirstReplacementTextLengthChange = -1; // 2 ch -> 1 ch
                         bRet = true;
                     }
                 }
@@ -604,7 +602,7 @@ bool SvxAutoCorrect::FnChgToEnEmDash(
             }
             if( ' ' == cCh )
             {
-                for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr(
+                for( n = nSttPos; n < nEndPos && lcl_IsInArr(
                             sImplSttSkipChars,(cCh = rTxt[ n ]));
                         ++n )
                     ;
@@ -613,14 +611,15 @@ bool SvxAutoCorrect::FnChgToEnEmDash(
                 if( rCC.isLetterNumeric( OUString(cCh) ) )
                 {
                     cCh = ' ';
-                    for( n = nTmpPos-1; n && lcl_IsInAsciiArr(
+                    for( n = nTmpPos-1; n && lcl_IsInArr(
                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
                             ;
                     // found: "[A-z0-9][<AnyEndChars>] - 
[<AnySttChars>][A-z0-9]
                     if( rCC.isLetterNumeric( OUString(cCh) ))
                     {
                         rDoc.Delete( nTmpPos, nTmpPos + nLen );
-                        rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? 
OUString(cEmDash) : OUString(cEnDash) );
+                        rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? sEmDash : 
sEnDash );
+                        nFirstReplacementTextLengthChange = 1 - nLen; // nLen 
ch -> 1 ch
                         bRet = true;
                     }
                 }
@@ -634,20 +633,35 @@ bool SvxAutoCorrect::FnChgToEnEmDash(
     bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
     if( 4 <= nEndPos - nSttPos )
     {
-        OUString sTmp( aOrigTxt.subView( nSttPos, nEndPos - nSttPos ) );
-        sal_Int32 nFndPos = sTmp.indexOf("--");
-        if( nFndPos != -1 && nFndPos &&
-            nFndPos + 2 < sTmp.getLength() &&
-            ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) ||
-              lcl_IsInAsciiArr( sImplEndSkipChars, aOrigTxt[ nFndPos - 1 ] )) 
&&
-            ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) ||
-            lcl_IsInAsciiArr( sImplSttSkipChars, aOrigTxt[ nFndPos + 2 ] )))
+        std::u16string_view sTmpView( aOrigTxt.subView( nSttPos, nEndPos - 
nSttPos ) );
+        size_t nFndPos = sTmpView.find(u"--");
+        if (nFndPos > 0 && nFndPos < sTmpView.size() - 2)
         {
-            nSttPos = nSttPos + nFndPos;
-            rDoc.Delete( nSttPos, nSttPos + 2 );
-            rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 
) &&
-                rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : 
OUString(cEmDash)) );
-            bRet = true;
+            // Use proper codepoints. Currently, CharClass::isLetterNumeric is 
broken, it
+            // uses the index *both* as code unit index (when checking it as 
ASCII), *and*
+            // as code point index (when passes to 
css::i18n::XCharacterClassification).
+            // Oh well... Anyway, single-codepoint strings will workaround it.
+            sal_Int32 nStart = nSttPos + nFndPos;
+            sal_uInt32 chStart = aOrigTxt.iterateCodePoints(&nStart, -1);
+            OUString sStart(&chStart, 1);
+            // No idea why sImplEndSkipChars is checked at start
+            if (rCC.isLetterNumeric(sStart, 0) || 
lcl_IsInArr(sImplEndSkipChars, chStart))
+            {
+                sal_Int32 nEnd = nSttPos + nFndPos + 2;
+                sal_uInt32 chEnd = aOrigTxt.iterateCodePoints(&nEnd, 1);
+                OUString sEnd(&chEnd, 1);
+                // No idea why sImplSttSkipChars is checked at end
+                if (rCC.isLetterNumeric(sEnd, 0) || 
lcl_IsInArr(sImplSttSkipChars, chEnd))
+                {
+                    nSttPos = nSttPos + nFndPos + 
nFirstReplacementTextLengthChange;
+                    rDoc.Delete(nSttPos, nSttPos + 2);
+                    rDoc.Insert(nSttPos,
+                                (bEnDash || (rCC.isDigit(sStart, 0) && 
rCC.isDigit(sEnd, 0))
+                                     ? sEnDash
+                                     : sEmDash));
+                    bRet = true;
+                }
+            }
         }
     }
     return bRet;
@@ -872,7 +886,7 @@ void SvxAutoCorrect::FnCapitalStartSentence( 
SvxAutoCorrDoc& rDoc,
         }
         else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
         {
-            if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && 
// These characters are allowed in words
+            if( (lcl_IsInArr( u"-'", *pStr ) || *pStr == cApostrophe) && // 
These characters are allowed in words
                 pWordStt - 1 == pStr &&
                 // Installation at beginning of paragraph. Replaced < by <= 
(#i38971#)
                 (pStart + 1) <= pStr &&
@@ -909,7 +923,7 @@ void SvxAutoCorrect::FnCapitalStartSentence( 
SvxAutoCorrDoc& rDoc,
 
     // Only capitalize, if string before specified characters is long enough
     if( *pDelim && 2 >= pDelim - pWordStt &&
-        lcl_IsInAsciiArr( ".-)>", *pDelim ) )
+        lcl_IsInArr( u".-)>", *pDelim ) )
         return;
 
     // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
@@ -1337,7 +1351,7 @@ void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, 
const OUString& rTxt,
                 {
                     sal_Unicode cPrev = rTxt[ nInsPos-1 ];
                     bSttQuote = NonFieldWordDelim(cPrev) ||
-                        lcl_IsInAsciiArr( "([{", cPrev ) ||
+                        lcl_IsInArr( u"([{", cPrev ) ||
                         ( cEmDash == cPrev ) ||
                         ( cEnDash == cPrev );
                     // tdf#38394 use opening quotation mark << in French 
l'<<word>>
@@ -1557,11 +1571,11 @@ void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& 
rDoc, const OUString& rTxt,
             {
                 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
                 while( nCapLttrPos1 < nInsPos &&
-                        lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ 
nCapLttrPos1 ] )
+                        lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
                         )
                         ++nCapLttrPos1;
                 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
-                        lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 
] )
+                        lcl_IsInArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
                         )
                         --nInsPos1;
 
@@ -1751,7 +1765,7 @@ OUString 
SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const O
     if( !nPos && !IsWordDelim( rTxt[ 0 ]))
         --nCapLttrPos;          // Beginning of paragraph and no Blank!
 
-    while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
+    while( lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
         if( ++nCapLttrPos >= nEnd )
             return sRet;
 
@@ -1907,7 +1921,7 @@ OUString EncryptBlockName_Imp(std::u16string_view rName)
     aName.append('#').append(rName);
     for (size_t nLen = rName.size(), nPos = 1; nPos < nLen; ++nPos)
     {
-        if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos]))
+        if (lcl_IsInArr( u"!/:.\\", aName[nPos]))
             aName[nPos] &= 0x0f;
     }
     return aName.makeStringAndClear();
diff --git a/sw/qa/extras/uiwriter/uiwriter6.cxx 
b/sw/qa/extras/uiwriter/uiwriter6.cxx
index c1fe8f97984a..c5f6a509f63b 100644
--- a/sw/qa/extras/uiwriter/uiwriter6.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter6.cxx
@@ -2499,6 +2499,30 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest6, testTdf151828)
     CPPUNIT_ASSERT_EQUAL(OUString("MyTableName"), pFormat->GetName());
 }
 
+CPPUNIT_TEST_FIXTURE(SwUiWriterTest6, testTdf155407)
+{
+    createSwDoc();
+    SwXTextDocument& rTextDoc = dynamic_cast<SwXTextDocument&>(*mxComponent);
+
+    {
+        emulateTyping(rTextDoc, u"Foo - 11’--’22 ");
+        // Without the fix in place, this would fail with
+        // - Expected: Foo – 11’—’22
+        // - Actual  : Foo – 11’--’22
+        CPPUNIT_ASSERT_EQUAL(OUString(u"Foo – 11’—’22 "), 
getParagraph(1)->getString());
+    }
+
+    dispatchCommand(mxComponent, ".uno:SelectAll", {}); // start again
+
+    {
+        emulateTyping(rTextDoc, u"Bar -- 111--222 ");
+        // Without the fix in place, this would fail with
+        // - Expected: Bar – 111–222
+        // - Actual  : Bar – 111-–22
+        CPPUNIT_ASSERT_EQUAL(OUString(u"Bar – 111–222 "), 
getParagraph(1)->getString());
+    }
+}
+
 CPPUNIT_PLUGIN_IMPLEMENT();
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

[Libreoffice-commits] core.git: Branch 'libreoffice-7-5' - editeng/source sw/qa

Reply via email to