core.git: Branch 'libreoffice-25-2' - i18nutil/source include/i18nutil sw/qa

Mike Kaganski (via logerrit) Fri, 13 Jun 2025 22:49:27 -0700

 i18nutil/source/utility/unicode.cxx |   52 +++++++++++++++---------------------
 include/i18nutil/unicode.hxx        |    4 ++
 sw/qa/extras/uiwriter/uiwriter7.cxx |   16 +++++++++++
 3 files changed, 42 insertions(+), 30 deletions(-)


New commits:
commit 5d3e38f3b99e36af1cbc3af96bfdcfa1239477f9
Author:     Mike Kaganski <[email protected]>
AuthorDate: Tue Jun 10 22:13:43 2025 +0200
Commit:     Adolfo Jayme Barrientos <[email protected]>
CommitDate: Sat Jun 14 07:48:58 2025 +0200

    tdf#166943: zero passed to AllowMoreInput means "no more input"
    
    Users of ToggleUnicodeCodepoint keep reading next characters to the left,
    until AllowMoreInput returns false (or selection ends). When attempting to
    read more characters to the left than there are, SwCursorShell::GetChar
    returns 0. This needs to be treated by AllowMoreInput as a hard stop. It
    failed when the previous character was a combining character.
    
    Change-Id: I203b150154e1948d4cebfd69442e30a076710f46
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/186341
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <[email protected]>
    (cherry picked from commit 58a7c6ccfd3fa590460dba1ecbdef4483dcd5e08)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/186351
    Reviewed-by: Adolfo Jayme Barrientos <[email protected]>

diff --git a/i18nutil/source/utility/unicode.cxx 
b/i18nutil/source/utility/unicode.cxx
index c9bfbeeb0a80..1e88b0d5d6b4 100644
--- a/i18nutil/source/utility/unicode.cxx
+++ b/i18nutil/source/utility/unicode.cxx
@@ -1051,11 +1051,13 @@ OUString unicode::formatPercent(double dNumber,
 
 bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar)
 {
+    assert(!mbInputEnded);
+
+    if (uChar == 0)
+        return false;
+
     //arbitrarily chosen maximum length allowed - normal max usage would be 
around 30.
     if( maInput.getLength() > 255 )
-        mbAllowMoreChars = false;
-
-    if( !mbAllowMoreChars )
         return false;
 
     bool bPreventNonHex = false;
@@ -1067,7 +1069,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
         case css::i18n::UnicodeType::SURROGATE:
             if (bPreventNonHex || mbIsHexString)
             {
-                mbAllowMoreChars = false;
                 return false;
             }
 
@@ -1091,14 +1092,12 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
                 maInput.append(maUtf16);
             if( !maCombining.isEmpty() )
                 maInput.append(maCombining);
-            mbAllowMoreChars = false;
-            break;
+            return false;
 
         case css::i18n::UnicodeType::NON_SPACING_MARK:
         case css::i18n::UnicodeType::COMBINING_SPACING_MARK:
             if (bPreventNonHex || mbIsHexString)
             {
-                mbAllowMoreChars = false;
                 return false;
             }
 
@@ -1108,7 +1107,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
                 maInput = maUtf16;
                 if( !maCombining.isEmpty() )
                     maInput.append(maCombining);
-                mbAllowMoreChars = false;
                 return false;
             }
             maCombining.insertUtf32(0, uChar);
@@ -1121,7 +1119,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
                 maInput = maUtf16;
                 if( !maCombining.isEmpty() )
                     maInput.append(maCombining);
-                mbAllowMoreChars = false;
                 return false;
             }
 
@@ -1129,14 +1126,12 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
             {
                 maCombining.insertUtf32(0, uChar);
                 maInput = maCombining;
-                mbAllowMoreChars = false;
                 return false;
             }
 
             // 0 - 1f are control characters.  Do not process those.
             if( uChar < 0x20 )
             {
-                mbAllowMoreChars = false;
                 return false;
             }
 
@@ -1153,36 +1148,36 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
                     // treat as a normal character
                     else
                     {
-                        mbAllowMoreChars = false;
                         if( !bPreventNonHex )
                             maInput.insertUtf32(0, uChar);
+                        return false;
                     }
                     break;
                 case '+':
                     // + already found: skip when not U, or edge case of 
+U+xxxx
                     if( mbRequiresU || (maInput.indexOf("U+") == 0) )
-                        mbAllowMoreChars = false;
+                        return false;
                     // hex chars followed by '+' - now require a 'U'
                     else if ( !maInput.isEmpty() )
                         mbRequiresU = true;
                     // treat as a normal character
                     else
                     {
-                        mbAllowMoreChars = false;
                         if( !bPreventNonHex )
                             maInput.insertUtf32(0, uChar);
+                        return false;
                     }
                     break;
                 default:
                     // + already found. Since not U, cancel further input
                     if( mbRequiresU )
-                        mbAllowMoreChars = false;
+                        return false;
                     // maximum digits per notation is 8: only one notation
                     else if( maInput.indexOf("U+") == -1 && 
maInput.getLength() == 8 )
-                        mbAllowMoreChars = false;
+                        return false;
                     // maximum digits per notation is 8: previous notation 
found
                     else if( maInput.indexOf("U+") == 8 )
-                        mbAllowMoreChars = false;
+                        return false;
                     // a hex character. Add to string.
                     else if( rtl::isAsciiHexDigit(uChar) )
                     {
@@ -1192,36 +1187,35 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 
uChar)
                     // not a hex character: stop input. keep if it is the 
first input provided
                     else
                     {
-                        mbAllowMoreChars = false;
                         if( maInput.isEmpty() )
                             maInput.insertUtf32(0, uChar);
+                        return false;
                     }
             }
     }
-    return mbAllowMoreChars;
+    return true;
 }
 
 OUString ToggleUnicodeCodepoint::StringToReplace()
 {
+    // this function potentially modifies the input string. No more addition 
of characters
+#ifndef NDEBUG
+    mbInputEnded = true;
+#endif
+
     if( maInput.isEmpty() )
     {
         //edge case - input finished with incomplete low surrogate or 
combining characters without a base
-        if( mbAllowMoreChars )
-        {
-            if( !maUtf16.isEmpty() )
-                maInput = maUtf16;
-            if( !maCombining.isEmpty() )
-                maInput.append(maCombining);
-        }
+        if (!maUtf16.isEmpty())
+            maInput = maUtf16;
+        if (!maCombining.isEmpty())
+            maInput.append(maCombining);
         return maInput.toString();
     }
 
     if( !mbIsHexString )
         return maInput.toString();
 
-    //this function potentially modifies the input string.  Prevent addition 
of further characters
-    mbAllowMoreChars = false;
-
     //validate unicode notation.
     OUString sIn;
     sal_uInt32 nUnicode = 0;
diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx
index ddb75784784f..300056792dc0 100644
--- a/include/i18nutil/unicode.hxx
+++ b/include/i18nutil/unicode.hxx
@@ -96,9 +96,11 @@ private:
     OUStringBuffer maInput;
     OUStringBuffer maUtf16;
     OUStringBuffer maCombining;
-    bool mbAllowMoreChars = true;
     bool mbRequiresU = false;
     bool mbIsHexString = false;
+#ifndef NDEBUG
+    bool mbInputEnded = false;
+#endif
 
 public:
     /**
diff --git a/sw/qa/extras/uiwriter/uiwriter7.cxx 
b/sw/qa/extras/uiwriter/uiwriter7.cxx
index c21afd97b332..11e9c40d1e1a 100644
--- a/sw/qa/extras/uiwriter/uiwriter7.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter7.cxx
@@ -2424,6 +2424,22 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, 
testUnicodeNotationToggle)
     // i.e., it converted the last combined character *before* the HEX code 
*to HEX*, replacing
     // the last character of the HEX; not the expected conversion of the code 
itself *from HEX*.
     CPPUNIT_ASSERT_EQUAL(u"\u0065\u0300n"_ustr, sDocString);
+
+    // When a combining character stands alone in the beginning of a line, 
toggle must not treat
+    // incoming "zero" indicating "there is no more input" as a character to 
combine with the
+    // combining character. Before tdf#166943 fix, it treated zero as such 
character, got input
+    // length of 2, tried to select and replace two characters to the left of 
cursor, and crashed
+    pWrtShell->SelAll();
+    pWrtShell->DelLeft();
+    pWrtShell->Insert2(u"U+0300"_ustr); // A combining diacritic code in the 
beginning of the text
+    dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, 
aPropertyValues);
+    sDocString = 
pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
+    CPPUNIT_ASSERT_EQUAL(u"\u0300"_ustr, sDocString); // A lone combining 
diacritic
+
+    // Toggle must not crash, and must produce the correct result
+    dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, 
aPropertyValues);
+    sDocString = 
pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
+    CPPUNIT_ASSERT_EQUAL(u"U+0300"_ustr, sDocString);
 }
 
 CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf34957)

core.git: Branch 'libreoffice-25-2' - i18nutil/source include/i18nutil sw/qa

Reply via email to