i18nutil/source/utility/unicode.cxx | 52 +++++++++++++++--------------------- include/i18nutil/unicode.hxx | 4 ++ sw/qa/extras/uiwriter/uiwriter7.cxx | 16 +++++++++++ 3 files changed, 42 insertions(+), 30 deletions(-)
New commits: commit 5d3e38f3b99e36af1cbc3af96bfdcfa1239477f9 Author: Mike Kaganski <[email protected]> AuthorDate: Tue Jun 10 22:13:43 2025 +0200 Commit: Adolfo Jayme Barrientos <[email protected]> CommitDate: Sat Jun 14 07:48:58 2025 +0200 tdf#166943: zero passed to AllowMoreInput means "no more input" Users of ToggleUnicodeCodepoint keep reading next characters to the left, until AllowMoreInput returns false (or selection ends). When attempting to read more characters to the left than there are, SwCursorShell::GetChar returns 0. This needs to be treated by AllowMoreInput as a hard stop. It failed when the previous character was a combining character. Change-Id: I203b150154e1948d4cebfd69442e30a076710f46 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/186341 Tested-by: Jenkins Reviewed-by: Mike Kaganski <[email protected]> (cherry picked from commit 58a7c6ccfd3fa590460dba1ecbdef4483dcd5e08) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/186351 Reviewed-by: Adolfo Jayme Barrientos <[email protected]> diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx index c9bfbeeb0a80..1e88b0d5d6b4 100644 --- a/i18nutil/source/utility/unicode.cxx +++ b/i18nutil/source/utility/unicode.cxx @@ -1051,11 +1051,13 @@ OUString unicode::formatPercent(double dNumber, bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) { + assert(!mbInputEnded); + + if (uChar == 0) + return false; + //arbitrarily chosen maximum length allowed - normal max usage would be around 30. 
if( maInput.getLength() > 255 ) - mbAllowMoreChars = false; - - if( !mbAllowMoreChars ) return false; bool bPreventNonHex = false; @@ -1067,7 +1069,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) case css::i18n::UnicodeType::SURROGATE: if (bPreventNonHex || mbIsHexString) { - mbAllowMoreChars = false; return false; } @@ -1091,14 +1092,12 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) maInput.append(maUtf16); if( !maCombining.isEmpty() ) maInput.append(maCombining); - mbAllowMoreChars = false; - break; + return false; case css::i18n::UnicodeType::NON_SPACING_MARK: case css::i18n::UnicodeType::COMBINING_SPACING_MARK: if (bPreventNonHex || mbIsHexString) { - mbAllowMoreChars = false; return false; } @@ -1108,7 +1107,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) maInput = maUtf16; if( !maCombining.isEmpty() ) maInput.append(maCombining); - mbAllowMoreChars = false; return false; } maCombining.insertUtf32(0, uChar); @@ -1121,7 +1119,6 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) maInput = maUtf16; if( !maCombining.isEmpty() ) maInput.append(maCombining); - mbAllowMoreChars = false; return false; } @@ -1129,14 +1126,12 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) { maCombining.insertUtf32(0, uChar); maInput = maCombining; - mbAllowMoreChars = false; return false; } // 0 - 1f are control characters. Do not process those. 
if( uChar < 0x20 ) { - mbAllowMoreChars = false; return false; } @@ -1153,36 +1148,36 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) // treat as a normal character else { - mbAllowMoreChars = false; if( !bPreventNonHex ) maInput.insertUtf32(0, uChar); + return false; } break; case '+': // + already found: skip when not U, or edge case of +U+xxxx if( mbRequiresU || (maInput.indexOf("U+") == 0) ) - mbAllowMoreChars = false; + return false; // hex chars followed by '+' - now require a 'U' else if ( !maInput.isEmpty() ) mbRequiresU = true; // treat as a normal character else { - mbAllowMoreChars = false; if( !bPreventNonHex ) maInput.insertUtf32(0, uChar); + return false; } break; default: // + already found. Since not U, cancel further input if( mbRequiresU ) - mbAllowMoreChars = false; + return false; // maximum digits per notation is 8: only one notation else if( maInput.indexOf("U+") == -1 && maInput.getLength() == 8 ) - mbAllowMoreChars = false; + return false; // maximum digits per notation is 8: previous notation found else if( maInput.indexOf("U+") == 8 ) - mbAllowMoreChars = false; + return false; // a hex character. Add to string. else if( rtl::isAsciiHexDigit(uChar) ) { @@ -1192,36 +1187,35 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar) // not a hex character: stop input. keep if it is the first input provided else { - mbAllowMoreChars = false; if( maInput.isEmpty() ) maInput.insertUtf32(0, uChar); + return false; } } } - return mbAllowMoreChars; + return true; } OUString ToggleUnicodeCodepoint::StringToReplace() { + // this function potentially modifies the input string. 
No more addition of characters +#ifndef NDEBUG + mbInputEnded = true; +#endif + if( maInput.isEmpty() ) { //edge case - input finished with incomplete low surrogate or combining characters without a base - if( mbAllowMoreChars ) - { - if( !maUtf16.isEmpty() ) - maInput = maUtf16; - if( !maCombining.isEmpty() ) - maInput.append(maCombining); - } + if (!maUtf16.isEmpty()) + maInput = maUtf16; + if (!maCombining.isEmpty()) + maInput.append(maCombining); return maInput.toString(); } if( !mbIsHexString ) return maInput.toString(); - //this function potentially modifies the input string. Prevent addition of further characters - mbAllowMoreChars = false; - //validate unicode notation. OUString sIn; sal_uInt32 nUnicode = 0; diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx index ddb75784784f..300056792dc0 100644 --- a/include/i18nutil/unicode.hxx +++ b/include/i18nutil/unicode.hxx @@ -96,9 +96,11 @@ private: OUStringBuffer maInput; OUStringBuffer maUtf16; OUStringBuffer maCombining; - bool mbAllowMoreChars = true; bool mbRequiresU = false; bool mbIsHexString = false; +#ifndef NDEBUG + bool mbInputEnded = false; +#endif public: /** diff --git a/sw/qa/extras/uiwriter/uiwriter7.cxx b/sw/qa/extras/uiwriter/uiwriter7.cxx index c21afd97b332..11e9c40d1e1a 100644 --- a/sw/qa/extras/uiwriter/uiwriter7.cxx +++ b/sw/qa/extras/uiwriter/uiwriter7.cxx @@ -2424,6 +2424,22 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testUnicodeNotationToggle) // i.e., it converted the last combined character *before* the HEX code *to HEX*, replacing // the last character of the HEX; not the expected conversion of the code itself *from HEX*. CPPUNIT_ASSERT_EQUAL(u"\u0065\u0300n"_ustr, sDocString); + + // When a combining character stands alone in the beginning of a line, toggle must not treat + // incoming "zero" indicating "there is no more input" as a character to combine with the + // combining character. 
Before tdf#166943 fix, it treated zero as such character, got input + // length of 2, tried to select and replace two characters to the left of cursor, and crashed + pWrtShell->SelAll(); + pWrtShell->DelLeft(); + pWrtShell->Insert2(u"U+0300"_ustr); // A combining diacritic code in the beginning of the text + dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues); + sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText(); + CPPUNIT_ASSERT_EQUAL(u"\u0300"_ustr, sDocString); // A lone combining diacritic + + // Toggle must not crash, and must produce the correct result + dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues); + sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText(); + CPPUNIT_ASSERT_EQUAL(u"U+0300"_ustr, sDocString); } CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf34957)
