include/unotools/charclass.hxx     |  4 ++++
sw/source/core/edit/autofmt.cxx    |  2 +-
unotools/source/i18n/charclass.cxx | 17 +++++++++++++++++
3 files changed, 22 insertions(+), 1 deletion(-)
New commits:
commit a6d35a7940a2c72594b470aec341c867e6faf82c
Author:     Baole Fang <baole.f...@gmail.com>
AuthorDate: Fri Jun 23 11:47:54 2023 -0400
Commit:     خالد حسني <kha...@libreoffice.org>
CommitDate: Sat Jun 24 01:44:35 2023 +0200

    tdf#142437: Fix word boundary detection in autocorrect

    Marks (combining and spacing) were incorrectly considered word separators, because isLetterNumeric() matches only for letters and numbers. The new isBase() matches any character with BASE_FORM character class, which covers letters, numbers, and marks.

    Change-Id: I27ec2f7fb8d360791a280d10aba9b6d16e7cfb71
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/153509
    Tested-by: Jenkins
    Reviewed-by: خالد حسني <kha...@libreoffice.org>
    (cherry picked from commit caab94a3e0387bde05538cff91ff13446f330785)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/153517

diff --git a/include/unotools/charclass.hxx b/include/unotools/charclass.hxx index 7cb35ba2a594..bc91aca63631 100644 --- a/include/unotools/charclass.hxx +++ b/include/unotools/charclass.hxx @@ -58,6 +58,9 @@ inline constexpr sal_Int32 nCharClassNumericTypeMask = css::i18n::KCharacterType::PRINTABLE | css::i18n::KCharacterType::BASE_FORM; +inline constexpr sal_Int32 nCharClassBaseType = + css::i18n::KCharacterType::BASE_FORM; + class UNOTOOLS_DLLPUBLIC CharClass { LanguageTag maLanguageTag; @@ -167,6 +170,7 @@ public: bool isDigit( const OUString& rStr, sal_Int32 nPos ) const; bool isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const; bool isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const; + bool isBase( const OUString& rStr, sal_Int32 nPos ) const; bool isUpper( const OUString& rStr, sal_Int32 nPos ) const; bool isLetter( const OUString& rStr ) const; bool isNumeric( const OUString& rStr ) const;
diff --git a/sw/source/core/edit/autofmt.cxx b/sw/source/core/edit/autofmt.cxx index 2c9eb69ac16a..010e0b20b80e 100644 --- a/sw/source/core/edit/autofmt.cxx +++ b/sw/source/core/edit/autofmt.cxx @@ -2110,7 +2110,7 @@
void SwAutoFormat::AutoCorrect(TextFrameIndex nPos) bFirstSent = true; [[fallthrough]]; default: - if (!(rAppCC.isLetterNumeric(*pText, sal_Int32(nPos)) + if (!(rAppCC.isBase(*pText, sal_Int32(nPos)) || '/' == cChar )) // '/' should not be a word separator (e.g. '1/2' needs to be handled as one word for replacement) { --nPos; // revert ++nPos which was decremented in for loop diff --git a/unotools/source/i18n/charclass.cxx b/unotools/source/i18n/charclass.cxx index be3a9f4f0ee0..423f9530f2cb 100644 --- a/unotools/source/i18n/charclass.cxx +++ b/unotools/source/i18n/charclass.cxx @@ -253,6 +253,23 @@ bool CharClass::isLetterNumeric( const OUString& rStr ) const return false; } +bool CharClass::isBase( const OUString& rStr, sal_Int32 nPos ) const +{ + sal_Unicode c = rStr[nPos]; + if ( c < 128 ) + return rtl::isAsciiAlphanumeric( c ); + + try + { + return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & nCharClassBaseType ) != 0; + } + catch ( const Exception& ) + { + TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); + } + return false; +} + bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const { sal_Unicode c = rStr[nPos];