include/unotools/charclass.hxx     |    4 ++++
 sw/source/core/edit/autofmt.cxx    |    2 +-
 unotools/source/i18n/charclass.cxx |   17 +++++++++++++++++
 3 files changed, 22 insertions(+), 1 deletion(-)

New commits:
commit a6d35a7940a2c72594b470aec341c867e6faf82c
Author:     Baole Fang <baole.f...@gmail.com>
AuthorDate: Fri Jun 23 11:47:54 2023 -0400
Commit:     خالد حسني <kha...@libreoffice.org>
CommitDate: Sat Jun 24 01:44:35 2023 +0200

    tdf#142437: Fix word boundary detection in autocorrect
    
    Marks (combining and spacing) were incorrectly considered word separators,
    because isLetterNumeric() matches only for letters and numbers.
    
    The new isBase() matches any character with BASE_FORM character class,
    which covers letters, numbers, and marks.
    
    Change-Id: I27ec2f7fb8d360791a280d10aba9b6d16e7cfb71
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/153509
    Tested-by: Jenkins
    Reviewed-by: خالد حسني <kha...@libreoffice.org>
    (cherry picked from commit caab94a3e0387bde05538cff91ff13446f330785)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/153517

diff --git a/include/unotools/charclass.hxx b/include/unotools/charclass.hxx
index 7cb35ba2a594..bc91aca63631 100644
--- a/include/unotools/charclass.hxx
+++ b/include/unotools/charclass.hxx
@@ -58,6 +58,9 @@ inline constexpr sal_Int32 nCharClassNumericTypeMask =
     css::i18n::KCharacterType::PRINTABLE |
     css::i18n::KCharacterType::BASE_FORM;
 
+inline constexpr sal_Int32 nCharClassBaseType =
+    css::i18n::KCharacterType::BASE_FORM;
+
 class UNOTOOLS_DLLPUBLIC CharClass
 {
     LanguageTag                 maLanguageTag;
@@ -167,6 +170,7 @@ public:
     bool isDigit( const OUString& rStr, sal_Int32 nPos ) const;
     bool isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const;
     bool isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const;
+    bool isBase( const OUString& rStr, sal_Int32 nPos ) const;
     bool isUpper( const OUString& rStr, sal_Int32 nPos ) const;
     bool isLetter( const OUString& rStr ) const;
     bool isNumeric( const OUString& rStr ) const;
diff --git a/sw/source/core/edit/autofmt.cxx b/sw/source/core/edit/autofmt.cxx
index 2c9eb69ac16a..010e0b20b80e 100644
--- a/sw/source/core/edit/autofmt.cxx
+++ b/sw/source/core/edit/autofmt.cxx
@@ -2110,7 +2110,7 @@ void SwAutoFormat::AutoCorrect(TextFrameIndex nPos)
                     bFirstSent = true;
                 [[fallthrough]];
             default:
-                if (!(rAppCC.isLetterNumeric(*pText, sal_Int32(nPos))
+                if (!(rAppCC.isBase(*pText, sal_Int32(nPos))
                         || '/' == cChar )) //  '/' should not be a word separator (e.g. '1/2' needs to be handled as one word for replacement)
                 {
                     --nPos;     // revert ++nPos which was decremented in for loop
diff --git a/unotools/source/i18n/charclass.cxx b/unotools/source/i18n/charclass.cxx
index be3a9f4f0ee0..423f9530f2cb 100644
--- a/unotools/source/i18n/charclass.cxx
+++ b/unotools/source/i18n/charclass.cxx
@@ -253,6 +253,23 @@ bool CharClass::isLetterNumeric( const OUString& rStr ) const
     return false;
 }
 
+bool CharClass::isBase( const OUString& rStr, sal_Int32 nPos ) const
+{
+    sal_Unicode c = rStr[nPos];
+    if ( c < 128 )
+        return rtl::isAsciiAlphanumeric( c );
+
+    try
+    {
+        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & nCharClassBaseType ) != 0;
+    }
+    catch ( const Exception& )
+    {
+        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
+    }
+    return false;
+}
+
 bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const
 {
     sal_Unicode c = rStr[nPos];

Reply via email to