i18npool/qa/cppunit/test_breakiterator.cxx       |   30 +++++++++++++++++++++++
 i18npool/source/breakiterator/data/dict_word.txt |    2 -
 2 files changed, 31 insertions(+), 1 deletion(-)

New commits:
commit 6e002da1615b52cda4e9331e87878458b1fe9677
Author:     László Németh <nem...@numbertext.org>
AuthorDate: Thu Jun 27 10:06:03 2024 +0200
Commit:     László Németh <nem...@numbertext.org>
CommitDate: Thu Jun 27 16:48:41 2024 +0200

    tdf#161737 i18npool: fix fake spelling alarms with NNBSP
    
    Fix word breaking for spell checking by excluding a narrow no-break
    space (NNBSP, U+202F) at the end of words.
    
    This was a problem e.g. for French, where an (automatically? or
    manually) inserted narrow no-break space is used to get correct
    typography before exclamation and question marks, and also after and
    before guillemets (if the OpenType/Graphite font doesn't have this
    feature).
    
    Regression from commit 44699b3de37f07090ac6fee1cd97aa76036e9700
    "tdf#49885 BreakIterator rule upgrades".
    
    Note: this also fixes the problem that digits separated by an NNBSP
    thousand separator weren't handled by spell checking, raising false
    spelling alarms when "Check words with numbers" was enabled in
    Tools->Options->Languages and Locales->Writing Aids.
    (TODO: in the case of thousand separators, remove the NNBSP in the
    linguistic module or in the spell checking dictionaries, to allow
    checking numbers with thousand separators and with a correct suffix.)
    
    Change-Id: I36e10add7e0ba840f207a375ccc8668dbfef9572
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/169618
    Tested-by: Jenkins
    Reviewed-by: László Németh <nem...@numbertext.org>

diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index e790c17e1155..6fbde026f565 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -992,6 +992,36 @@ void TestBreakIterator::testWordBoundaries()
         CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.startPos);
         CPPUNIT_ASSERT_EQUAL(sal_Int32(11), aBounds.endPos);
     }
+
+    //  tdf#161737: narrow no-break space at the end of words resulted spelling mistakes
+    {
+        aLocale.Language = "en";
+        aLocale.Country = "US";
+
+        OUString aTest(u"L’espace fine insécable\u202F!"_ustr);
+        aBounds
+            = m_xBreak->getWordBoundary(aTest, 14, aLocale, 
i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(14), aBounds.startPos);
+        // This was 24 (word + NNBSP)
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(23), aBounds.endPos);
+    }
+
+    //  tdf#161737: narrow no-break space between digits resulted spelling mistakes
+    //  as a quick fix, limit NBSP as word-part character only for editing, and not for spell checking
+    //  TODO: remove NBSP by the linguistic module or by the spell checking dictionaries to allow
+    //  to check numbers with thousand separators and with correct suffix
+    {
+        aLocale.Language = "en";
+        aLocale.Country = "US";
+
+        OUString aTest(u"1\u202F000\u202F000"_ustr);
+        aBounds
+            = m_xBreak->getWordBoundary(aTest, 2, aLocale, 
i18n::WordType::DICTIONARY_WORD, false);
+        // This was 0 (word + NNBSP)
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aBounds.startPos);
+        // This was 8 (word + NNBSP)
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos);
+    }
 }
 
 void TestBreakIterator::testSentenceBoundaries()
diff --git a/i18npool/source/breakiterator/data/dict_word.txt b/i18npool/source/breakiterator/data/dict_word.txt
index f804b0eec214..deeec7dd659e 100644
--- a/i18npool/source/breakiterator/data/dict_word.txt
+++ b/i18npool/source/breakiterator/data/dict_word.txt
@@ -54,7 +54,7 @@ $Double_Quote       = [\p{Word_Break = Double_Quote}];
 $MidNumLet          = [\p{Word_Break = MidNumLet}];
 $MidNum             = [\p{Word_Break = MidNum}];
 $Numeric            = [\p{Word_Break = Numeric}];
-$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
+$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}-[:name = NARROW NO-BREAK 
SPACE:]];
 $WSegSpace          = [\p{Word_Break = WSegSpace}];
 $Extended_Pict      = [\p{Extended_Pictographic}];
 

Reply via email to