i18npool/qa/cppunit/test_breakiterator.cxx | 30 +++++++++++++++++++++++ i18npool/source/breakiterator/data/dict_word.txt | 2 - 2 files changed, 31 insertions(+), 1 deletion(-)
New commits: commit 6e002da1615b52cda4e9331e87878458b1fe9677 Author: László Németh <nem...@numbertext.org> AuthorDate: Thu Jun 27 10:06:03 2024 +0200 Commit: László Németh <nem...@numbertext.org> CommitDate: Thu Jun 27 16:48:41 2024 +0200 tdf#161737 i18npool: fix fake spelling alarms with NNBSP Fix word breaking by excluding a narrow no-break space (NNBSP, U+202F) at the end of words for spell checking. This was a problem e.g. for French, where an (automatically or manually) inserted narrow no-break space is used to get correct typography before exclamation and question marks, and also after and before guillemets (if the OpenType/Graphite font doesn't have this feature). Regression from commit 44699b3de37f07090ac6fee1cd97aa76036e9700 "tdf#49885 BreakIterator rule upgrades". Note: this also fixes the problem that digits separated by NNBSP thousand separators weren't handled by spell checking, which raised false spelling alarms when "Check words with numbers" was enabled in Tools->Options->Languages and Locales->Writing Aids. (TODO: in the case of thousand separators, remove the NNBSP in the linguistic module or in the spell checking dictionaries, to allow checking numbers with thousand separators and with the correct suffix.) 
Change-Id: I36e10add7e0ba840f207a375ccc8668dbfef9572 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/169618 Tested-by: Jenkins Reviewed-by: László Németh <nem...@numbertext.org> diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index e790c17e1155..6fbde026f565 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -992,6 +992,36 @@ void TestBreakIterator::testWordBoundaries() CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.startPos); CPPUNIT_ASSERT_EQUAL(sal_Int32(11), aBounds.endPos); } + + // tdf#161737: narrow no-break space at the end of words resulted spelling mistakes + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + OUString aTest(u"L’espace fine insécable\u202F!"_ustr); + aBounds + = m_xBreak->getWordBoundary(aTest, 14, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(14), aBounds.startPos); + // This was 24 (word + NNBSP) + CPPUNIT_ASSERT_EQUAL(sal_Int32(23), aBounds.endPos); + } + + // tdf#161737: narrow no-break space between digits resulted spelling mistakes + // as a quick fix, limit NBSP as word-part character only for editing, and not for spell checking + // TODO: remove NBSP by the linguistic module or by the spell checking dictionaries to allow + // to check numbers with thousand separators and with correct suffix + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + OUString aTest(u"1\u202F000\u202F000"_ustr); + aBounds + = m_xBreak->getWordBoundary(aTest, 2, aLocale, i18n::WordType::DICTIONARY_WORD, false); + // This was 0 (word + NNBSP) + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aBounds.startPos); + // This was 8 (word + NNBSP) + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos); + } } void TestBreakIterator::testSentenceBoundaries() diff --git a/i18npool/source/breakiterator/data/dict_word.txt b/i18npool/source/breakiterator/data/dict_word.txt index f804b0eec214..deeec7dd659e 100644 --- 
a/i18npool/source/breakiterator/data/dict_word.txt +++ b/i18npool/source/breakiterator/data/dict_word.txt @@ -54,7 +54,7 @@ $Double_Quote = [\p{Word_Break = Double_Quote}]; $MidNumLet = [\p{Word_Break = MidNumLet}]; $MidNum = [\p{Word_Break = MidNum}]; $Numeric = [\p{Word_Break = Numeric}]; -$ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; +$ExtendNumLet = [\p{Word_Break = ExtendNumLet}-[:name = NARROW NO-BREAK SPACE:]]; $WSegSpace = [\p{Word_Break = WSegSpace}]; $Extended_Pict = [\p{Extended_Pictographic}];