 i18npool/qa/cppunit/test_breakiterator.cxx              |   35 +++++++++++++++-
 i18npool/source/breakiterator/breakiterator_unicode.cxx |    7 ++-
 2 files changed, 39 insertions(+), 3 deletions(-)
New commits: commit 475d0c59c66fb7752d230f76130b17145aad0c12 Author: Caolán McNamara <caol...@redhat.com> Date: Tue Feb 14 16:07:10 2012 +0000 use icu's breakiterator for Thai, not our customized generic rules diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index ba000af..820e57b 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -70,7 +70,7 @@ public: CPPUNIT_TEST(testGraphemeIteration); CPPUNIT_TEST(testWeak); CPPUNIT_TEST(testAsian); -// CPPUNIT_TEST(testThai); + CPPUNIT_TEST(testThai); CPPUNIT_TEST_SUITE_END(); private: @@ -264,13 +264,13 @@ void TestBreakIterator::testThai() { const sal_Unicode THAI1[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A }; ::rtl::OUString aTest(THAI1, SAL_N_ELEMENTS(THAI1)); - aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD, true); CPPUNIT_ASSERT_MESSAGE("Should skip full word", aBounds.startPos == 0 && aBounds.endPos == aTest.getLength()); } +#ifdef TODO { const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A }; ::rtl::OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1)); @@ -279,6 +279,7 @@ void TestBreakIterator::testThai() CPPUNIT_ASSERT_MESSAGE("Should skip full word", aBounds.startPos == 0 && aBounds.endPos == aTest.getLength()); } +#endif } TestBreakIterator::TestBreakIterator() diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx index 5f35795..aa5e1d8 100644 --- a/i18npool/source/breakiterator/breakiterator_unicode.cxx +++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx @@ -129,10 +129,13 @@ void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star:: OOoRuleBasedBreakIterator *rbi = NULL; - if (breakRules.getLength() > breakType && !breakRules[breakType].isEmpty()) { + if (breakRules.getLength() > breakType && 
!breakRules[breakType].isEmpty()) + { rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk", OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status); - } else { + } + else if (!rLocale.Language.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("th"))) //use icu's breakiterator for Thai + { status = U_ZERO_ERROR; OStringBuffer aUDName(64); aUDName.append(rule); commit fad63878e529ad775f4484330235242a7a900987 Author: Caolán McNamara <caol...@redhat.com> Date: Tue Feb 14 14:14:23 2012 +0000 add northern-thai example diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index a68bc77..ba000af 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -260,14 +260,24 @@ void TestBreakIterator::testThai() aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("th")); aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("TH")); + i18n::Boundary aBounds; { const sal_Unicode THAI1[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A }; - ::rtl::OUString aTest1(THAI1, SAL_N_ELEMENTS(THAI1)); + ::rtl::OUString aTest(THAI1, SAL_N_ELEMENTS(THAI1)); - i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest1, 0, aLocale, + aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD, true); CPPUNIT_ASSERT_MESSAGE("Should skip full word", - aBounds.startPos == 0 && aBounds.endPos == aTest1.getLength()); + aBounds.startPos == 0 && aBounds.endPos == aTest.getLength()); + } + + { + const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A }; + ::rtl::OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1)); + aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_MESSAGE("Should skip full word", + aBounds.startPos == 0 && aBounds.endPos == aTest.getLength()); } } commit 
1a450f53dbd31a34718f658a81b0fabbbc329764 Author: Caolán McNamara <caol...@redhat.com> Date: Tue Feb 14 14:09:12 2012 +0000 add a unit test to demo broken thai word-boundary detection diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index 661e46a..a68bc77 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -41,7 +41,8 @@ #include "cppunit/plugin/TestPlugIn.h" #include <com/sun/star/i18n/XBreakIterator.hpp> #include <com/sun/star/i18n/CharacterIteratorMode.hpp> -#include <com/sun/star/i18n/ScriptType.hdl> +#include <com/sun/star/i18n/ScriptType.hpp> +#include <com/sun/star/i18n/WordType.hpp> #include <rtl/strbuf.hxx> @@ -62,12 +63,14 @@ public: void testGraphemeIteration(); void testWeak(); void testAsian(); + void testThai(); CPPUNIT_TEST_SUITE(TestBreakIterator); CPPUNIT_TEST(testLineBreaking); CPPUNIT_TEST(testGraphemeIteration); CPPUNIT_TEST(testWeak); CPPUNIT_TEST(testAsian); +// CPPUNIT_TEST(testThai); CPPUNIT_TEST_SUITE_END(); private: @@ -249,6 +252,25 @@ void TestBreakIterator::testAsian() } } +//A test to ensure that our thai word boundary detection is useful +//http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html +void TestBreakIterator::testThai() +{ + lang::Locale aLocale; + aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("th")); + aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("TH")); + + { + const sal_Unicode THAI1[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A }; + ::rtl::OUString aTest1(THAI1, SAL_N_ELEMENTS(THAI1)); + + i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest1, 0, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_MESSAGE("Should skip full word", + aBounds.startPos == 0 && aBounds.endPos == aTest1.getLength()); + } +} + TestBreakIterator::TestBreakIterator() { m_xContext = cppu::defaultBootstrap_InitialComponentContext();
_______________________________________________
Libreoffice-commits mailing list
Libreoffice-commits@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits