sw/CppunitTest_sw_txtimport.mk | 63 ++++++++ sw/CppunitTest_sw_uwriter.mk | 2 sw/Library_sw.mk | 1 sw/Module_sw.mk | 1 sw/qa/extras/txtexport/txtexport.cxx | 97 ++----------- sw/qa/extras/txtimport/data/UTF16BEWITHBOM.txt |binary sw/qa/extras/txtimport/data/UTF16BEWITHOUTBOM.txt |binary sw/qa/extras/txtimport/data/UTF16LEWITHBOM.txt |binary sw/qa/extras/txtimport/data/UTF16LEWITHOUTBOM.txt |binary sw/qa/extras/txtimport/data/UTF8WITHBOM.txt | 2 sw/qa/extras/txtimport/data/UTF8WITHOUTBOM.txt | 2 sw/qa/extras/txtimport/data/bullets.odt |binary sw/qa/extras/txtimport/txtimport.cxx | 155 ++++++++++++++++++++++ sw/source/filter/basflt/iodetect.cxx | 27 +++ 14 files changed, 269 insertions(+), 81 deletions(-)
New commits: commit ef77a256de527f6d00212839e55f949024f2e7bc Author: Tomofumi Yagi <yagitmkn...@gmail.com> AuthorDate: Wed Sep 16 18:11:22 2020 +0900 Commit: Michael Stahl <michael.st...@cib.de> CommitDate: Wed Sep 30 10:18:23 2020 +0200 tdf#60145 sw: fix UTF-8 encoding without BOM is not detected Writer can now detect Unicode type even if importing text file does not have a BOM. Change-Id: I70fa4145672d855f9ef9df040a930ff5d1564905 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/102884 Tested-by: Jenkins Reviewed-by: Eike Rathke <er...@redhat.com> Reviewed-by: Michael Stahl <michael.st...@cib.de> diff --git a/sw/CppunitTest_sw_txtimport.mk b/sw/CppunitTest_sw_txtimport.mk new file mode 100644 index 000000000000..646a2ea83391 --- /dev/null +++ b/sw/CppunitTest_sw_txtimport.mk @@ -0,0 +1,63 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +#************************************************************************* +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# +#************************************************************************* + +$(eval $(call gb_CppunitTest_CppunitTest,sw_txtimport)) + +$(eval $(call gb_CppunitTest_use_common_precompiled_header,sw_txtimport)) + +$(eval $(call gb_CppunitTest_add_exception_objects,sw_txtimport, \ + sw/qa/extras/txtimport/txtimport \ +)) + +$(eval $(call gb_CppunitTest_use_libraries,sw_txtimport, \ + comphelper \ + cppu \ + cppuhelper \ + i18nlangtag \ + sal \ + sfx \ + sw \ + swqahelper \ + test \ + tl \ + unotest \ + utl \ + vcl \ + $(gb_UWINAPI) \ +)) + +$(eval $(call gb_CppunitTest_use_externals,sw_txtimport,\ + boost_headers \ + libxml2 \ +)) + +$(eval $(call gb_CppunitTest_set_include,sw_txtimport,\ + -I$(SRCDIR)/sw/inc \ + -I$(SRCDIR)/sw/source/core/inc \ + -I$(SRCDIR)/sw/source/uibase/inc \ + -I$(SRCDIR)/sw/qa/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_CppunitTest_use_api,sw_txtimport,\ + udkapi \ + offapi \ + oovbaapi \ +)) + +$(eval $(call gb_CppunitTest_use_ure,sw_txtimport)) +$(eval $(call gb_CppunitTest_use_vcl,sw_txtimport)) + +$(eval $(call gb_CppunitTest_use_rdb,sw_txtimport,services)) + +$(eval $(call gb_CppunitTest_use_configuration,sw_txtimport)) + +# vim: set noet sw=4 ts=4: diff --git a/sw/CppunitTest_sw_uwriter.mk b/sw/CppunitTest_sw_uwriter.mk index 6b9ffa4ba683..e6a490c5dff1 100644 --- a/sw/CppunitTest_sw_uwriter.mk +++ b/sw/CppunitTest_sw_uwriter.mk @@ -64,7 +64,9 @@ $(eval $(call gb_CppunitTest_use_libraries,sw_uwriter, \ $(eval $(call gb_CppunitTest_use_externals,sw_uwriter,\ boost_headers \ + icui18n \ icuuc \ + icu_headers \ libxml2 \ )) diff --git a/sw/Library_sw.mk b/sw/Library_sw.mk index 2de7d6b0e4dc..65f4154bf12e 100644 --- a/sw/Library_sw.mk +++ b/sw/Library_sw.mk @@ -87,6 +87,7 @@ $(eval $(call gb_Library_use_libraries,sw,\ $(eval $(call gb_Library_use_externals,sw,\ boost_headers \ + icui18n \ icuuc \ icu_headers \ libxml2 \ diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk index ec6e73fd2779..2f8a9a35ed01 100644 --- a/sw/Module_sw.mk +++ 
b/sw/Module_sw.mk @@ -97,6 +97,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sw,\ CppunitTest_sw_odfexport \ CppunitTest_sw_odfimport \ CppunitTest_sw_txtexport \ + CppunitTest_sw_txtimport \ $(if $(filter-out MACOSX,$(OS)), \ CppunitTest_sw_uiwriter \ ) \ diff --git a/sw/qa/extras/txtexport/txtexport.cxx b/sw/qa/extras/txtexport/txtexport.cxx index d7246d05aced..0151ee289cc3 100644 --- a/sw/qa/extras/txtexport/txtexport.cxx +++ b/sw/qa/extras/txtexport/txtexport.cxx @@ -15,42 +15,13 @@ #include <unotxdoc.hxx> #include <docsh.hxx> -class TxtImportTest : public SwModelTestBase +class TxtExportTest : public SwModelTestBase { public: - TxtImportTest() : - SwModelTestBase("/sw/qa/extras/txtexport/data/", "Text") - {} - - // Export & assert part of the document (defined by SwPaM). - void assertExportedRange(const OString& aExpected, SwPaM& rPaM) + TxtExportTest() + : SwModelTestBase("/sw/qa/extras/txtexport/data/", "Text") { - WriterRef rAsciiWriter; - SwReaderWriter::GetWriter(FILTER_TEXT, OUString(), rAsciiWriter); - CPPUNIT_ASSERT(rAsciiWriter.is()); - - // no start char - rAsciiWriter->m_bUCS2_WithStartChar = false; - - SvMemoryStream aMemoryStream; - - SwWriter aWriter(aMemoryStream, rPaM); - ErrCode nError = aWriter.Write(rAsciiWriter); - CPPUNIT_ASSERT_EQUAL(ERRCODE_NONE, nError); - - const char* pData = static_cast<const char*>(aMemoryStream.GetData()); - OString aResult(pData, aMemoryStream.GetSize()); - - CPPUNIT_ASSERT_EQUAL(aExpected, aResult); } -}; - -class TxtExportTest : public SwModelTestBase -{ -public: - TxtExportTest() : - SwModelTestBase("/sw/qa/extras/txtexport/data/", "Text") - {} protected: OString readExportedFile() @@ -61,42 +32,35 @@ protected: const char* pData = static_cast<const char*>(aMemoryStream.GetData()); int offset = 0; - if (aMemoryStream.GetSize() > 2 && pData[0] == '\xEF' && pData[1] == '\xBB' && pData[2] == '\xBF') + if (aMemoryStream.GetSize() > 2 && pData[0] == '\xEF' && pData[1] == '\xBB' + && pData[2] == '\xBF') offset = 
3; return OString(pData + offset, aMemoryStream.GetSize() - offset); } }; -#define DECLARE_TXTIMPORT_TEST(TestName, filename) DECLARE_SW_EXPORT_TEST(TestName, filename, nullptr, TxtImportTest) -#define DECLARE_TXTEXPORT_TEST(TestName, filename) DECLARE_SW_EXPORT_TEST(TestName, filename, nullptr, TxtExportTest) +#define DECLARE_TXTEXPORT_TEST(TestName, filename) \ + DECLARE_SW_EXPORT_TEST(TestName, filename, nullptr, TxtExportTest) DECLARE_TXTEXPORT_TEST(testBullets, "bullets.odt") { OString aData = readExportedFile(); OUString aString = OStringToOUString( - "1 Heading 1" SAL_NEWLINE_STRING - "1.A Heading 2" SAL_NEWLINE_STRING - "Paragraph" SAL_NEWLINE_STRING - "" SAL_NEWLINE_STRING + "1 Heading 1" SAL_NEWLINE_STRING "1.A Heading 2" SAL_NEWLINE_STRING + "Paragraph" SAL_NEWLINE_STRING "" SAL_NEWLINE_STRING " \xe2\x80\xa2 First bullet" SAL_NEWLINE_STRING " \xe2\x80\xa2 Second bullet" SAL_NEWLINE_STRING " \xe2\x97\xa6 Sub-second bullet" SAL_NEWLINE_STRING " Third bullet, but deleted" SAL_NEWLINE_STRING - " \xe2\x80\xa2 Fourth bullet" SAL_NEWLINE_STRING - "" SAL_NEWLINE_STRING - "Numbering" SAL_NEWLINE_STRING - "" SAL_NEWLINE_STRING - " 1. First" SAL_NEWLINE_STRING - " 2. Second" SAL_NEWLINE_STRING - " 1. Second-first" SAL_NEWLINE_STRING - " Third, but deleted" SAL_NEWLINE_STRING - " 3. Actual third" SAL_NEWLINE_STRING - "" SAL_NEWLINE_STRING - "Paragraph after numbering" SAL_NEWLINE_STRING - "Next paragraph" SAL_NEWLINE_STRING - "Final paragraph" SAL_NEWLINE_STRING, RTL_TEXTENCODING_UTF8); + " \xe2\x80\xa2 Fourth bullet" SAL_NEWLINE_STRING "" SAL_NEWLINE_STRING + "Numbering" SAL_NEWLINE_STRING "" SAL_NEWLINE_STRING " 1. First" SAL_NEWLINE_STRING + " 2. Second" SAL_NEWLINE_STRING " 1. Second-first" SAL_NEWLINE_STRING + " Third, but deleted" SAL_NEWLINE_STRING " 3. 
Actual third" SAL_NEWLINE_STRING + "" SAL_NEWLINE_STRING "Paragraph after numbering" SAL_NEWLINE_STRING + "Next paragraph" SAL_NEWLINE_STRING "Final paragraph" SAL_NEWLINE_STRING, + RTL_TEXTENCODING_UTF8); // To get the stuff back in the system's encoding OString aExpected(OUStringToOString(aString, osl_getThreadTextEncoding())); @@ -104,35 +68,6 @@ DECLARE_TXTEXPORT_TEST(testBullets, "bullets.odt") CPPUNIT_ASSERT_EQUAL(aExpected, aData); } -DECLARE_TXTIMPORT_TEST(testTdf112191, "bullets.odt") -{ - SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument *>(mxComponent.get()); - CPPUNIT_ASSERT(pTextDoc); - SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); - CPPUNIT_ASSERT(pDoc); - - // just the 5th paragraph - no bullet - uno::Reference<text::XTextRange> xPara(getParagraph(5)); - SwUnoInternalPaM aPaM(*pDoc); - bool bSuccess = sw::XTextRangeToSwPaM(aPaM, xPara); - CPPUNIT_ASSERT(bSuccess); - - assertExportedRange("First bullet", aPaM); - - // but when we extend to the next paragraph - now there are bullets - xPara = getParagraph(6); - SwUnoInternalPaM aPaM2(*pDoc); - bSuccess = sw::XTextRangeToSwPaM(aPaM2, xPara); - CPPUNIT_ASSERT(bSuccess); - - OUString aString = OStringToOUString( - " \xe2\x80\xa2 First bullet" SAL_NEWLINE_STRING - " \xe2\x80\xa2 Second bullet", RTL_TEXTENCODING_UTF8); - - SwPaM aPaM3(*aPaM2.GetMark(), *aPaM.GetPoint()); - assertExportedRange(OUStringToOString(aString, osl_getThreadTextEncoding()), aPaM3); -} - CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sw/qa/extras/txtimport/data/UTF16BEWITHBOM.txt b/sw/qa/extras/txtimport/data/UTF16BEWITHBOM.txt new file mode 100755 index 000000000000..2e0455704058 Binary files /dev/null and b/sw/qa/extras/txtimport/data/UTF16BEWITHBOM.txt differ diff --git a/sw/qa/extras/txtimport/data/UTF16BEWITHOUTBOM.txt b/sw/qa/extras/txtimport/data/UTF16BEWITHOUTBOM.txt new file mode 100755 index 000000000000..0551ae547521 Binary files /dev/null and 
b/sw/qa/extras/txtimport/data/UTF16BEWITHOUTBOM.txt differ diff --git a/sw/qa/extras/txtimport/data/UTF16LEWITHBOM.txt b/sw/qa/extras/txtimport/data/UTF16LEWITHBOM.txt new file mode 100755 index 000000000000..43ed8143ef9b Binary files /dev/null and b/sw/qa/extras/txtimport/data/UTF16LEWITHBOM.txt differ diff --git a/sw/qa/extras/txtimport/data/UTF16LEWITHOUTBOM.txt b/sw/qa/extras/txtimport/data/UTF16LEWITHOUTBOM.txt new file mode 100755 index 000000000000..d2cc98e785ce Binary files /dev/null and b/sw/qa/extras/txtimport/data/UTF16LEWITHOUTBOM.txt differ diff --git a/sw/qa/extras/txtimport/data/UTF8WITHBOM.txt b/sw/qa/extras/txtimport/data/UTF8WITHBOM.txt new file mode 100755 index 000000000000..afd812fe6ff3 --- /dev/null +++ b/sw/qa/extras/txtimport/data/UTF8WITHBOM.txt @@ -0,0 +1,2 @@ +漢a' +彼は背後にひそかな足音を聞いた。それはあまり良い意味を示すものではない。誰がこんな夜更けに、しかもこんな街灯のお粗末な港街の狭い小道で彼をつけて来るというのだ。人生の航路を捻じ曲げ、その獲物と共に立ち去ろうとしている、その丁度今。 彼のこの仕事への恐れを和らげるために、数多い仲間の中に同じ考えを抱き、彼を見守り、待っている者がいるというのか。それとも背後の足音の主は、この街に無数にいる法監視役で、強靭な罰をすぐにも彼の手首にガシャンと下すというのか。彼は足音が止まったことに気が着いた。あわてて辺りを見回す。ふと狭い抜け道に目が止まる。 彼は素早く右に身を翻し、建物の間に消え去った。その時彼は、もう少しで道の真中に転がっていたごみバケツに躓き転ぶところだった。 彼は暗闇の中で道を確かめようとじっと見つめた。どうやら自分の通ってきた道以外にこの中庭からの出道はないようだ。 足音はだんだん近づき、彼には角を曲がる黒い人影が見えた。彼の目は夜の闇の中を必死にさまよい、逃げ道を探す。もうすべては終わりなのか。すべての苦労と準備は水の泡だというのか。 突然、彼の横で扉が風に揺らぎ、ほんのわずかにきしむのを聞いた時、彼は背中を壁に押し付け、追跡者に見付けられないことを願った。この扉は望みの綱として投げかけられた、彼のジレンマからの出口なのだろうか。背中を壁にぴったり押し付けたまま、ゆっくりと彼は開いている扉の方へと身を動かして行った。この扉は彼の救いとなるのだろうか。 diff --git a/sw/qa/extras/txtimport/data/UTF8WITHOUTBOM.txt b/sw/qa/extras/txtimport/data/UTF8WITHOUTBOM.txt new file mode 100755 index 000000000000..b8362e9e789f --- /dev/null +++ b/sw/qa/extras/txtimport/data/UTF8WITHOUTBOM.txt @@ -0,0 +1,2 @@ +漢a' +彼は背後にひそかな足音を聞いた。それはあまり良い意味を示すものではない。誰がこんな夜更けに、しかもこんな街灯のお粗末な港街の狭い小道で彼をつけて来るというのだ。人生の航路を捻じ曲げ、その獲物と共に立ち去ろうとしている、その丁度今。 彼のこの仕事への恐れを和らげるために、数多い仲間の中に同じ考えを抱き、彼を見守り、待っている者がいるというのか。それとも背後の足音の主は、この街に無数にいる法監視役で、強靭な罰をすぐにも彼の手首にガシャンと下すというのか。彼は足音が止まったことに気が着いた。あわてて辺りを見回す。ふと狭い抜け道に目が止まる。 
彼は素早く右に身を翻し、建物の間に消え去った。その時彼は、もう少しで道の真中に転がっていたごみバケツに躓き転ぶところだった。 彼は暗闇の中で道を確かめようとじっと見つめた。どうやら自分の通ってきた道以外にこの中庭からの出道はないようだ。 足音はだんだん近づき、彼には角を曲がる黒い人影が見えた。彼の目は夜の闇の中を必死にさまよい、逃げ道を探す。もうすべては終わりなのか。すべての苦労と準備は水の泡だというのか。 突然、彼の横で扉が風に揺らぎ、ほんのわずかにきしむのを聞いた時、彼は背中を壁に押し付け、追跡者に見付けられないことを願った。この扉は望みの綱として投げかけられた、彼のジレンマからの出口なのだろうか。背中を壁にぴったり押し付けたまま、ゆっくりと彼は開いている扉の方へと身を動かして行った。この扉は彼の救いとなるのだろうか。 diff --git a/sw/qa/extras/txtimport/data/bullets.odt b/sw/qa/extras/txtimport/data/bullets.odt new file mode 100644 index 000000000000..43e0c2123ffc Binary files /dev/null and b/sw/qa/extras/txtimport/data/bullets.odt differ diff --git a/sw/qa/extras/txtimport/txtimport.cxx b/sw/qa/extras/txtimport/txtimport.cxx new file mode 100644 index 000000000000..a97242641df8 --- /dev/null +++ b/sw/qa/extras/txtimport/txtimport.cxx @@ -0,0 +1,155 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <swmodeltestbase.hxx> + +#include <shellio.hxx> +#include <unotextrange.hxx> +#include <iodetect.hxx> +#include <unotxdoc.hxx> +#include <docsh.hxx> + +class TxtImportTest : public SwModelTestBase +{ +public: + TxtImportTest() + : SwModelTestBase("/sw/qa/extras/txtimport/data/", "Text") + { + } + + // Export & assert part of the document (defined by SwPaM). 
+ void assertExportedRange(const OString& aExpected, SwPaM& rPaM) + { + WriterRef rAsciiWriter; + SwReaderWriter::GetWriter(FILTER_TEXT, OUString(), rAsciiWriter); + CPPUNIT_ASSERT(rAsciiWriter.is()); + + // no start char + rAsciiWriter->m_bUCS2_WithStartChar = false; + + SvMemoryStream aMemoryStream; + + SwWriter aWriter(aMemoryStream, rPaM); + ErrCode nError = aWriter.Write(rAsciiWriter); + CPPUNIT_ASSERT_EQUAL(ERRCODE_NONE, nError); + + const char* pData = static_cast<const char*>(aMemoryStream.GetData()); + OString aResult(pData, aMemoryStream.GetSize()); + + CPPUNIT_ASSERT_EQUAL(aExpected, aResult); + } +}; + +#define DECLARE_TXTIMPORT_TEST(TestName, filename) \ + DECLARE_SW_IMPORT_TEST(TestName, filename, nullptr, TxtImportTest) + +DECLARE_TXTIMPORT_TEST(testTdf112191, "bullets.odt") +{ + SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + CPPUNIT_ASSERT(pDoc); + + // just the 5th paragraph - no bullet + uno::Reference<text::XTextRange> xPara(getParagraph(5)); + SwUnoInternalPaM aPaM(*pDoc); + bool bSuccess = sw::XTextRangeToSwPaM(aPaM, xPara); + CPPUNIT_ASSERT(bSuccess); + + assertExportedRange("First bullet", aPaM); + + // but when we extend to the next paragraph - now there are bullets + xPara = getParagraph(6); + SwUnoInternalPaM aPaM2(*pDoc); + bSuccess = sw::XTextRangeToSwPaM(aPaM2, xPara); + CPPUNIT_ASSERT(bSuccess); + + OUString aString = OStringToOUString(" \xe2\x80\xa2 First bullet" SAL_NEWLINE_STRING + " \xe2\x80\xa2 Second bullet", + RTL_TEXTENCODING_UTF8); + + SwPaM aPaM3(*aPaM2.GetMark(), *aPaM.GetPoint()); + assertExportedRange(OUStringToOString(aString, osl_getThreadTextEncoding()), aPaM3); +} + +DECLARE_TXTIMPORT_TEST(testTdf60145_utf8withoutbom, "UTF8WITHOUTBOM.txt") +{ + SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + 
CPPUNIT_ASSERT(pDoc); + + uno::Reference<text::XTextRange> xPara(getParagraph(1)); + + CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString()); +} + +DECLARE_TXTIMPORT_TEST(testTdf60145_utf8withbom, "UTF8WITHBOM.txt") +{ + SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + CPPUNIT_ASSERT(pDoc); + + uno::Reference<text::XTextRange> xPara(getParagraph(1)); + + CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString()); +} + +DECLARE_TXTIMPORT_TEST(testTdf60145_utf16lewithoutbom, "UTF16LEWITHOUTBOM.txt") +{ + SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + CPPUNIT_ASSERT(pDoc); + + uno::Reference<text::XTextRange> xPara(getParagraph(1)); + + CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString()); +} + +DECLARE_TXTIMPORT_TEST(testTdf60145_utf16lewithbom, "UTF16LEWITHBOM.txt") +{ + SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + CPPUNIT_ASSERT(pDoc); + + uno::Reference<text::XTextRange> xPara(getParagraph(1)); + + CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString()); +} + +DECLARE_TXTIMPORT_TEST(testTdf60145_utf16bewithoutbom, "UTF16BEWITHOUTBOM.txt") +{ + SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + CPPUNIT_ASSERT(pDoc); + + uno::Reference<text::XTextRange> xPara(getParagraph(1)); + + CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString()); +} + +DECLARE_TXTIMPORT_TEST(testTdf60145_utf16bewithbom, "UTF16BEWITHBOM.txt") +{ + SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + CPPUNIT_ASSERT(pDoc); + + 
uno::Reference<text::XTextRange> xPara(getParagraph(1)); + + CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString()); +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sw/source/filter/basflt/iodetect.cxx b/sw/source/filter/basflt/iodetect.cxx index 5f9929e8f2b7..a47bb9e82b8d 100644 --- a/sw/source/filter/basflt/iodetect.cxx +++ b/sw/source/filter/basflt/iodetect.cxx @@ -28,6 +28,8 @@ #include <sfx2/docfile.hxx> #include <com/sun/star/ucb/ContentCreationException.hpp> #include <com/sun/star/embed/XStorage.hpp> +#include <unicode/ucsdet.h> +#include <unicode/uclean.h> using namespace ::com::sun::star; @@ -267,6 +269,31 @@ bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen, pBuf+=nHead; rLen-=nHead; } + /*See unicode type again without BOM*/ + if (rLen >= 1 && eCharSet == RTL_TEXTENCODING_DONTKNOW) + { + UErrorCode uerr = U_ZERO_ERROR; + UCharsetDetector* ucd = ucsdet_open(&uerr); + ucsdet_setText(ucd, pBuf, rLen, &uerr); + const UCharsetMatch* match = ucsdet_detect(ucd, &uerr); + const char* pEncodingName = ucsdet_getName(match, &uerr); + + if (U_SUCCESS(uerr) && !strcmp("UTF-8", pEncodingName)) + { + eCharSet = RTL_TEXTENCODING_UTF8; // UTF-8 + } + else if (U_SUCCESS(uerr) && !strcmp("UTF-16BE", pEncodingName)) + { + eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16BE + bLE = false; + } + else if (U_SUCCESS(uerr) && !strcmp("UTF-16LE", pEncodingName)) + { + eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16LE + } + + ucsdet_close(ucd); + } bool bCR = false, bLF = false, bIsBareUnicode = false;
_______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits