oox/source/helper/attributelist.cxx | 3 sc/qa/unit/data/csv/tdf152980.csv | 9 ++ sc/qa/unit/subsequent_export_test2.cxx | 29 ++++++++ sc/source/filter/oox/richstring.cxx | 112 --------------------------------- 4 files changed, 42 insertions(+), 111 deletions(-)
New commits: commit 47b30728db3ad47f1b4d0d8b027ba0a55607ac1e Author: Czeber László Ádám <czeber.laszloa...@nisz.hu> AuthorDate: Mon May 8 09:33:07 2023 +0200 Commit: Xisco Fauli <xiscofa...@libreoffice.org> CommitDate: Tue May 9 12:28:34 2023 +0200 tdf#152980 CSV import: Fix control character length in XLSX save Converting from CSV to XLSX corrupts text that looks like a control character. Only escape sequences with a 4-digit hexadecimal code are allowed, in _x000D_ format, not _x0D_ for example. Change lcl_unEscapeUnicodeChars function to decodeXString. Delete unused functions and add multiple occurrences to the unit test. Change-Id: Id1d4bfcf7d27cf5005e7bea8e289303c5d9aca73 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151494 Reviewed-by: Eike Rathke <er...@redhat.com> Tested-by: Eike Rathke <er...@redhat.com> Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151562 Reviewed-by: Michael Stahl <michael.st...@allotropia.de> Tested-by: Jenkins diff --git a/sc/qa/unit/data/csv/tdf152980.csv b/sc/qa/unit/data/csv/tdf152980.csv new file mode 100644 index 000000000000..c5050b86d968 --- /dev/null +++ b/sc/qa/unit/data/csv/tdf152980.csv @@ -0,0 +1,9 @@ +"a_x1_b" +"a_x01_b" +"a_x001_b" +"a_x0001_b" +"a_xfoo b" +"a b" +"a +b" +"a b" diff --git a/sc/qa/unit/subsequent_export_test2.cxx b/sc/qa/unit/subsequent_export_test2.cxx index 56d7ac158151..d1920de3c3cb 100644 --- a/sc/qa/unit/subsequent_export_test2.cxx +++ b/sc/qa/unit/subsequent_export_test2.cxx @@ -193,6 +193,7 @@ public: void testTotalsRowFunction(); void testAutofilterHiddenButton(); void testTdf119565(); + void testTdf152980(); CPPUNIT_TEST_SUITE(ScExportTest2); @@ -325,6 +326,7 @@ public: CPPUNIT_TEST(testTotalsRowFunction); CPPUNIT_TEST(testAutofilterHiddenButton); CPPUNIT_TEST(testTdf119565); + CPPUNIT_TEST(testTdf152980); CPPUNIT_TEST_SUITE_END(); }; @@ -2969,6 +2971,33 @@ void ScExportTest2::testTdf119565() 
xShapeProps->getPropertyValue("LineJoint").get<drawing::LineJoint>()); } +void ScExportTest2::testTdf152980() +{ + createScDoc("csv/tdf152980.csv"); + ScDocShell* pDocSh = getScDocShell(); + pDocSh->DoHardRecalc(); + saveAndReload("Calc Office Open XML"); + pDocSh = getScDocShell(); + pDocSh->DoHardRecalc(); + + ScDocument* pDoc = getScDoc(); + + // - Expected: The part between a and b does not change + // - Actual : Only the characters a and b remain + CPPUNIT_ASSERT_EQUAL(OUString("a_x1_b"), pDoc->GetString(0, 0, 0)); + CPPUNIT_ASSERT_EQUAL(OUString("a_x01_b"), pDoc->GetString(0, 1, 0)); + CPPUNIT_ASSERT_EQUAL(OUString("a_x001_b"), pDoc->GetString(0, 2, 0)); + + // The character code does not change in both cases + CPPUNIT_ASSERT_EQUAL(OUString("a_x0001_b"), pDoc->GetString(0, 3, 0)); + + // The escape characters are handled correctly in both cases + CPPUNIT_ASSERT_EQUAL(OUString("a_xfoo\nb"), pDoc->GetString(0, 4, 0)); + CPPUNIT_ASSERT_EQUAL(OUString("a\tb"), pDoc->GetString(0, 5, 0)); + CPPUNIT_ASSERT_EQUAL(OUString("a\nb"), pDoc->GetString(0, 6, 0)); + CPPUNIT_ASSERT_EQUAL(OUString("a\n\nb"), pDoc->GetString(0, 7, 0)); +} + CPPUNIT_TEST_SUITE_REGISTRATION(ScExportTest2); CPPUNIT_PLUGIN_IMPLEMENT(); diff --git a/sc/source/filter/oox/richstring.cxx b/sc/source/filter/oox/richstring.cxx index a9b272d62a9a..8d2f964362d0 100644 --- a/sc/source/filter/oox/richstring.cxx +++ b/sc/source/filter/oox/richstring.cxx @@ -48,116 +48,6 @@ bool lclNeedsRichTextFormat( const oox::xls::Font* pFont ) return pFont && pFont->needsRichTextFormat(); } -sal_Int32 lcl_getHexLetterValue(sal_Unicode nCode) -{ - if (nCode >= '0' && nCode <= '9') - return nCode - '0'; - - if (nCode >= 'A' && nCode <= 'F') - return nCode - 'A' + 10; - - if (nCode >= 'a' && nCode <= 'f') - return nCode - 'a' + 10; - - return -1; -} - -bool lcl_validEscape(sal_Unicode nCode) -{ - // Valid XML chars that can be escaped (ignoring the restrictions) as in the OOX open spec - // 2.1.1742 Part 1 Section 
22.9.2.19, ST_Xstring (Escaped String) - if (nCode == 0x000D || nCode == 0x000A || nCode == 0x0009 || nCode == 0x005F) - return true; - - // Other valid XML chars in basic multilingual plane that cannot be escaped. - if ((nCode >= 0x0020 && nCode <= 0xD7FF) || (nCode >= 0xE000 && nCode <= 0xFFFD)) - return false; - - return true; -} - -OUString lcl_unEscapeUnicodeChars(const OUString& rSrc) -{ - // Example: Escaped representation of unicode char 0x000D is _x000D_ - - sal_Int32 nLen = rSrc.getLength(); - if (!nLen) - return rSrc; - - sal_Int32 nStart = 0; - bool bFound = false; - const OUString aPrefix = "_x"; - sal_Int32 nPrefixStart = rSrc.indexOf(aPrefix, nStart); - - if (nPrefixStart == -1) - return rSrc; - - OUStringBuffer aBuf(rSrc); - sal_Int32 nOffset = 0; // index offset in aBuf w.r.t rSrc. - - do - { - sal_Int32 nEnd = -1; - sal_Unicode nCode = 0; - bool bFoundThis = false; - for (sal_Int32 nIdx = 0; nIdx < 5; ++nIdx) - { - sal_Int32 nThisIdx = nPrefixStart + nIdx + 2; - if (nThisIdx >= nLen) - break; - - sal_Unicode nThisCode = rSrc[nThisIdx]; - sal_Int32 nLetter = lcl_getHexLetterValue(nThisCode); - - if (!nIdx && nLetter < 0) - break; - - if (nLetter >= 0) - { - nCode = (nCode << 4) + static_cast<sal_Unicode>(nLetter); - } - else if (nThisCode == '_') - { - nEnd = nThisIdx + 1; - bFoundThis = true; - break; - } - else - { - break; - } - } - - if (bFoundThis) - { - // nEnd is already set inside the inner loop in this case. 
- if (lcl_validEscape(nCode)) - { - bFound = true; - sal_Int32 nEscStrLen = nEnd - nPrefixStart; - aBuf.remove(nPrefixStart - nOffset, nEscStrLen); - aBuf.insert(nPrefixStart - nOffset, nCode); - - nOffset += nEscStrLen - 1; - } - } - else - { - // Start the next search just after last "_x" - nEnd = nPrefixStart + 2; - } - - nStart = nEnd; - nPrefixStart = rSrc.indexOf(aPrefix, nStart); - } - while (nPrefixStart != -1); - - if (bFound) - return aBuf.makeStringAndClear(); - - return rSrc; -} - } // namespace RichStringPortion::RichStringPortion() : @@ -168,7 +58,7 @@ RichStringPortion::RichStringPortion() : void RichStringPortion::setText( const OUString& rText ) { - maText = lcl_unEscapeUnicodeChars(rText); + maText = AttributeConversion::decodeXString(rText); } FontRef const & RichStringPortion::createFont(const WorkbookHelper& rHelper) commit 9f23104fe61306c72b7be70422a1a274a5d61d94 Author: Eike Rathke <er...@redhat.com> AuthorDate: Mon May 8 14:25:28 2023 +0200 Commit: Xisco Fauli <xiscofa...@libreoffice.org> CommitDate: Tue May 9 12:28:29 2023 +0200 Do not copy decodeXString() string and analyse if there is nothing to decode ... which usually isn't. 
Change-Id: I1cadc5a4c0072d5152173ad41e54e25c224e96db Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151509 Reviewed-by: Eike Rathke <er...@redhat.com> Tested-by: Jenkins Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151561 Reviewed-by: Michael Stahl <michael.st...@allotropia.de> diff --git a/oox/source/helper/attributelist.cxx b/oox/source/helper/attributelist.cxx index 037483cefb5a..7a973975f3d2 100644 --- a/oox/source/helper/attributelist.cxx +++ b/oox/source/helper/attributelist.cxx @@ -98,6 +98,9 @@ OUString AttributeConversion::decodeXString( const OUString& rValue ) // string shorter than one encoded character - no need to decode if( rValue.getLength() < XSTRING_ENCCHAR_LEN ) return rValue; + if (rValue.indexOf(u"_x") == -1) + return rValue; + OUStringBuffer aBuffer; const sal_Unicode* pcStr = rValue.getStr(); const sal_Unicode* pcEnd = pcStr + rValue.getLength();