source

Czeber László Ádám (via logerrit) Tue, 09 May 2023 03:28:55 -0700

 oox/source/helper/attributelist.cxx    |    3 
 sc/qa/unit/data/csv/tdf152980.csv      |    9 ++
 sc/qa/unit/subsequent_export_test2.cxx |   29 ++++++++
 sc/source/filter/oox/richstring.cxx    |  112 ---------------------------------
 4 files changed, 42 insertions(+), 111 deletions(-)


New commits:
commit 47b30728db3ad47f1b4d0d8b027ba0a55607ac1e
Author:     Czeber László Ádám <czeber.laszloa...@nisz.hu>
AuthorDate: Mon May 8 09:33:07 2023 +0200
Commit:     Xisco Fauli <xiscofa...@libreoffice.org>
CommitDate: Tue May 9 12:28:34 2023 +0200

    tdf#152980 CSV import: Fix control character length in XLSX save
    
    Converting from CSV to XLSX corrupted text that merely looks like an
    escaped control character. Only escape sequences with exactly four
    hexadecimal digits are valid, i.e. _x000D_ and not, for example, _x0D_.
    
    Replace the lcl_unEscapeUnicodeChars function with decodeXString. Delete
    the now unused functions and add multiple occurrences to the unit test.
    
    Change-Id: Id1d4bfcf7d27cf5005e7bea8e289303c5d9aca73
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151494
    Reviewed-by: Eike Rathke <er...@redhat.com>
    Tested-by: Eike Rathke <er...@redhat.com>
    Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151562
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>
    Tested-by: Jenkins

diff --git a/sc/qa/unit/data/csv/tdf152980.csv 
b/sc/qa/unit/data/csv/tdf152980.csv
new file mode 100644
index 000000000000..c5050b86d968
--- /dev/null
+++ b/sc/qa/unit/data/csv/tdf152980.csv
@@ -0,0 +1,9 @@
+"a_x1_b"
+"a_x01_b"
+"a_x001_b"
+"a_x0001_b"
+"a_xfoo
b"
+"a     b"
+"a
+b"
+"a

b"
diff --git a/sc/qa/unit/subsequent_export_test2.cxx 
b/sc/qa/unit/subsequent_export_test2.cxx
index 56d7ac158151..d1920de3c3cb 100644
--- a/sc/qa/unit/subsequent_export_test2.cxx
+++ b/sc/qa/unit/subsequent_export_test2.cxx
@@ -193,6 +193,7 @@ public:
     void testTotalsRowFunction();
     void testAutofilterHiddenButton();
     void testTdf119565();
+    void testTdf152980();
 
     CPPUNIT_TEST_SUITE(ScExportTest2);
 
@@ -325,6 +326,7 @@ public:
     CPPUNIT_TEST(testTotalsRowFunction);
     CPPUNIT_TEST(testAutofilterHiddenButton);
     CPPUNIT_TEST(testTdf119565);
+    CPPUNIT_TEST(testTdf152980);
 
     CPPUNIT_TEST_SUITE_END();
 };
@@ -2969,6 +2971,33 @@ void ScExportTest2::testTdf119565()
                          
xShapeProps->getPropertyValue("LineJoint").get<drawing::LineJoint>());
 }
 
+void ScExportTest2::testTdf152980()
+{
+    createScDoc("csv/tdf152980.csv");
+    ScDocShell* pDocSh = getScDocShell();
+    pDocSh->DoHardRecalc();
+    saveAndReload("Calc Office Open XML");
+    pDocSh = getScDocShell();
+    pDocSh->DoHardRecalc();
+
+    ScDocument* pDoc = getScDoc();
+
+    // - Expected: The part between a and b does not change
+    // - Actual  : Only the characters a and b remain
+    CPPUNIT_ASSERT_EQUAL(OUString("a_x1_b"), pDoc->GetString(0, 0, 0));
+    CPPUNIT_ASSERT_EQUAL(OUString("a_x01_b"), pDoc->GetString(0, 1, 0));
+    CPPUNIT_ASSERT_EQUAL(OUString("a_x001_b"), pDoc->GetString(0, 2, 0));
+
+    // The character code does not change in both cases
+    CPPUNIT_ASSERT_EQUAL(OUString("a_x0001_b"), pDoc->GetString(0, 3, 0));
+
+    // The escape characters are handled correctly in both cases
+    CPPUNIT_ASSERT_EQUAL(OUString("a_xfoo\nb"), pDoc->GetString(0, 4, 0));
+    CPPUNIT_ASSERT_EQUAL(OUString("a\tb"), pDoc->GetString(0, 5, 0));
+    CPPUNIT_ASSERT_EQUAL(OUString("a\nb"), pDoc->GetString(0, 6, 0));
+    CPPUNIT_ASSERT_EQUAL(OUString("a\n\nb"), pDoc->GetString(0, 7, 0));
+}
+
 CPPUNIT_TEST_SUITE_REGISTRATION(ScExportTest2);
 
 CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/oox/richstring.cxx 
b/sc/source/filter/oox/richstring.cxx
index a9b272d62a9a..8d2f964362d0 100644
--- a/sc/source/filter/oox/richstring.cxx
+++ b/sc/source/filter/oox/richstring.cxx
@@ -48,116 +48,6 @@ bool lclNeedsRichTextFormat( const oox::xls::Font* pFont )
     return pFont && pFont->needsRichTextFormat();
 }
 
-sal_Int32 lcl_getHexLetterValue(sal_Unicode nCode)
-{
-    if (nCode >= '0' && nCode <= '9')
-        return nCode - '0';
-
-    if (nCode >= 'A' && nCode <= 'F')
-        return nCode - 'A' + 10;
-
-    if (nCode >= 'a' && nCode <= 'f')
-        return nCode - 'a' + 10;
-
-    return -1;
-}
-
-bool lcl_validEscape(sal_Unicode nCode)
-{
-    // Valid XML chars that can be escaped (ignoring the restrictions) as in 
the OOX open spec
-    // 2.1.1742 Part 1 Section 22.9.2.19, ST_Xstring (Escaped String)
-    if (nCode == 0x000D || nCode == 0x000A || nCode == 0x0009 || nCode == 
0x005F)
-        return true;
-
-    // Other valid XML chars in basic multilingual plane that cannot be 
escaped.
-    if ((nCode >= 0x0020 && nCode <= 0xD7FF) || (nCode >= 0xE000 && nCode <= 
0xFFFD))
-        return false;
-
-    return true;
-}
-
-OUString lcl_unEscapeUnicodeChars(const OUString& rSrc)
-{
-    // Example: Escaped representation of unicode char 0x000D is _x000D_
-
-    sal_Int32 nLen = rSrc.getLength();
-    if (!nLen)
-        return rSrc;
-
-    sal_Int32 nStart = 0;
-    bool bFound = false;
-    const OUString aPrefix = "_x";
-    sal_Int32 nPrefixStart = rSrc.indexOf(aPrefix, nStart);
-
-    if (nPrefixStart == -1)
-        return rSrc;
-
-    OUStringBuffer aBuf(rSrc);
-    sal_Int32 nOffset = 0; // index offset in aBuf w.r.t rSrc.
-
-    do
-    {
-        sal_Int32 nEnd = -1;
-        sal_Unicode nCode = 0;
-        bool bFoundThis = false;
-        for (sal_Int32 nIdx = 0; nIdx < 5; ++nIdx)
-        {
-            sal_Int32 nThisIdx = nPrefixStart + nIdx + 2;
-            if (nThisIdx >= nLen)
-                break;
-
-            sal_Unicode nThisCode = rSrc[nThisIdx];
-            sal_Int32 nLetter = lcl_getHexLetterValue(nThisCode);
-
-            if (!nIdx && nLetter < 0)
-                break;
-
-            if (nLetter >= 0)
-            {
-                nCode = (nCode << 4) + static_cast<sal_Unicode>(nLetter);
-            }
-            else if (nThisCode == '_')
-            {
-                nEnd = nThisIdx + 1;
-                bFoundThis = true;
-                break;
-            }
-            else
-            {
-                break;
-            }
-        }
-
-        if (bFoundThis)
-        {
-            // nEnd is already set inside the inner loop in this case.
-            if (lcl_validEscape(nCode))
-            {
-                bFound = true;
-                sal_Int32 nEscStrLen = nEnd - nPrefixStart;
-                aBuf.remove(nPrefixStart - nOffset, nEscStrLen);
-                aBuf.insert(nPrefixStart - nOffset, nCode);
-
-                nOffset += nEscStrLen - 1;
-            }
-        }
-        else
-        {
-            // Start the next search just after last "_x"
-            nEnd = nPrefixStart + 2;
-        }
-
-        nStart = nEnd;
-        nPrefixStart = rSrc.indexOf(aPrefix, nStart);
-    }
-    while (nPrefixStart != -1);
-
-    if (bFound)
-        return aBuf.makeStringAndClear();
-
-    return rSrc;
-}
-
 } // namespace
 
 RichStringPortion::RichStringPortion() :
@@ -168,7 +58,7 @@ RichStringPortion::RichStringPortion() :
 
 void RichStringPortion::setText( const OUString& rText )
 {
-    maText = lcl_unEscapeUnicodeChars(rText);
+    maText = AttributeConversion::decodeXString(rText);
 }
 
 FontRef const & RichStringPortion::createFont(const WorkbookHelper& rHelper)
commit 9f23104fe61306c72b7be70422a1a274a5d61d94
Author:     Eike Rathke <er...@redhat.com>
AuthorDate: Mon May 8 14:25:28 2023 +0200
Commit:     Xisco Fauli <xiscofa...@libreoffice.org>
CommitDate: Tue May 9 12:28:29 2023 +0200

    Do not copy decodeXString() string and analyse if there is nothing to decode
    
    ... which is usually the case.
    
    Change-Id: I1cadc5a4c0072d5152173ad41e54e25c224e96db
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151509
    Reviewed-by: Eike Rathke <er...@redhat.com>
    Tested-by: Jenkins
    Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151561
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>

diff --git a/oox/source/helper/attributelist.cxx 
b/oox/source/helper/attributelist.cxx
index 037483cefb5a..7a973975f3d2 100644
--- a/oox/source/helper/attributelist.cxx
+++ b/oox/source/helper/attributelist.cxx
@@ -98,6 +98,9 @@ OUString AttributeConversion::decodeXString( const OUString& 
rValue )
     // string shorter than one encoded character - no need to decode
     if( rValue.getLength() < XSTRING_ENCCHAR_LEN )
         return rValue;
+    if (rValue.indexOf(u"_x") == -1)
+        return rValue;
+
     OUStringBuffer aBuffer;
     const sal_Unicode* pcStr = rValue.getStr();
     const sal_Unicode* pcEnd = pcStr + rValue.getLength();

[Libreoffice-commits] core.git: Branch 'libreoffice-7-5' - 2 commits - oox/source sc/qa sc/source

Reply via email to