include/oox/core/filterdetect.hxx                    |    1 
 include/test/bootstrapfixture.hxx                    |    2 
 oox/source/core/filterdetect.cxx                     |   32 ++++++++++++
 oox/source/core/xmlfilterbase.cxx                    |    7 ++
 sfx2/source/doc/sfxbasemodel.cxx                     |   13 -----
 sw/qa/extras/ooxmlexport/ooxmlexport.cxx             |    2 
 sw/qa/extras/ooxmlexport/ooxmlexport2.cxx            |   10 +--
 sw/source/writerfilter/dmapper/DomainMapper_Impl.cxx |   16 ------
 test/source/bootstrapfixture.cxx                     |   48 +++++++++++++++++--
 test/source/unoapi_test.cxx                          |   31 ++----------
 10 files changed, 97 insertions(+), 65 deletions(-)

New commits:
commit e4b629c1eecf8cd46007fb064179d765d55fd26b
Author:     Justin Luth <[email protected]>
AuthorDate: Tue Feb 11 22:14:13 2025 -0500
Commit:     Justin Luth <[email protected]>
CommitDate: Sat Feb 22 18:42:40 2025 +0100

    tdf#165207 tdf#164201 docx: always use errata uri in docProps/core.xml
    
    This patch effectively replaces (and thus contains a revert of)
    25.8 commit d97085cc6cd2bdc3b6723d1960d0ec5fa0a48165.
    
    This fixes a longstanding bug that became more prominent in 7.6
    when mstahl renamed docx filters to promote Word 2010,
    and then again in 25.2 when my bug 164201 patches
    told command-line convert-to to export using Word 2010 filter,
    and finally in 25.8 for interactive round-tripping of existing files.
    
    So what does this patch fix?
    Basically, all of the meta-data was not being read by MS Word,
    which primarily means no create/modify dates, subject, title etc.
    Every time we saved to DOCX format (except DOCX 2007), we caused Word
    to be incapable of understanding anything in docProps/core.xml.
    Telltale sign: when MS Word round-trips with added docProps/core0.xml.
    
    The spec says that DOCX 2010 should be using
        officedocuments/2006/relationships/metadata/core-properties
    and thus our untouched 2010-era coding
    specified all of that for import and export.
    
    But reality seems to be much different than the spec,
    (and some errata-sounding documentation agrees),
    so throw the spec out the window and always write
        package/2006/relationships/metadata/core-properties
    (but only for DOCX - Excel and Powerpoint follow the spec...)
    
    That meant that I also had to rewrite our import logic,
    but that was already suggested by mstahl in
    https://gerrit.libreoffice.org/c/core/+/178048
    
    Bug 165207 doesn't affect LO in any way.
    If there was already a unit test writing out to _rels/.rels
    I would have added a test for this, but since there isn't
    I won't bother since I don't see any value in doing so.
    Plus, we already have validity checks to cover this kind of stuff.
    
    The retained the unit test ensures import logic is still correct:
    make CppunitTest_sw_ooxmlexport20 CPPUNIT_TEST_NAME=testTdf158855
    
    A uiwriter4 test depends on the previous patch in this bug report.
    (See earlier versions of my patchset for details)
    
    officeotron sounds like they WANT to report invalidation errors
    based only on spec, so no point in trying to "submit a patch".
    
    Change-Id: Ib76803bf0c9f3791f9078846f00c118099ef67cd
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/181463
    Tested-by: Jenkins
    Reviewed-by: Justin Luth <[email protected]>

diff --git a/include/oox/core/filterdetect.hxx 
b/include/oox/core/filterdetect.hxx
index 47be88c19019..9ff170e84ef2 100644
--- a/include/oox/core/filterdetect.hxx
+++ b/include/oox/core/filterdetect.hxx
@@ -78,6 +78,7 @@ public:
     virtual void SAL_CALL characters( const OUString& aChars ) override;
 
 private:
+    void parseSettings(const AttributeList& rAttribs);
     void                parseRelationship( const AttributeList& rAttribs );
 
     OUString            getFilterNameFromContentType( std::u16string_view 
rContentType, std::u16string_view rFileName );
diff --git a/include/test/bootstrapfixture.hxx 
b/include/test/bootstrapfixture.hxx
index f512cabc7784..449534035eb4 100644
--- a/include/test/bootstrapfixture.hxx
+++ b/include/test/bootstrapfixture.hxx
@@ -61,7 +61,7 @@ public:
 
     virtual void setUp() override;
 
-    void validate(const OUString& rURL, ValidationFormat) const;
+    void validate(const OUString& rURL, std::u16string_view rFilter) const;
 
     // Allows to exclude tests dependent on color depth of the default virtual 
device
     static sal_uInt16 getDefaultDeviceBitCount();
diff --git a/oox/source/core/filterdetect.cxx b/oox/source/core/filterdetect.cxx
index 33c95c7e2ca3..ccd879f4494b 100644
--- a/oox/source/core/filterdetect.cxx
+++ b/oox/source/core/filterdetect.cxx
@@ -88,6 +88,15 @@ void SAL_CALL FilterDetectDocHandler::startFastElement(
     AttributeList aAttribs( rAttribs );
     switch ( nElement )
     {
+        // cases for word/settings.xml
+        case W_TOKEN(settings):
+        case W_TOKEN(compat):
+            break;
+        case W_TOKEN(compatSetting):
+            if (!maContextStack.empty() && (maContextStack.back() == 
W_TOKEN(compat)))
+                parseSettings(aAttribs);
+            break;
+
         // cases for _rels/.rels
         case PR_TOKEN( Relationships ):
         break;
@@ -142,6 +151,18 @@ void SAL_CALL FilterDetectDocHandler::characters( const 
OUString& /*aChars*/ )
 {
 }
 
+void FilterDetectDocHandler::parseSettings(const AttributeList& rAttribs)
+{
+    // tdf#131936 Remember filter when opening file as 'Office Open XML Text'
+    if 
(rAttribs.getStringDefaulted(W_TOKEN(name)).equalsIgnoreAsciiCase("compatibilityMode"))
+    {
+        const sal_Int32 nVal = rAttribs.getInteger(W_TOKEN(val), 12); // 
default to Word 2007
+        // if specified multiple times, highest value wins
+        if (nVal > 12 && maOOXMLVariant == OOXMLVariant::ECMA_Transitional)
+            maOOXMLVariant = OOXMLVariant::ISO_Transitional; // Word 2010+
+    }
+}
+
 void FilterDetectDocHandler::parseRelationship( const AttributeList& rAttribs )
 {
     OUString aType = rAttribs.getStringDefaulted( XML_Type);
@@ -438,6 +459,7 @@ OUString SAL_CALL FilterDetect::detect( Sequence< 
PropertyValue >& rMediaDescSeq
             aParser.registerNamespace( NMSP_packageRel );
             aParser.registerNamespace( NMSP_officeRel );
             aParser.registerNamespace( NMSP_packageContentTypes );
+            aParser.registerNamespace(NMSP_doc); // for W_TOKEN
 
             OUString aFileName;
             aMediaDescriptor[utl::MediaDescriptor::PROP_URL] >>= aFileName;
@@ -447,6 +469,16 @@ OUString SAL_CALL FilterDetect::detect( Sequence< 
PropertyValue >& rMediaDescSeq
             /*  Parse '_rels/.rels' to get the target path and 
'[Content_Types].xml'
                 to determine the content type of the part at the target path. 
*/
             aParser.parseStream( aZipStorage, u"_rels/.rels"_ustr );
+            try
+            {
+                // Text documents can't use .rels to determine maOOXMLVariant. 
Use compatibilityMode
+                 aParser.parseStream(aZipStorage, u"word/settings.xml"_ustr);
+            }
+            catch(const Exception&)
+            {
+                // not a MS Word text document, or file might not exist
+            }
+            // Order is critical: .rels and then settings.xml must be parsed 
before [Content_Types]
             aParser.parseStream( aZipStorage, u"[Content_Types].xml"_ustr );
         }
     }
diff --git a/oox/source/core/xmlfilterbase.cxx 
b/oox/source/core/xmlfilterbase.cxx
index 38a51cc4b2d4..8cb7c5213514 100644
--- a/oox/source/core/xmlfilterbase.cxx
+++ b/oox/source/core/xmlfilterbase.cxx
@@ -27,6 +27,7 @@
 #include <com/sun/star/beans/Pair.hpp>
 #include <com/sun/star/embed/XRelationshipAccess.hpp>
 #include <com/sun/star/frame/XModel.hpp>
+#include <com/sun/star/text/XTextDocument.hpp>
 #include <com/sun/star/xml/sax/XFastSAXSerializable.hpp>
 #include <com/sun/star/xml/sax/XSAXSerializable.hpp>
 #include <com/sun/star/xml/sax/Writer.hpp>
@@ -632,8 +633,12 @@ writeCoreProperties( XmlFilterBase& rSelf, const 
Reference< XDocumentProperties
         = bRemovePersonalInfo
           && 
!SvtSecurityOptions::IsOptionSet(SvtSecurityOptions::EOption::DocWarnKeepDocUserInfo);
 
+    // Although the spec says that ISOIEC_29500_2008 must use the 
"officedocument" URI,
+    // in MS errata they admit that DOCX just uses the old "package" URI from 
ECMA_376_1ST_EDITION.
+
     OUString sValue;
-    if( rSelf.getVersion() == oox::core::ISOIEC_29500_2008  )
+    const bool bDocx = 
dynamic_cast<text::XTextDocument*>(rSelf.getModel().get());
+    if (!bDocx && rSelf.getVersion() == oox::core::ISOIEC_29500_2008)
     {
         // The lowercase "officedocument" is intentional and according to the 
spec
         // (although most other places are written "officeDocument")
diff --git a/sfx2/source/doc/sfxbasemodel.cxx b/sfx2/source/doc/sfxbasemodel.cxx
index 17dc962a4370..2d9518530416 100644
--- a/sfx2/source/doc/sfxbasemodel.cxx
+++ b/sfx2/source/doc/sfxbasemodel.cxx
@@ -1188,19 +1188,6 @@ void SAL_CALL SfxBaseModel::setArgs(const 
Sequence<beans::PropertyValue>& aArgs)
             pMedium->GetItemSet().Put(SfxUnoAnyItem(SID_ENCRYPTIONDATA, 
rArg.Value));
             ok = true;
         }
-        else if (rArg.Name == "FilterName")
-        {
-            // HACK: Needed a way to tweak the filter after loading has 
started,
-            // but changing this must be avoided unless clearly intentional.
-            if (aArgs.size() == 1)
-            {
-                if (rArg.Value >>= sValue)
-                {
-                    pMedium->GetItemSet().Put(SfxStringItem(SID_FILTER_NAME, 
sValue));
-                    ok = true;
-                }
-            }
-        }
         if (!ok)
         {
             throw lang::IllegalArgumentException("Setting property not 
supported: " + rArg.Name,
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport.cxx 
b/sw/qa/extras/ooxmlexport/ooxmlexport.cxx
index 64901fcf7359..6aee5bd0babd 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport.cxx
@@ -568,7 +568,7 @@ DECLARE_OOXMLEXPORT_TEST(testParagraphMark2, 
"paragraph-mark2.docx")
 CPPUNIT_TEST_FIXTURE(Test, testParagraphMarkNonempty)
 {
     loadAndSave("paragraph-mark-nonempty.odt");
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
     CPPUNIT_ASSERT_EQUAL(1, getPages());
     xmlDocUniquePtr pXmlDoc = parseExport(u"word/document.xml"_ustr);
     // There were two <w:sz> elements, make sure the 40 one is dropped and the 
20 one is kept.
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx 
b/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx
index 04356b53fa66..22a4e2169254 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport2.cxx
@@ -44,7 +44,7 @@ public:
 CPPUNIT_TEST_FIXTURE(Test, testPageGraphicBackground)
 {
     loadAndReload("page-graphic-background.odt");
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
     CPPUNIT_ASSERT_EQUAL(1, getPages());
     // No idea how the graphic background should be exported (seems there is no
     // way to do a non-tiling export to OOXML), but at least the background
@@ -165,7 +165,7 @@ CPPUNIT_TEST_FIXTURE(Test, testZoom)
     verify();
 
     // Validation test: order of elements were wrong.
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
     xmlDocUniquePtr pXmlDoc = parseExport(u"word/styles.xml"_ustr);
     // Order was: rsid, next.
     int nNext = getXPathPosition(pXmlDoc, "/w:styles/w:style[3]", "next");
@@ -181,7 +181,7 @@ CPPUNIT_TEST_FIXTURE(Test, testZoom)
 CPPUNIT_TEST_FIXTURE(Test, defaultTabStopNotInStyles)
 {
     loadAndReload("empty.odt");
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
     CPPUNIT_ASSERT_EQUAL(1, getPages());
 // The default tab stop was mistakenly exported to a style.
 // xray ThisComponent.StyleFamilies(1)(0).ParaTabStop
@@ -253,13 +253,13 @@ CPPUNIT_TEST_FIXTURE(Test, testFdo38244)
     verify();
     saveAndReload(mpFilter);
     verify();
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
 }
 
 CPPUNIT_TEST_FIXTURE(Test, testCommentsNested)
 {
     loadAndReload("comments-nested.odt");
-    validate(maTempFile.GetFileName(), test::OOXML);
+    validate(maTempFile.GetFileName(), mpFilter);
     CPPUNIT_ASSERT_EQUAL(1, getPages());
     uno::Reference<beans::XPropertySet> xOuter = getProperty< 
uno::Reference<beans::XPropertySet> >(getRun(getParagraph(1), 2), 
u"TextField"_ustr);
     CPPUNIT_ASSERT_EQUAL(u"Outer"_ustr, getProperty<OUString>(xOuter, 
u"Content"_ustr));
diff --git a/sw/source/writerfilter/dmapper/DomainMapper_Impl.cxx 
b/sw/source/writerfilter/dmapper/DomainMapper_Impl.cxx
index 393c451c5f9a..eb0761c75deb 100644
--- a/sw/source/writerfilter/dmapper/DomainMapper_Impl.cxx
+++ b/sw/source/writerfilter/dmapper/DomainMapper_Impl.cxx
@@ -9878,22 +9878,6 @@ void DomainMapper_Impl::ApplySettingsTable()
         // i.e. it typesets the same text with less lines and pages.
         if (m_pSettingsTable->GetWordCompatibilityMode() >= 15)
             xSettings->setPropertyValue(u"JustifyLinesWithShrinking"_ustr, 
uno::Any( true ));
-
-        if (m_pSettingsTable->GetWordCompatibilityMode() > 12)
-        {
-            // impossible to be importing as a 2007-only format when compat 
mode is not equal to 12,
-            // so change from ECMA_376_1ST_EDITION to ISOIEC_29500_2008
-            // so that at export time it will not be forced into compat12 mode.
-            OUString sFilterName;
-            comphelper::SequenceAsHashMap aMap(m_xTextDocument->getArgs());
-            aMap["FilterName"] >>=  sFilterName;
-            if (sFilterName == "MS Word 2007 XML")
-            {
-                m_xTextDocument->setArgs(
-                    { comphelper::makePropertyValue("FilterName", 
OUString("Office Open XML Text")) });
-            }
-        }
-
         if (m_pSettingsTable->GetUsePrinterMetrics())
             xSettings->setPropertyValue(u"PrinterIndependentLayout"_ustr, 
uno::Any(document::PrinterIndependentLayout::DISABLED));
         if( m_pSettingsTable->GetEmbedTrueTypeFonts())
diff --git a/test/source/bootstrapfixture.cxx b/test/source/bootstrapfixture.cxx
index 32ebbd01a9e0..a34cc7e4473b 100644
--- a/test/source/bootstrapfixture.cxx
+++ b/test/source/bootstrapfixture.cxx
@@ -166,7 +166,7 @@ OUString filterOut(const OUString& s, std::u16string_view 
excludedSubstr)
     return result;
 }
 
-OUString filterValidationResults(const OUString& s)
+OUString filterValidationResults(const OUString& s, std::u16string_view 
rFilter)
 {
     OUString result = s;
     // In ECMA-376-1 Second Edition, 2008, there is the following restriction 
for oleObj:
@@ -189,13 +189,55 @@ OUString filterValidationResults(const OUString& s)
     //
     // But officeotron only knows the old version...
     result = filterOut(result, u"Invalid content was found starting with 
element 'p:pic'. No child element is expected at this point.");
+
+    if (rFilter == u"Office Open XML Text")
+    {
+        /* While the spec says the core-properties relationship must be
+         *     officedocument/2006/relationships/metadata/core-properties
+         * MS Word actually just writes the ECMA-376-1ST EDITION version
+         * for both ECMA_Transitional and ISO_Transitional formats.
+         *
+         * officeotron doesn't care that clause 15.2.12.1 fails on all 
MSWord-produced output.
+         */
+        result = filterOut(result, u"Entry with MIME type 
\"application/vnd.openxmlformats-package.core-properties+xml\" has unrecognized 
relationship type 
\"http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties\";
 (see ISO/IEC 29500-1:2008, Clause 15.2.12.1)");
+    }
+
     return result;
 }
 }
 #endif
 
-void test::BootstrapFixture::validate(const OUString& rPath, 
test::ValidationFormat eFormat) const
+void test::BootstrapFixture::validate(const OUString& rPath, 
std::u16string_view rFilter) const
 {
+    test::ValidationFormat eFormat = test::ODF;
+    if (rFilter == u"Calc Office Open XML")
+        eFormat = test::OOXML;
+    else if (rFilter == u"Office Open XML Text")
+        eFormat = test::OOXML;
+    else if (rFilter == u"Impress Office Open XML")
+        eFormat = test::OOXML;
+    else if (rFilter == u"writer8")
+        eFormat = test::ODF;
+    else if (rFilter == u"calc8")
+        eFormat = test::ODF;
+    else if (rFilter == u"impress8")
+        eFormat = test::ODF;
+    else if (rFilter == u"draw8")
+        eFormat = test::ODF;
+    else if (rFilter == u"OpenDocument Text Flat XML")
+        eFormat = test::ODF;
+    else if (rFilter == u"MS Word 97")
+        eFormat = test::MSBINARY;
+    else if (rFilter == u"MS Excel 97")
+        eFormat = test::MSBINARY;
+    else if (rFilter == u"MS PowerPoint 97")
+        eFormat = test::MSBINARY;
+    else
+    {
+        SAL_INFO("test", "BootstrapFixture::validate: unknown filter");
+        return;
+    }
+
 #if HAVE_EXPORT_VALIDATION
     OUString var;
     if( eFormat == test::OOXML )
@@ -266,7 +308,7 @@ void test::BootstrapFixture::validate(const OUString& 
rPath, test::ValidationFor
 
     if( eFormat == test::OOXML && !aContentOUString.isEmpty() )
     {
-        aContentOUString = filterValidationResults(aContentOUString);
+        aContentOUString = filterValidationResults(aContentOUString, rFilter);
         // check for validation errors here
         sal_Int32 nIndex = aContentOUString.lastIndexOf(grand_total);
         if(nIndex == -1)
diff --git a/test/source/unoapi_test.cxx b/test/source/unoapi_test.cxx
index a849ea6d2af1..035bf8c1f5fb 100644
--- a/test/source/unoapi_test.cxx
+++ b/test/source/unoapi_test.cxx
@@ -180,31 +180,12 @@ void UnoApiTest::save(const OUString& rFilter, const 
char* pPassword)
 
     if (!mbSkipValidation)
     {
-        if (rFilter == "Calc Office Open XML")
-            validate(maTempFile.GetFileName(), test::OOXML);
-        /*
-        // too many validation errors right now
-        else if (rFilter == "Office Open XML Text")
-            validate(maTempFile.GetFileName(), test::OOXML);
-        */
-        else if (rFilter == "Impress Office Open XML")
-            validate(maTempFile.GetFileName(), test::OOXML);
-        else if (rFilter == "writer8")
-            validate(maTempFile.GetFileName(), test::ODF);
-        else if (rFilter == "calc8")
-            validate(maTempFile.GetFileName(), test::ODF);
-        else if (rFilter == "impress8")
-            validate(maTempFile.GetFileName(), test::ODF);
-        else if (rFilter == "draw8")
-            validate(maTempFile.GetFileName(), test::ODF);
-        else if (rFilter == "OpenDocument Text Flat XML")
-            validate(maTempFile.GetFileName(), test::ODF);
-        else if (rFilter == "MS Word 97")
-            validate(maTempFile.GetFileName(), test::MSBINARY);
-        else if (rFilter == "MS Excel 97")
-            validate(maTempFile.GetFileName(), test::MSBINARY);
-        else if (rFilter == "MS PowerPoint 97")
-            validate(maTempFile.GetFileName(), test::MSBINARY);
+        if (rFilter == "Office Open XML Text")
+        {
+            // do nothing: too many validation errors right now
+        }
+        else
+            validate(maTempFile.GetFileName(), rFilter);
     }
 }
 

Reply via email to