vcl/Library_vcl.mk                     |    1 
 vcl/inc/pdf/ExternalPDFStreams.hxx     |   65 +++++++++++++++++++++++++++++++++
 vcl/inc/pdf/objectcopier.hxx           |    3 +
 vcl/qa/cppunit/pdfexport/pdfexport.cxx |    2 -
 vcl/source/gdi/pdfobjectcopier.cxx     |    7 +++
 vcl/source/gdi/pdfwriter_impl.cxx      |   33 ++++++++--------
 vcl/source/gdi/pdfwriter_impl.hxx      |   21 ++++++++--
 vcl/source/pdf/ExternalPDFStreams.cxx  |   43 +++++++++++++++++++++
 8 files changed, 151 insertions(+), 24 deletions(-)

New commits:
commit e528293bc17ecce92124e8dd8841bcea2bda562e
Author:     Tomaž Vajngerl <tomaz.vajng...@collabora.co.uk>
AuthorDate: Wed Oct 28 13:55:23 2020 +0100
Commit:     Miklos Vajna <vmik...@collabora.com>
CommitDate: Wed Oct 28 18:24:24 2020 +0100

    pdf: deduplicate resources when copying from external PDF stream
    
    When using external PDF stream/data (from PDF graphic objects),
    make sure the content of external PDF resources (fonts, bitmaps,
    forms) is copied only once, by sharing the map of copied resources
    between calls; every subsequent use just references the objects
    that were already copied.
    
    Change-Id: Ibaa632c8f74806eb195e69404551db6fd077a986
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/104935
    Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com>
    Reviewed-by: Miklos Vajna <vmik...@collabora.com>

diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index e9d820806870..9afdac80ef86 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -319,6 +319,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
     vcl/source/gdi/CommonSalLayout \
     vcl/source/gdi/TypeSerializer \
     vcl/source/pdf/PDFiumLibrary \
+    vcl/source/pdf/ExternalPDFStreams \
     vcl/source/graphic/GraphicID \
     vcl/source/graphic/GraphicLoader \
     vcl/source/graphic/GraphicObject \
diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx 
b/vcl/inc/pdf/ExternalPDFStreams.hxx
new file mode 100644
index 000000000000..3bd59478c212
--- /dev/null
+++ b/vcl/inc/pdf/ExternalPDFStreams.hxx
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <sal/types.h>
+#include <sal/log.hxx>
+#include <vcl/dllapi.h>
+
+#include <map>
+#include <vector>
+#include <memory>
+
+#include <vcl/filter/pdfdocument.hxx>
+
+namespace vcl
+{
+struct VCL_DLLPUBLIC ExternalPDFStream
+{
+    std::vector<sal_uInt8> maData;
+    std::shared_ptr<filter::PDFDocument> mpPDFDocument;
+    std::map<sal_Int32, sal_Int32> maCopiedResources;
+
+    std::map<sal_Int32, sal_Int32>& getCopiedResources() { return 
maCopiedResources; }
+
+    filter::PDFDocument& getPDFDocument()
+    {
+        if (!mpPDFDocument)
+        {
+            SvMemoryStream aPDFStream;
+            aPDFStream.WriteBytes(maData.data(), maData.size());
+            aPDFStream.Seek(0);
+            mpPDFDocument = std::make_unique<filter::PDFDocument>();
+            if (!mpPDFDocument->Read(aPDFStream))
+            {
+                SAL_WARN("vcl.pdfwriter",
+                         "PDFWriterImpl::writeReferenceXObject: reading the 
PDF document failed");
+            }
+        }
+        return *mpPDFDocument;
+    }
+};
+
+class VCL_DLLPUBLIC ExternalPDFStreams
+{
+private:
+    std::map<std::vector<sal_uInt8>, sal_Int32> maStreamIndexMap;
+    std::vector<ExternalPDFStream> maStreamList;
+
+public:
+    ExternalPDFStreams() {}
+
+    sal_Int32 store(const sal_uInt8* pData, sal_uInt32 nLength);
+
+    ExternalPDFStream& get(sal_uInt32 nIndex);
+};
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/inc/pdf/objectcopier.hxx b/vcl/inc/pdf/objectcopier.hxx
index a6ff12d116af..487d03186682 100644
--- a/vcl/inc/pdf/objectcopier.hxx
+++ b/vcl/inc/pdf/objectcopier.hxx
@@ -48,6 +48,9 @@ public:
     /// Copies resources of pPage into rLine.
     void copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& 
rLine);
 
+    void copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& 
rLine,
+                           std::map<sal_Int32, sal_Int32>& rCopiedResources);
+
     /// Copies page one or more page streams from rContentStreams into rStream.
     static sal_Int32 copyPageStreams(std::vector<filter::PDFObjectElement*>& 
rContentStreams,
                                      SvMemoryStream& rStream, bool& 
rCompressed);
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx 
b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index 190e26cce19b..5f50661b013e 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -2131,7 +2131,7 @@ void PdfExportTest::testMultiPagePDF()
     {   // embedded PDF page 2
         vcl::filter::PDFObjectElement* pXObject2 = 
pXObjects->LookupObject(rIDs[1]);
         CPPUNIT_ASSERT(pXObject2);
-        CPPUNIT_ASSERT_EQUAL(OString("Im34"), rIDs[1]);
+        CPPUNIT_ASSERT_EQUAL(OString("Im24"), rIDs[1]);
 
         auto pSubtype2 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pXObject2->Lookup("Subtype"));
         CPPUNIT_ASSERT(pSubtype2);
diff --git a/vcl/source/gdi/pdfobjectcopier.cxx 
b/vcl/source/gdi/pdfobjectcopier.cxx
index 5e54ee68c289..a953c864c122 100644
--- a/vcl/source/gdi/pdfobjectcopier.cxx
+++ b/vcl/source/gdi/pdfobjectcopier.cxx
@@ -275,13 +275,18 @@ void 
PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OString
 {
     // Maps from source object id (PDF image) to target object id (export 
result).
     std::map<sal_Int32, sal_Int32> aCopiedResources;
+    copyPageResources(pPage, rLine, aCopiedResources);
+}
 
+void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, 
OStringBuffer& rLine,
+                                        std::map<sal_Int32, sal_Int32>& 
rCopiedResources)
+{
     rLine.append(" /Resources <<");
     static const std::initializer_list<OString> aKeys
         = { "ColorSpace", "ExtGState", "Font", "XObject", "Shading" };
     for (const auto& rKey : aKeys)
     {
-        rLine.append(copyExternalResources(*pPage, rKey, aCopiedResources));
+        rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources));
     }
     rLine.append(">>");
 }
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx 
b/vcl/source/gdi/pdfwriter_impl.cxx
index 6f18a2882645..2aa973e567c5 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -8585,7 +8585,7 @@ bool PDFWriterImpl::writeGradientFunction( GradientEmit 
const & rObject )
 
 void PDFWriterImpl::writeJPG( JPGEmit& rObject )
 {
-    if (!rObject.m_aReferenceXObject.m_aPDFData.empty() && 
!m_aContext.UseReferenceXObject)
+    if (rObject.m_aReferenceXObject.hasExternalPDFData() && 
!m_aContext.UseReferenceXObject)
     {
         writeReferenceXObject(rObject.m_aReferenceXObject);
         return;
@@ -8684,23 +8684,19 @@ void 
PDFWriterImpl::writeReferenceXObject(ReferenceXObjectEmit& rEmit)
     {
         // Parse the PDF data, we need that to write the PDF dictionary of our
         // object.
-        SvMemoryStream aPDFStream;
-        aPDFStream.WriteBytes(rEmit.m_aPDFData.data(), 
rEmit.m_aPDFData.size());
-        aPDFStream.Seek(0);
-        filter::PDFDocument aPDFDocument;
-        if (!aPDFDocument.Read(aPDFStream))
-        {
-            SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: 
reading the PDF document failed");
+        if (rEmit.m_nExternalPDFDataIndex < 0)
             return;
-        }
-        std::vector<filter::PDFObjectElement*> aPages = 
aPDFDocument.GetPages();
+        auto & rExternalPDFStream = 
m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex);
+        auto & rPDFDocument = rExternalPDFStream.getPDFDocument();
+
+        std::vector<filter::PDFObjectElement*> aPages = 
rPDFDocument.GetPages();
         if (aPages.empty())
         {
             SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: 
no pages");
             return;
         }
 
-        size_t nPageIndex = rEmit.m_nPDFPageIndex >= 0 ? rEmit.m_nPDFPageIndex 
: 0;
+        size_t nPageIndex = rEmit.m_nExternalPDFPageIndex >= 0 ? 
rEmit.m_nExternalPDFPageIndex : 0;
 
         filter::PDFObjectElement* pPage = aPages[nPageIndex];
         if (!pPage)
@@ -8772,7 +8768,9 @@ void 
PDFWriterImpl::writeReferenceXObject(ReferenceXObjectEmit& rEmit)
         }
 
         PDFObjectCopier aCopier(*this);
-        aCopier.copyPageResources(pPage, aLine);
+        auto & rResources = rExternalPDFStream.getCopiedResources();
+        aCopier.copyPageResources(pPage, aLine, rResources);
+
         aLine.append(" /BBox [ 0 0 ");
         aLine.append(nWidth);
         aLine.append(" ");
@@ -8914,7 +8912,7 @@ namespace
 
 bool PDFWriterImpl::writeBitmapObject( BitmapEmit& rObject, bool bMask )
 {
-    if (!rObject.m_aReferenceXObject.m_aPDFData.empty() && 
!m_aContext.UseReferenceXObject)
+    if (rObject.m_aReferenceXObject.hasExternalPDFData() && 
!m_aContext.UseReferenceXObject)
     {
         writeReferenceXObject(rObject.m_aReferenceXObject);
         return true;
@@ -9236,10 +9234,10 @@ void PDFWriterImpl::createEmbeddedFile(const Graphic& 
rGraphic, ReferenceXObject
     sal_uInt32 nLength = 
rGraphic.getVectorGraphicData()->getVectorGraphicDataArrayLength();
     auto const & rArray = 
rGraphic.getVectorGraphicData()->getVectorGraphicDataArray();
 
-    auto pPDFData = 
std::make_shared<std::vector<sal_Int8>>(rArray.getConstArray(), 
rArray.getConstArray() + nLength);
-
     if (m_aContext.UseReferenceXObject)
     {
+        auto pPDFData = 
std::make_shared<std::vector<sal_Int8>>(rArray.getConstArray(), 
rArray.getConstArray() + nLength);
+
         // Store the original PDF data as an embedded file.
         m_aEmbeddedFiles.emplace_back();
         m_aEmbeddedFiles.back().m_nObject = createObject();
@@ -9248,8 +9246,9 @@ void PDFWriterImpl::createEmbeddedFile(const Graphic& 
rGraphic, ReferenceXObject
     }
     else
     {
-        rEmit.m_nPDFPageIndex = 
rGraphic.getVectorGraphicData()->getPageIndex();
-        rEmit.m_aPDFData = *pPDFData;
+        sal_Int32 aIndex = m_aExternalPDFStreams.store(reinterpret_cast<const 
sal_uInt8*>(rArray.getConstArray()), nLength);
+        rEmit.m_nExternalPDFPageIndex = 
rGraphic.getVectorGraphicData()->getPageIndex();
+        rEmit.m_nExternalPDFDataIndex = aIndex;
     }
 
     rEmit.m_nFormObject = createObject();
diff --git a/vcl/source/gdi/pdfwriter_impl.hxx 
b/vcl/source/gdi/pdfwriter_impl.hxx
index 79df86f9b679..17e5f6d8c3ea 100644
--- a/vcl/source/gdi/pdfwriter_impl.hxx
+++ b/vcl/source/gdi/pdfwriter_impl.hxx
@@ -46,6 +46,7 @@
 
 #include <outdata.hxx>
 #include <vcl/filter/pdfobjectcontainer.hxx>
+#include <pdf/ExternalPDFStreams.hxx>
 #include "pdffontcache.hxx"
 #include "pdfbuildin_fonts.hxx"
 
@@ -208,19 +209,27 @@ public:
         sal_Int32 m_nBitmapObject;
         /// Size of the bitmap replacement, in pixels.
         Size m_aPixelSize;
+
         /// PDF data from the graphic object, if not writing a reference 
XObject.
-        std::vector<sal_Int8> m_aPDFData;
-        sal_Int32 m_nPDFPageIndex;
+        sal_Int32 m_nExternalPDFDataIndex;
+        sal_Int32 m_nExternalPDFPageIndex;
 
         ReferenceXObjectEmit()
-            : m_nFormObject(0),
-              m_nEmbeddedObject(0),
-              m_nBitmapObject(0)
+            : m_nFormObject(0)
+            , m_nEmbeddedObject(0)
+            , m_nBitmapObject(0)
+            , m_nExternalPDFDataIndex(-1)
+            , m_nExternalPDFPageIndex(-1)
         {
         }
 
         /// Returns the ID one should use when referring to this bitmap.
         sal_Int32 getObject() const;
+
+        bool hasExternalPDFData() const
+        {
+            return m_nExternalPDFDataIndex >= 0;
+        }
     };
 
     struct BitmapEmit
@@ -709,6 +718,8 @@ private:
     osl::File                           m_aFile;
     bool                                m_bOpen;
 
+    ExternalPDFStreams m_aExternalPDFStreams;
+
     /* output redirection; e.g. to accumulate content streams for
        XObjects
      */
diff --git a/vcl/source/pdf/ExternalPDFStreams.cxx 
b/vcl/source/pdf/ExternalPDFStreams.cxx
new file mode 100644
index 000000000000..08f31ed22829
--- /dev/null
+++ b/vcl/source/pdf/ExternalPDFStreams.cxx
@@ -0,0 +1,43 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <pdf/ExternalPDFStreams.hxx>
+#include <comphelper/hash.hxx>
+
+namespace vcl
+{
+sal_Int32 ExternalPDFStreams::store(const sal_uInt8* pData, sal_uInt32 nLength)
+{
+    sal_Int32 nIndex = -1;
+
+    std::vector<sal_uInt8> aHash
+        = comphelper::Hash::calculateHash(pData, nLength, 
comphelper::HashType::SHA1);
+
+    auto it = maStreamIndexMap.find(aHash);
+    if (it == maStreamIndexMap.end())
+    {
+        auto& rExternalStream = maStreamList.emplace_back();
+        rExternalStream.maData.resize(nLength);
+        std::copy(pData, pData + nLength, rExternalStream.maData.begin());
+        nIndex = maStreamList.size() - 1;
+        maStreamIndexMap.emplace(aHash, nIndex);
+    }
+    else
+    {
+        nIndex = it->second;
+    }
+
+    return nIndex;
+}
+
+ExternalPDFStream& ExternalPDFStreams::get(sal_uInt32 nIndex) { return 
maStreamList.at(nIndex); }
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

Reply via email to