vcl/inc/pdf/ExternalPDFStreams.hxx     |   12 ++++++++----
 vcl/source/filter/ipdf/pdfdocument.cxx |   12 +++++++++---
 vcl/source/gdi/pdfwriter_impl.cxx      |   12 +++++++++---
 3 files changed, 26 insertions(+), 10 deletions(-)

New commits:
commit ed7a3dabc8c35b8b33bad33fc26ebd8fb80b0cbd
Author:     Tomaž Vajngerl <tomaz.vajng...@collabora.co.uk>
AuthorDate: Tue Mar 2 18:57:46 2021 +0900
Commit:     Adolfo Jayme Barrientos <fit...@ubuntu.com>
CommitDate: Wed Mar 3 09:14:22 2021 +0100

    tdf#140606 make PDF parsing more lenient and prevent a crash
    
    If the external document couldn't be opened, the export tried to
    continue anyway, which eventually led to a crash. This is fixed by
    handling this situation and preventing the crash; however, the
    affected part of the document isn't exported in this case.
    
    The document couldn't be opened because of a parsing error - there
    was an unexpected null character instead of whitespace, which
    made the parser panic. Fix this by making the parser more lenient
    in such a situation: when there is an unexpected null character,
    ignore it and try to continue parsing.
    
    The bug document seems to have been created with a buggy PDF writer,
    but other PDF readers don't complain when parsing the document, so it
    looks to be valid. qpdf --check doesn't complain either.
    
    Change-Id: I61eb281e821ccd195ef006d778556e25d1c7f5e3
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/111820
    Tested-by: Jenkins
    Reviewed-by: Tomaž Vajngerl <qui...@gmail.com>
    (cherry picked from commit 2c1ed5a5dad827cde032f27a4348e81be15889bc)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/111857
    Reviewed-by: Adolfo Jayme Barrientos <fit...@ubuntu.com>

diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx 
b/vcl/inc/pdf/ExternalPDFStreams.hxx
index ab3d057bd83c..0a1997fe7dc7 100644
--- a/vcl/inc/pdf/ExternalPDFStreams.hxx
+++ b/vcl/inc/pdf/ExternalPDFStreams.hxx
@@ -33,21 +33,25 @@ struct VCL_DLLPUBLIC ExternalPDFStream
 
     std::map<sal_Int32, sal_Int32>& getCopiedResources() { return 
maCopiedResources; }
 
-    filter::PDFDocument& getPDFDocument()
+    std::shared_ptr<filter::PDFDocument>& getPDFDocument()
     {
         if (!mpPDFDocument)
         {
             SvMemoryStream aPDFStream;
             aPDFStream.WriteBytes(maData.data(), maData.size());
             aPDFStream.Seek(0);
-            mpPDFDocument = std::make_shared<filter::PDFDocument>();
-            if (!mpPDFDocument->Read(aPDFStream))
+            auto pPDFDocument = std::make_shared<filter::PDFDocument>();
+            if (!pPDFDocument->Read(aPDFStream))
             {
                 SAL_WARN("vcl.pdfwriter",
                          "PDFWriterImpl::writeReferenceXObject: reading the 
PDF document failed");
             }
+            else
+            {
+                mpPDFDocument = pPDFDocument;
+            }
         }
-        return *mpPDFDocument;
+        return mpPDFDocument;
     }
 };
 
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx 
b/vcl/source/filter/ipdf/pdfdocument.cxx
index 41c44bd01b57..804713abaf10 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -1441,12 +1441,18 @@ bool PDFDocument::Tokenize(SvStream& rStream, 
TokenizeMode eMode,
                 }
                 else
                 {
-                    if (!rtl::isAsciiWhiteSpace(static_cast<unsigned 
char>(ch)))
+                    auto uChar = static_cast<unsigned char>(ch);
+                    // Be more lenient and allow unexpected null char
+                    if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
                     {
-                        SAL_WARN("vcl.filter", "PDFDocument::Tokenize: 
unexpected character: "
-                                                   << ch << " at byte position 
" << rStream.Tell());
+                        SAL_WARN("vcl.filter",
+                                 "PDFDocument::Tokenize: unexpected character 
with code "
+                                     << sal_Int32(ch) << " at byte position " 
<< rStream.Tell());
                         return false;
                     }
+                    SAL_WARN_IF(uChar == 0, "vcl.filter",
+                                "PDFDocument::Tokenize: unexpected null 
character at "
+                                    << rStream.Tell() << " - ignoring");
                 }
                 break;
             }
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx 
b/vcl/source/gdi/pdfwriter_impl.cxx
index 951cda0f29da..eeeb4305a181 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -8459,10 +8459,16 @@ void 
PDFWriterImpl::writeReferenceXObject(ReferenceXObjectEmit& rEmit)
         // object.
         if (rEmit.m_nExternalPDFDataIndex < 0)
             return;
-        auto & rExternalPDFStream = 
m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex);
-        auto & rPDFDocument = rExternalPDFStream.getPDFDocument();
+        auto& rExternalPDFStream = 
m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex);
+        auto& pPDFDocument = rExternalPDFStream.getPDFDocument();
+        if (!pPDFDocument)
+        {
+            // Couldn't parse the document and can't continue
+            SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: 
failed to parse the document");
+            return;
+        }
 
-        std::vector<filter::PDFObjectElement*> aPages = 
rPDFDocument.GetPages();
+        std::vector<filter::PDFObjectElement*> aPages = 
pPDFDocument->GetPages();
         if (aPages.empty())
         {
             SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: 
no pages");
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

Reply via email to