commit a2d61c0eb5e37e4021c32022aaa4cc53483e5057
Author: Francesco Pretto <ceztko@gmail.com>
Date:   Sun Dec 30 12:41:15 2018 +0100

    PdfTokenizer: Fix deferred decode of "Contents" string in signature dictionaries
    
    The check for bIsSigContents was done on data in the
    dictionary that may not be available at the moment
    of the check, because /Contents may be written before
    /Type in the PDF. The solution is to defer the check
    until all keys have been added to the dictionary.
    To do that, we just store the char buffer for
    "Contents" when we find it, and decode the string with
    or without the PdfEncrypt instance once the decision
    has been made. At that point we finally add it to the
    dictionary. Also check for signature dictionaries of
    type /Type/DocTimeStamp, as done in PDFBox[1].
    
    [1] https://issues.apache.org/jira/browse/PDFBOX-3173

diff --git a/src/base/PdfTokenizer.cpp b/src/base/PdfTokenizer.cpp
index d9c6f62..fa5de47 100644
--- a/src/base/PdfTokenizer.cpp
+++ b/src/base/PdfTokenizer.cpp
@@ -597,6 +597,7 @@ void PdfTokenizer::ReadDictionary( PdfVariant& rVariant, PdfEncrypt* pEncrypt )
     PdfDictionary dict;
     EPdfTokenType eType;
     const char *  pszToken;
+    std::auto_ptr<std::vector<char>> contentsHexBuffer;
 
     for( ;; )
     {
@@ -612,18 +613,73 @@ void PdfTokenizer::ReadDictionary( PdfVariant& rVariant, PdfEncrypt* pEncrypt )
         // Convert the read variant to a name; throws InvalidDataType if not a name.
         key = val.GetName();
 
-        // 'Contents' key of a /Type/Sig dictionary is an unencrypted Hex string
-        bool bIsSigContents = key == PdfName( "Contents" ) &&
-            dict.HasKey( "Type" ) &&
-            dict.GetKey( "Type" )->GetDataType() == ePdfDataType_Name &&
-            dict.GetKey( "Type" )->GetName() == PdfName( "Sig" );
+        // Try to get the next variant
+        gotToken = this->GetNextToken( pszToken, &eType );
+        if ( !gotToken )
+        {
+            PODOFO_RAISE_ERROR_INFO( ePdfError_UnexpectedEOF, "Expected variant." );
+        }
 
-        // Get the next variant. If there isn't one, it'll throw UnexpectedEOF.
-        this->GetNextVariant( val, bIsSigContents ? NULL : pEncrypt );
+        EPdfDataType eDataType = this->DetermineDataType( pszToken, eType, val );
+        if ( key == "Contents" && eDataType == ePdfDataType_HexString )
+        {
+            // 'Contents' key in signature dictionaries is an unencrypted Hex string:
+            // save the string buffer for later check if it needed decryption
+            contentsHexBuffer = std::auto_ptr<std::vector<char>>( new std::vector<char>() );
+            ReadHexString( *contentsHexBuffer );
+            continue;
+        }
+
+        switch ( eDataType )
+        {
+            case ePdfDataType_Null:
+            case ePdfDataType_Bool:
+            case ePdfDataType_Number:
+            case ePdfDataType_Real:
+            case ePdfDataType_Reference:
+            {
+                // the data was already read into rVariant by the DetermineDataType function
+                break;
+            }
+            case ePdfDataType_Name:
+            case ePdfDataType_String:
+            case ePdfDataType_HexString:
+            case ePdfDataType_Array:
+            case ePdfDataType_Dictionary:
+            {
+                this->ReadDataType( eDataType, val, pEncrypt );
+                break;
+            }
+            case ePdfDataType_RawData:
+            case ePdfDataType_Unknown:
+            default:
+            {
+                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Unexpected data type" );
+            }
+        }
 
         dict.AddKey( key, val );
     }
 
+    if ( contentsHexBuffer.get() != NULL )
+    {
+        PdfObject *type = dict.GetKey( "Type" );
+        // "Contents" is unencrypted in /Type/Sig and /Type/DocTimeStamp dictionaries 
+        // https://issues.apache.org/jira/browse/PDFBOX-3173
+        bool contentsUnencrypted = type != NULL && type->GetDataType() == ePdfDataType_Name &&
+            (type->GetName() == PdfName( "Sig" ) || type->GetName() == PdfName( "DocTimeStamp" ));
+
+        PdfEncrypt *encrypt = NULL;
+        if ( !contentsUnencrypted )
+            encrypt = pEncrypt;
+
+        PdfString string;
+        string.SetHexData( contentsHexBuffer->size() ? &(*contentsHexBuffer)[0] : "", contentsHexBuffer->size(), encrypt );
+
+        val = string;
+        dict.AddKey( "Contents", val );
+    }
+
     rVariant = dict;
 }
 
@@ -764,9 +820,18 @@ void PdfTokenizer::ReadString( PdfVariant& rVariant, PdfEncrypt* pEncrypt )
 
 void PdfTokenizer::ReadHexString( PdfVariant& rVariant, PdfEncrypt* pEncrypt )
 {
-    int        c;
+    ReadHexString( m_vecBuffer );
 
-    m_vecBuffer.clear();
+    PdfString string;
+    string.SetHexData( m_vecBuffer.size() ? &(m_vecBuffer[0]) : "", m_vecBuffer.size(), pEncrypt );
+
+    rVariant = string;
+}
+
+void PdfTokenizer::ReadHexString( std::vector<char>& rVecBuffer)
+{
+    rVecBuffer.clear();
+    int        c;
 
     while( (c = m_device.Device()->GetChar()) != EOF )
     {
@@ -778,17 +843,12 @@ void PdfTokenizer::ReadHexString( PdfVariant& rVariant, PdfEncrypt* pEncrypt )
         if( isdigit( c ) ||
             ( c >= 'A' && c <= 'F') ||
             ( c >= 'a' && c <= 'f'))
-            m_vecBuffer.push_back( c );
+            rVecBuffer.push_back( c );
     }
 
     // pad to an even length if necessary
-    if( m_vecBuffer.size() % 2 )
-        m_vecBuffer.push_back( '0' );
-
-    PdfString string;
-    string.SetHexData( m_vecBuffer.size() ? &(m_vecBuffer[0]) : "", m_vecBuffer.size(), pEncrypt );
-
-    rVariant = string;
+    if(rVecBuffer.size() % 2 )
+        rVecBuffer.push_back( '0' );
 }
 
 void PdfTokenizer::ReadName( PdfVariant& rVariant )
diff --git a/src/base/PdfTokenizer.h b/src/base/PdfTokenizer.h
index 14894fa..b7032b6 100644
--- a/src/base/PdfTokenizer.h
+++ b/src/base/PdfTokenizer.h
@@ -225,6 +225,13 @@ class PODOFO_API PdfTokenizer {
      */
     void ReadHexString( PdfVariant& rVariant, PdfEncrypt* pEncrypt );
 
+    /** Read a hex string from the input device
+     *  and store it into a vector.
+     *
+     *  \param rVecBuffer store the hex string into this variable
+     */
+    void ReadHexString( std::vector<char> &rVecBuffer );
+
     /** Read a name from the input device
      *  and store it into a variant.
      * 
