Hello Podofo Users,
I'm using PoDoFo to remove embedded files and file-attachment annotations. My
code works fine so far: no files can be seen using a PDF viewer, and I even
managed to remove the dependent file streams (otherwise the files are still
"hidden" inside the PDF and take up space).
What I'm not sure about is whether my removal code is complete (removes
all data) and safe (e.g. no double frees). So I'd like to post my code
here and ask whether you think it is correct.
On removing Embedded Files:
There is existing code somewhere that shows how to extract embedded files. I
refactored that code and added Clear() calls wherever I thought they were
necessary for the task.
// Check if given ptr is a reference, if so, resolve ptr reference and
store previous ptr to ref_ptr
template <typename T>
void CheckReference( const PdfMemDocument* doc, T* &ptr, T* &ref_ptr )
{
if ( IS_NULL( ptr ) || !ptr->IsReference() ) { return; }
ref_ptr = ptr;
ptr = doc->GetObjects().GetObject( ref_ptr->GetReference() );
}
// Null-tolerant forwarder: calls ptr->Clear() on the pointee and does
// nothing at all when handed a null pointer.
template <typename T>
void Clear( T* ptr )
{
    if ( !ptr ) { return; }
    ptr->Clear();
}
// Blank out one element of the EmbeddedFiles /Names array together with the
// embedded file data reachable from it.
//
//   pObj - the array element. It may be an indirect reference (it is
//          resolved through CheckReference()) and it may be something other
//          than a file specification dictionary, in which case it is simply
//          cleared.
//
// For a dictionary element the /EF entry is inspected and every embedded
// file stream it names is cleared before the dictionaries themselves are
// emptied. Objects are only emptied with Clear(); nothing is deleted or
// removed from the document's object list here.
void RemoveEmbeddedFileData( PdfObject *pObj ) const
{
    PdfObject *pRefObj = NULL;
    CheckReference( m_pDocument, pObj, pRefObj );
    if ( !IS_NULL( pObj ) && pObj->GetDataType() == ePdfDataType_Dictionary )
    {
        PdfDictionary &outerDict = pObj->GetDictionary();
        if ( outerDict.HasKey("EF") )
        {
            // /EF may itself be stored indirectly, or be malformed. Resolve
            // it and verify its type instead of calling GetDictionary()
            // blindly (which is expected to raise a PdfError on a
            // non-dictionary value -- confirm for the PoDoFo version in use).
            PdfObject *pEfObj = outerDict.GetKey("EF");
            PdfObject *pEfRefObj = NULL;
            CheckReference( m_pDocument, pEfObj, pEfRefObj );
            if ( !IS_NULL( pEfObj ) && pEfObj->GetDataType() == ePdfDataType_Dictionary )
            {
                PdfDictionary &innerDict = pEfObj->GetDictionary();

                // The PDF specification lets /EF carry one stream per
                // platform key, not just /F. Wipe all of them so that no
                // file data is left behind.
                static const char* const streamKeys[] = { "F", "UF", "DOS", "Mac", "Unix" };
                const unsigned int keyCount = sizeof( streamKeys ) / sizeof( streamKeys[0] );
                for ( unsigned int k = 0; k < keyCount; ++k )
                {
                    if ( !innerDict.HasKey( streamKeys[k] ) ) { continue; }
                    PdfObject *pStreamObj = innerDict.GetKey( streamKeys[k] );
                    PdfObject *pStreamRefObj = NULL;
                    CheckReference( m_pDocument, pStreamObj, pStreamRefObj );
                    // Several keys may point at the same stream object; the
                    // second Clear() then hits an already emptied object.
                    Clear( pStreamObj );
                    Clear( pStreamRefObj );
                }
                innerDict.Clear();
            }
            // Must happen before outerDict.Clear(): the direct /EF value (or
            // the reference object standing in for it) lives inside
            // outerDict.
            Clear( pEfObj );
            Clear( pEfRefObj );
        }
        outerDict.Clear();
    }
    Clear( pObj );
    Clear( pRefObj );
}
// Strip every entry listed in the document's EmbeddedFiles name array.
//
// Looks up the "EmbeddedFiles" entry of m_pDocument's names tree, passes each
// element of its "Names" array to RemoveEmbeddedFileData() and finally
// empties the array and the object holding it. Returns silently when any
// link of that chain is missing.
//
// NOTE(review): only a "Names" array found directly on the EmbeddedFiles
// node is processed. The PDF name-tree format also allows entries to be
// spread over "Kids" nodes; those are not traversed here.
void RemoveEmbeddedFiles()
{
    PdfObject *pEmbFilesObj = NULL;
    {
        // Pass false so that a missing names tree is reported as NULL
        // rather than being created: removing attachments must not add new
        // objects to a document that never had any.
        const PdfNamesTree *pNamesTree = m_pDocument->GetNamesTree( false );
        RETURN_ON_NULL( pNamesTree );
        const PdfObject *pNamesTreeObj = pNamesTree->GetObject();
        RETURN_ON_NULL( pNamesTreeObj );
        pEmbFilesObj = pNamesTreeObj->GetIndirectKey("EmbeddedFiles");
        RETURN_ON_NULL( pEmbFilesObj );
    }
    PdfObject *pEmbFilesRefObj = NULL;
    CheckReference( m_pDocument, pEmbFilesObj, pEmbFilesRefObj );
    RETURN_ON_NULL( pEmbFilesObj );
    PdfObject *pEmbFilesNamesObj = pEmbFilesObj->GetIndirectKey("Names");
    RETURN_ON_NULL( pEmbFilesNamesObj );
    // A malformed "Names" entry that is not an array has nothing to iterate;
    // bail out instead of calling GetArray() on it.
    if ( !pEmbFilesNamesObj->IsArray() ) { return; }
    PdfArray* pEmbFilesNamesArray = &pEmbFilesNamesObj->GetArray();
    PdfArray::iterator it = pEmbFilesNamesArray->begin(),
                       it_end = pEmbFilesNamesArray->end();
    // Every element is visited; RemoveEmbeddedFileData() simply clears the
    // ones that are not dictionaries.
    for (; it != it_end; ++it ) { RemoveEmbeddedFileData( &(*it) ); }
    Clear( pEmbFilesNamesArray );
    Clear( pEmbFilesNamesObj );
    // Removing 'EmbeddedFiles' object seems not to be required!
    //Clear( pEmbFilesObj );
    //Clear( pEmbFilesRefObj );
}
On removing file-attachment annotation data:
bool HasFileStream( const PdfDictionary &dict )
{
return dict.HasKey("EF") &&
dict.GetKey("EF")->GetDictionary().HasKey("F");
}
// Fetch the value stored under "EF" -> "F" of a file specification
// dictionary, or NULL when HasFileStream() reports there is none. The value
// is returned exactly as stored; no reference resolution happens here.
PdfObject* GetFileStream( PdfDictionary &dict )
{
    if ( !HasFileStream( dict ) ) { return NULL; }
    PdfDictionary &efDict = dict.GetKey("EF")->GetDictionary();
    return efDict.GetKey("F");
}
void RemoveFileAttachment
(
const PdfMemDocument *pDoc,
const PdfAnnotation *pAnnot
)
{
if ( IS_NULL( pAnnot ) || !pAnnot->HasFileAttachement() ) { return;
}
PdfFileSpec *pFileSpec = pAnnot->GetFileAttachement();
RETURN_ON_NULL( pFileSpec );
PdfObject *pFileSpecObj = pFileSpec->GetObject();
if ( IS_NULL( pFileSpecObj ) || pFileSpecObj->GetDataType() !=
ePdfDataType_Dictionary ) { return; }
PdfDictionary &fileDict = pFileSpecObj->GetDictionary();
PdfObject *pFileStreamObj = GetFileStream( fileDict ),
*pFileStreamRefObj = NULL;
CheckReference( pDoc, pFileStreamObj, pFileStreamRefObj );
RETURN_ON_NULL( pFileStreamObj );
Clear( pFileStreamObj );
Clear( pFileStreamRefObj );
fileDict.Clear();
Clear( pFileSpecObj );
}
void RemoveAnnotationsImpl
(
const PdfMemDocument *pDoc
)
{
int pageCount = pDoc->GetPageCount();
for ( int p = 0; p < pageCount; ++p )
{
PdfPage* pPage = pDoc->GetPage( p );
CONTINUE_ON_NULL( pPage );
int annotCount = pPage->GetNumAnnots();
for ( int a = annotCount - 1; a >= 0; --a )
{
const PdfAnnotation* pAnnot = pPage->GetAnnotation( a );
if ( IS_NULL( pAnnot ) || pAnnot->GetType() !=
ePdfAnnotation_FileAttachement ) { continue; }
RemoveFileAttachment( pDoc, pAnnot );
pPage->DeleteAnnotation( a );
}
}
}
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Podofo-users mailing list
Podofo-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/podofo-users