On Feb 13, 2011, at 5:02 PM, Graham Cox wrote:
> Hi all,
>
> I'm about to embark on a fairly serious bit of coding to extract all the
> graphic objects from a PDF file to turn them into editable entities in a
> drawing app.
>
> First, the documentation references some sample code called 'Voyeur'
> (/Developer/Examples/Quartz/PDF/Voyeur) but this no longer seems to exist.
> Does anyone have an up-to-date link?
>
> Second, has anyone already done this and have code they are prepared to
> share? It must have been reimplemented many times over the years. I'm reading
> the documentation for what support there is in Cocoa for PDF parsing, and
> also the PDF spec itself, and it's a pretty big undertaking to get right. I
> have a distinct feeling I'm staring at a well-solved problem.
Below is skeleton code that will get you started using the CGPDFxxx APIs. -- Scott Ribe scott_r...@elevated-dev.com http://www.elevated-dev.com/ (303) 722-0567 voice #include <Carbon/Carbon.h> #include <iostream> #include <map> using namespace std; static const char * sPdfTypeNames[] = { "", "null", "boolean", "integer", "real", "name", "string", "array", "dictionary", "stream" }; static int level = 1; void DumpObjectProperties( CGPDFObjectRef obj ) { int cnt; CGPDFObjectType type = CGPDFObjectGetType( obj ); switch( type ) { case kCGPDFObjectTypeBoolean: { CGPDFBoolean pdfbool; if( CGPDFObjectGetValue( obj, kCGPDFObjectTypeBoolean, &pdfbool ) ) { if( pdfbool ) cout << " - " << true; else cout << " - " << false; } } break; case kCGPDFObjectTypeInteger: { CGPDFInteger pdfint; if( CGPDFObjectGetValue( obj, kCGPDFObjectTypeInteger, &pdfint ) ) cout << " - " << pdfint; } break; case kCGPDFObjectTypeReal: { CGPDFReal pdfreal; if( CGPDFObjectGetValue( obj, kCGPDFObjectTypeReal, &pdfreal ) ) cout << " - " << pdfreal; } break; case kCGPDFObjectTypeName: { const char * name; if( CGPDFObjectGetValue( obj, kCGPDFObjectTypeName, &name ) ) cout << " - " << name; } break; case kCGPDFObjectTypeString: { CGPDFStringRef pdfstr; if( CGPDFObjectGetValue( obj, kCGPDFObjectTypeString, &pdfstr ) ) cout << " - " << string( (char *) CGPDFStringGetBytePtr( pdfstr ), CGPDFStringGetLength( pdfstr ) ); } break; case kCGPDFObjectTypeArray: { CGPDFArrayRef array; if( CGPDFObjectGetValue( obj, kCGPDFObjectTypeArray, &array ) ) { cnt = CGPDFArrayGetCount( array ); cout << " - " << "entries: " << cnt; } } break; case kCGPDFObjectTypeDictionary: { CGPDFDictionaryRef dict; if( CGPDFObjectGetValue( obj, kCGPDFObjectTypeDictionary, &dict ) ) { cnt = CGPDFDictionaryGetCount( dict ); cout << " - " << "entries: " << cnt; } } break; } cout << endl << flush; } void DumpObject( const char * key, CGPDFObjectRef obj, void * info ) { for( int i = 0; i < level; ++i ) cout << "| "; CGPDFObjectType type = 
CGPDFObjectGetType( obj ); if( type >= 1 && type < sizeof( sPdfTypeNames ) / sizeof( char *) ) { cout << key << ": " << sPdfTypeNames[type]; DumpObjectProperties( obj ); } else cout << key << ": " << "unrecognized object type " << type << endl << flush; switch( type ) { case kCGPDFObjectTypeDictionary: { if( strcmp( "Parent", key ) ) { ++level; CGPDFDictionaryRef dict; if( CGPDFObjectGetValue( obj, kCGPDFObjectTypeDictionary, &dict ) ) CGPDFDictionaryApplyFunction( dict, DumpObject, NULL ); --level; } } break; case kCGPDFObjectTypeArray: { ++level; CGPDFArrayRef array; if( CGPDFObjectGetValue( obj, kCGPDFObjectTypeArray, &array ) ) { int arraycnt = CGPDFArrayGetCount( array ); for( int i = 0; i < arraycnt; ++i ) { CGPDFObjectRef aryobj; if( CGPDFArrayGetObject( array, i, &aryobj ) ) { char tmp[16]; sprintf( tmp, "%d", i ); DumpObject( tmp, aryobj, NULL ); } } } --level; } break; case kCGPDFObjectTypeStream: { ++level; CGPDFStreamRef strm; if( CGPDFObjectGetValue( obj, kCGPDFObjectTypeStream, &strm ) ) { CGPDFDictionaryRef dict = CGPDFStreamGetDictionary( strm ); if( dict ) CGPDFDictionaryApplyFunction( dict, DumpObject, NULL ); } --level; } break; } } int main (int argc, char * const argv[]) { if( argc != 2 ) { cerr << "usage: pdfdir source.pdf" << endl << flush; return 1; } CFStringRef path = CFStringCreateWithCString( NULL, argv[1], kCFStringEncodingUTF8 ); CFURLRef url = CFURLCreateWithFileSystemPath( NULL, path, kCFURLPOSIXPathStyle, 0 ); CGPDFDocumentRef doc = CGPDFDocumentCreateWithURL( url ); if( !doc ) { cerr << "could not open source pdf file" << endl << flush; return 1; } int pgcnt = CGPDFDocumentGetNumberOfPages( doc ); if( pgcnt <= 0 ) { cerr << "source pdf file has no pages" << endl << flush; return 1; } cout << "page count: " << pgcnt << endl << flush; for( int i1 = 0; i1 < pgcnt; ++i1 ) { CGPDFPageRef pg = CGPDFDocumentGetPage( doc, i1 + 1 ); if( !pg ) { cerr << "failed to read page " << i1 + 1 << " of source pdf file" << endl << flush; return 1; } 
CGPDFDictionaryRef dict = CGPDFPageGetDictionary( pg ); if( !dict ) { cerr << "failed to read dictionary for page " << i1 + 1 << " of source pdf file" << endl << flush; return 1; } cout << "page: " << i1 + 1 << endl << flush; CGPDFDictionaryApplyFunction( dict, DumpObject, NULL ); } return 0; } _______________________________________________ Cocoa-dev mailing list (Cocoa-dev@lists.apple.com) Please do not post admin requests or moderator comments to the list. Contact the moderators at cocoa-dev-admins(at)lists.apple.com Help/Unsubscribe/Update your Subscription: http://lists.apple.com/mailman/options/cocoa-dev/archive%40mail-archive.com This email sent to arch...@mail-archive.com