Extract Images from PDF file using PDFBox in C#
-----------------------------------------------
Key: PDFBOX-1240
URL: https://issues.apache.org/jira/browse/PDFBOX-1240
Project: PDFBox
Issue Type: New Feature
Components: Utilities
Affects Versions: 0.7.3
Environment: public class ExtractImage
{
private int imageCounter = 1;
private static string PASSWORD = "-password";
private static string PREFIX = "-prefix";
public ExtractImage()
{
}
/// <summary>
/// Parses command-line style arguments, opens the named PDF and writes every
/// image found in the resources of each page to disk.
/// </summary>
/// <param name="args">
/// One to four entries: an optional "-password" followed by its value, an
/// optional "-prefix" followed by its value, and the PDF file path (the first
/// argument that is not an option).
/// </param>
public void extractImage(string []args)
{
    if (args == null || args.Length < 1 || args.Length > 4)
    {
        usage();
        return;
    }

    // pdfFile must start out as null: the loop below only stores the first
    // non-option argument while pdfFile is still null. Initialising it to ""
    // meant the path was never stored and an empty file name was loaded.
    string pdfFile = null;
    string password = "";
    string prefix = null;

    for (int i = 0; i < args.Length; i++)
    {
        if (args[i] == PASSWORD)
        {
            i++;
            if (i >= args.Length)
            {
                // Option given without a value.
                usage();
                return;
            }
            password = args[i];
        }
        else if (args[i] == PREFIX)
        {
            i++;
            if (i >= args.Length)
            {
                // Option given without a value.
                usage();
                return;
            }
            prefix = args[i];
        }
        else if (pdfFile == null)
        {
            pdfFile = args[i];
        }
    }

    if (pdfFile == null)
    {
        usage();
        return;
    }

    // Default prefix: the PDF path minus its last four characters
    // (presumably the ".pdf" extension).
    if (prefix == null && pdfFile.Length > 4)
    {
        prefix = pdfFile.Substring(0, pdfFile.Length - 4);
    }

    PDDocument document = null;
    try
    {
        document = PDDocument.load(pdfFile);
        if (document.isEncrypted())
        {
            StandardDecryptionMaterial spm = new StandardDecryptionMaterial(password);
            document.openProtection(spm);
            AccessPermission ap = document.getCurrentAccessPermission();
            if (!ap.canExtractContent())
            {
                Console.WriteLine("Error: You do not have permission to extract images.");
                // Do not go on extracting after reporting that it is not permitted.
                return;
            }
        }

        List pages = document.getDocumentCatalog().getAllPages();
        java.util.Iterator iter = pages.iterator();
        while (iter.hasNext())
        {
            PDPage page = (PDPage)iter.next();
            PDResources resources = page.getResources();
            java.util.Map images = resources.getImages();
            if (images == null)
            {
                continue;
            }

            Iterator imageIter = images.keySet().iterator();
            while (imageIter.hasNext())
            {
                string key = (string)imageIter.next();
                PDXObjectImage image = (PDXObjectImage)images.get(key);

                // Honour the documented -prefix option (and its default);
                // previously the prefix was computed and then ignored.
                string baseName = prefix == null ? key : prefix + "_" + key;
                string name = getUniqueFileName(baseName, image.getSuffix());
                Console.WriteLine("Writing image:" + name);
                image.write2file(name);
            }
        }
    }
    finally
    {
        // Release the document on every path, including errors and the
        // early return above.
        if (document != null)
        {
            document.close();
        }
    }
}
/// <summary>
/// Finds a base name of the form "prefix-N" for which no file named
/// "prefix-N.suffix" exists yet, where N is taken from imageCounter.
/// The counter is advanced once for every candidate tried.
/// </summary>
/// <param name="prefix">Leading part of the generated name.</param>
/// <param name="suffix">File extension, without the dot, used for the existence check.</param>
/// <returns>The unused base name, without the suffix.</returns>
private string getUniqueFileName(string prefix, string suffix)
{
    string candidate;
    java.io.File probe;
    do
    {
        candidate = prefix + "-" + imageCounter;
        probe = new java.io.File(candidate + "." + suffix);
        imageCounter++;
    }
    while (probe.exists());

    return candidate;
}
/// <summary>
/// Prints the command-line help to standard error and terminates the
/// process with exit code 1.
/// </summary>
private void usage()
{
    // Each literal is kept on one physical line: a regular C# string
    // literal may not contain a raw line break.
    Console.Error.WriteLine(
        "Usage: java org.pdfbox.ExtractImages [OPTIONS] <PDF file>\n" +
        "  -password  <password>        Password to decrypt document\n" +
        "  -prefix  <image-prefix>      Image prefix(default to pdf name)\n" +
        "  <PDF file>                   The PDF document to use\n");
    Environment.Exit(1);
}
}
Reference:
http://svn.apache.org/repos/asf/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractImages.java
Reporter: Pham Minh Cuong
Priority: Blocker
Fix For: 0.7.3
I tested this code but ran into a problem: execution enters the
if(args.Length<1||args.Length>4) check and stops in usage(). It loops there.
Please help me. Many thanks.
{
usage();
}
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators:
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira