The function
works on most PDFs, but not in this case.

I hope someone
can help me make it work. Thank you.

 

Below are
the code and the error messages I got.

The
code from the example:

 

import
java.awt.Rectangle;

import
java.io.FileOutputStream;

import
java.io.IOException;

import
java.io.PrintWriter;

import
com.itextpdf.text.DocumentException;

import
com.itextpdf.text.pdf.PdfReader;

import
com.itextpdf.text.pdf.parser.FilteredTextRenderListener;

import
com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy;

import
com.itextpdf.text.pdf.parser.PdfTextExtractor;

import
com.itextpdf.text.pdf.parser.RegionTextRenderFilter;

import
com.itextpdf.text.pdf.parser.RenderFilter;

import
com.itextpdf.text.pdf.parser.TextExtractionStrategy;

 



    /** The original PDF that will be parsed. */
    String pdf = "d:/pdftest/21.pdf";

    /** The resulting text file. */
    String txt = "d:/pdftest/1.txt";

    PdfReader reader = new PdfReader(pdf);
    try {
        PrintWriter out = new PrintWriter(new FileOutputStream(txt));
        try {
            // Region of the page to extract text from (x, y, width, height).
            Rectangle rect = new Rectangle(250, 635, 20, 20);
            //Rectangle rect = new Rectangle(70, 80, 420, 500);
            //Rectangle rect = new Rectangle(0, 0, 20, 20);
            RenderFilter filter = new RegionTextRenderFilter(rect);
            TextExtractionStrategy strategy;

            // Only page 1 is processed for now; to cover every page use:
            //for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            for (int i = 1; i <= 1; i++) {
                //out.println("good ok");
                println "ok"

                // A fresh strategy per page so text from one page does not
                // accumulate into the next.
                strategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);

                // This call is the statement reported as pdftest.groovy:32 in
                // the stack trace; the InlineImageParseException is thrown
                // from inside getTextFromPage.
                String text = PdfTextExtractor.getTextFromPage(reader, i, strategy);

                // Echo to the console and also store in the result file, which
                // was previously opened but never written to.
                println(text);
                out.println(text);
            }

            out.flush();
        } finally {
            // Close the output file even when extraction throws.
            out.close();
        }
    } finally {
        // Release the reader's underlying file handle in all cases.
        reader.close();
    }

 

 

 

Error
message:

 

Apr 18,
2011 11:17:46 AM org.codehaus.groovy.runtime.StackTraceUtils sanitize

 

WARNING:
Sanitizing stacktrace:

 

com.itextpdf.text.pdf.parser.InlineImageUtils$InlineImageParseException:
Could not find image data or EI

 

            at
com.itextpdf.text.pdf.parser.InlineImageUtils.parseInlineImageSamples(InlineImageUtils.java:345)

 

            at
com.itextpdf.text.pdf.parser.InlineImageUtils.parseInlineImage(InlineImageUtils.java:150)

 

            at
com.itextpdf.text.pdf.parser.PdfContentStreamProcessor.processContent(PdfContentStreamProcessor.java:342)

 

            at
com.itextpdf.text.pdf.parser.PdfReaderContentParser.processContent(PdfReaderContentParser.java:41)

 

            at
com.itextpdf.text.pdf.parser.PdfTextExtractor.getTextFromPage(PdfTextExtractor.java:73)

 

            at
com.itextpdf.text.pdf.parser.PdfTextExtractor$getTextFromPage.call(Unknown
Source)

 

            at 
org.codehaus.groovy.runtime.callsite.CallSiteArray.defaultCall(CallSiteArray.java:40)

 

            at
org.codehaus.groovy.runtime.callsite.AbstractCallSite.call(AbstractCallSite.java:116)

 

            at
org.codehaus.groovy.runtime.callsite.AbstractCallSite.call(AbstractCallSite.java:132)

 

            at pdftest.run(pdftest.groovy:32)

 

            at
groovy.lang.GroovyShell.runScriptOrMainOrTestOrRunnable(GroovyShell.java:266)

 

            at
groovy.lang.GroovyShell.run(GroovyShell.java:517)

 

            at
groovy.lang.GroovyShell.run(GroovyShell.java:172)

 

            at groovy.lang.GroovyShell$run.call(Unknown
Source)

 

            at
groovy.ui.Console$_runScriptImpl_closure16.doCall(Console.groovy:904)

 

            at
sun.reflect.GeneratedMethodAccessor221.invoke(Unknown Source)

 

            at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)

 

            at
java.lang.reflect.Method.invoke(Method.java:597)

 

            at
org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:90)

 

            at
groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:233)

 

            at
org.codehaus.groovy.runtime.metaclass.ClosureMetaClass.invokeMethod(ClosureMetaClass.java:273)

 

            at
groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:886)

 

            at
org.codehaus.groovy.runtime.callsite.PogoMetaClassSite.callCurrent(PogoMetaClassSite.java:66)

 

            at
org.codehaus.groovy.runtime.callsite.AbstractCallSite.callCurrent(AbstractCallSite.java:149)

 

            at
groovy.ui.Console$_runScriptImpl_closure16.doCall(Console.groovy)

 

            at
sun.reflect.GeneratedMethodAccessor220.invoke(Unknown Source)

 

            at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)

 

            at
java.lang.reflect.Method.invoke(Method.java:597)

 

            at
org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:90)

 

            at
groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:233)

 

            at
org.codehaus.groovy.runtime.metaclass.ClosureMetaClass.invokeMethod(ClosureMetaClass.java:273)

 

            at
groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:886)

 

            at
groovy.lang.Closure.call(Closure.java:282)

 

            at
groovy.lang.Closure.call(Closure.java:277)

 

            at
groovy.lang.Closure.run(Closure.java:360)

 

            at
java.lang.Thread.run(Thread.java:662)

Apr 18,
2011 11:17:46 AM org.codehaus.groovy.runtime.StackTraceUtils sanitize

 

WARNING:
Sanitizing stacktrace:

 

ExceptionConverter:
com.itextpdf.text.pdf.parser.InlineImageUtils$InlineImageParseException: Could
not find image data or EI

 

            at
com.itextpdf.text.pdf.parser.InlineImageUtils.parseInlineImageSamples(InlineImageUtils.java:345)

 

            at
com.itextpdf.text.pdf.parser.InlineImageUtils.parseInlineImage(InlineImageUtils.java:150)

 

            at
com.itextpdf.text.pdf.parser.PdfContentStreamProcessor.processContent(PdfContentStreamProcessor.java:342)

 

            at
com.itextpdf.text.pdf.parser.PdfReaderContentParser.processContent(PdfReaderContentParser.java:41)

 

            at 
com.itextpdf.text.pdf.parser.PdfTextExtractor.getTextFromPage(PdfTextExtractor.java:73)

 

            at
com.itextpdf.text.pdf.parser.PdfTextExtractor$getTextFromPage.call(Unknown
Source)

 

            at pdftest.run(pdftest.groovy:32)

 

ExceptionConverter:
com.itextpdf.text.pdf.parser.InlineImageUtils$InlineImageParseException: Could
not find image data or EI

 

            at
com.itextpdf.text.pdf.parser.InlineImageUtils.parseInlineImageSamples(InlineImageUtils.java:345)

 

            at 
com.itextpdf.text.pdf.parser.InlineImageUtils.parseInlineImage(InlineImageUtils.java:150)

 

            at
com.itextpdf.text.pdf.parser.PdfContentStreamProcessor.processContent(PdfContentStreamProcessor.java:342)

 

            at
com.itextpdf.text.pdf.parser.PdfReaderContentParser.processContent(PdfReaderContentParser.java:41)

 

            at
com.itextpdf.text.pdf.parser.PdfTextExtractor.getTextFromPage(PdfTextExtractor.java:73)

 

            at
com.itextpdf.text.pdf.parser.PdfTextExtractor$getTextFromPage.call(Unknown
Source)

 

            at pdftest.run(pdftest.groovy:32)

                                          
------------------------------------------------------------------------------
Benefiting from Server Virtualization: Beyond Initial Workload 
Consolidation -- Increasing the use of server virtualization is a top
priority. Virtualization can reduce costs, simplify management, and improve 
application availability and disaster protection. Learn more about boosting 
the value of server virtualization. http://p.sf.net/sfu/vmware-sfdev2dev
_______________________________________________
iText-questions mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/itext-questions

iText(R) is a registered trademark of 1T3XT BVBA.
Many questions posted to this list can (and will) be answered with a reference 
to the iText book: http://www.itextpdf.com/book/
Please check the keywords list before you ask for examples: 
http://itextpdf.com/themes/keywords.php

Reply via email to