[
https://issues.apache.org/jira/browse/PDFBOX-992?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13067564#comment-13067564
]
Lars Torunski commented on PDFBOX-992:
--------------------------------------
I'm getting an IllegalArgumentException and an IndexOutOfBoundsException during
text extraction:
java.lang.IllegalArgumentException: fromIndex(0) > toIndex(-2)
at java.util.SubList.<init>(AbstractList.java:604)
at java.util.RandomAccessSubList.<init>(AbstractList.java:758)
at java.util.RandomAccessSubList.subList(AbstractList.java:762)
at
org.apache.fontbox.cff.CharStringConverter.handleType2Command(CharStringConverter.java:259)
at
org.apache.fontbox.cff.CharStringConverter.handleCommand(CharStringConverter.java:81)
at
org.apache.fontbox.cff.CharStringHandler.handleSequence(CharStringHandler.java:53)
at
org.apache.fontbox.cff.CharStringConverter.convert(CharStringConverter.java:64)
at org.apache.fontbox.cff.CFFFont$Mapping.toType1Sequence(CFFFont.java:374)
at org.apache.fontbox.cff.AFMFormatter.renderFont(AFMFormatter.java:126)
at org.apache.fontbox.cff.AFMFormatter.printFontMetrics(AFMFormatter.java:64)
at org.apache.fontbox.cff.AFMFormatter.printFont(AFMFormatter.java:57)
at org.apache.fontbox.cff.AFMFormatter.format(AFMFormatter.java:50)
at
org.apache.pdfbox.pdmodel.font.PDType1CFont.prepareFontMetric(PDType1CFont.java:502)
at org.apache.pdfbox.pdmodel.font.PDType1CFont.load(PDType1CFont.java:381)
at org.apache.pdfbox.pdmodel.font.PDType1CFont.<init>(PDType1CFont.java:104)
at org.apache.pdfbox.pdmodel.font.PDType1Font.<init>(PDType1Font.java:162)
at
org.apache.pdfbox.pdmodel.font.PDFontFactory.createFont(PDFontFactory.java:108)
at
org.apache.pdfbox.pdmodel.font.PDFontFactory.createFont(PDFontFactory.java:75)
at org.apache.pdfbox.pdmodel.PDResources.getFonts(PDResources.java:115)
at
org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:243)
at
org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:225)
at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:442)
at
org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:366)
at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:322)
java.lang.IndexOutOfBoundsException: Index: 2,Size: 2
at java.util.SubList.rangeCheck(AbstractList.java:746)
at java.util.SubList.get(AbstractList.java:619)
at
org.apache.fontbox.cff.CharStringConverter.drawAlternatingCurve(CharStringConverter.java:397)
at
org.apache.fontbox.cff.CharStringConverter.handleType1Command(CharStringConverter.java:142)
at
org.apache.fontbox.cff.CharStringConverter.handleCommand(CharStringConverter.java:77)
at
org.apache.fontbox.cff.CharStringHandler.handleSequence(CharStringHandler.java:53)
at
org.apache.fontbox.cff.CharStringConverter.handleType2Command(CharStringConverter.java:307)
at
org.apache.fontbox.cff.CharStringConverter.handleCommand(CharStringConverter.java:81)
at
org.apache.fontbox.cff.CharStringHandler.handleSequence(CharStringHandler.java:53)
at
org.apache.fontbox.cff.CharStringConverter.convert(CharStringConverter.java:64)
at org.apache.fontbox.cff.CFFFont$Mapping.toType1Sequence(CFFFont.java:374)
at org.apache.fontbox.cff.AFMFormatter.renderFont(AFMFormatter.java:126)
at org.apache.fontbox.cff.AFMFormatter.printFontMetrics(AFMFormatter.java:64)
at org.apache.fontbox.cff.AFMFormatter.printFont(AFMFormatter.java:57)
at org.apache.fontbox.cff.AFMFormatter.format(AFMFormatter.java:50)
at
org.apache.pdfbox.pdmodel.font.PDType1CFont.prepareFontMetric(PDType1CFont.java:502)
at org.apache.pdfbox.pdmodel.font.PDType1CFont.load(PDType1CFont.java:381)
at org.apache.pdfbox.pdmodel.font.PDType1CFont.<init>(PDType1CFont.java:104)
at org.apache.pdfbox.pdmodel.font.PDType1Font.<init>(PDType1Font.java:162)
at
org.apache.pdfbox.pdmodel.font.PDFontFactory.createFont(PDFontFactory.java:108)
at
org.apache.pdfbox.pdmodel.font.PDFontFactory.createFont(PDFontFactory.java:75)
at org.apache.pdfbox.pdmodel.PDResources.getFonts(PDResources.java:115)
at
org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:243)
at
org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:225)
at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:442)
at
org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:366)
at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:322)
> IndexOutOfBoundsException: while parsing few pdf's
> --------------------------------------------------
>
> Key: PDFBOX-992
> URL: https://issues.apache.org/jira/browse/PDFBOX-992
> Project: PDFBox
> Issue Type: Bug
> Components: Text extraction
> Affects Versions: 1.5.0
> Environment: Windows XP, RAD 7.5, Websphere 6.1,
> pdfbox-app-1.5.0.jar, fontbox-1.5.0.jar
> Reporter: Nilesh Naik
> Labels: pdfbox, textExtraction
>
> Hi Team, The text extraction works fine with most PDFs, but it failed for
> a couple of them with the error below. The PDF can be found here:
> http://cid-a3aa7f7d9888874d.office.live.com/self.aspx/Public/getting%5E_started%5E_with%5E_Flex3.pdf
> Let me know if this is a bug or an issue with the PDF.
> java.lang.IndexOutOfBoundsException: Index: 2,Size: 2
> at java.util.SubList.rangeCheck(AbstractList.java:864)
> at java.util.SubList.get(AbstractList.java:737)
> at
> org.apache.fontbox.cff.CharStringConverter.drawCurve(CharStringConverter.java:415)
> at
> org.apache.fontbox.cff.CharStringConverter.handleType2Command(CharStringConverter.java:277)
> at
> org.apache.fontbox.cff.CharStringConverter.handleCommand(CharStringConverter.java:81)
> at
> org.apache.fontbox.cff.CharStringHandler.handleSequence(CharStringHandler.java:53)
> at
> org.apache.fontbox.cff.CharStringConverter.handleType2Command(CharStringConverter.java:307)
> at
> org.apache.fontbox.cff.CharStringConverter.handleCommand(CharStringConverter.java:81)
> at
> org.apache.fontbox.cff.CharStringHandler.handleSequence(CharStringHandler.java:53)
> at
> org.apache.fontbox.cff.CharStringConverter.convert(CharStringConverter.java:64)
> at org.apache.fontbox.cff.CFFFont$Mapping.toType1Sequence(CFFFont.java:374)
> at org.apache.fontbox.cff.AFMFormatter.renderFont(AFMFormatter.java:126)
> at org.apache.fontbox.cff.AFMFormatter.printFontMetrics(AFMFormatter.java:64)
> at org.apache.fontbox.cff.AFMFormatter.printFont(AFMFormatter.java:57)
> at org.apache.fontbox.cff.AFMFormatter.format(AFMFormatter.java:50)
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira