Parser is not sufficiently robust; + sign in front of integer causes exception
------------------------------------------------------------------------------
Key: PDFBOX-1149
URL: https://issues.apache.org/jira/browse/PDFBOX-1149
Project: PDFBox
Issue Type: Bug
Components: Parsing
Affects Versions: 1.6.0
Environment: Linux
Reporter: Jeffrey Aguilera
Priority: Trivial
WARN 2011-1027 14:53:15.902 [redcondor.spam.PdfTextExtractor] unexpected exception while extracting text from PDF
java.lang.RuntimeException: java.io.IOException: Value is not an integer: +256
at org.apache.pdfbox.pdfparser.PDFStreamParser$1.tryNext(PDFStreamParser.java:178)
at org.apache.pdfbox.pdfparser.PDFStreamParser$1.hasNext(PDFStreamParser.java:187)
at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:266)
at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:251)
at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:225)
at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:442)
at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:366)
at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:322)
at org.apache.pdfbox.util.PDFTextStripper.getText(PDFTextStripper.java:242)
at redcondor.spam.PdfTextExtractor.extractText(PdfTextExtractor.java:63)
at redcondor.spam.ParsedMailMessage$Part.parse(ParsedMailMessage.java:1000)
at redcondor.spam.ParsedMailMessage$Part.<init>(ParsedMailMessage.java:782)
at redcondor.spam.ParsedMailMessage.<init>(ParsedMailMessage.java:168)
at model.rules.filtering.filterable.FilterableFile.newRuleContext(FilterableFile.java:301)
at model.rules.filtering.filterable.FilterableFile.getRuleContext(FilterableFile.java:295)
at model.rules.filtering.filterable.FilterableFile.getSize(FilterableFile.java:341)
at model.rules.filtering.filters.AbstractFilter.getSize(AbstractFilter.java:260)
at redcondor.conditions.SizeCondition.evaluate(SizeCondition.java:118)
at redcondor.conditions.SizeCondition.evaluate(SizeCondition.java:21)
at core.conditions.AllCondition.evaluate(AllCondition.java:102)
at model.rules.engine.CompiledRule.evaluate(CompiledRule.java:200)
at model.rules.engine.providers.RuleEngineProvider.match(RuleEngineProvider.java:353)
at run.antispam.services.AntiSpamServiceProvider.match(AntiSpamServiceProvider.java:532)
at model.rules.filtering.filters.rule.RuleFilter.getRule(RuleFilter.java:81)
at model.rules.filtering.filters.rule.RuleFilter.getCachedRule(RuleFilter.java:88)
at model.rules.filtering.filters.rule.RuleFilter.getResponse(RuleFilter.java:122)
at model.rules.filtering.filters.AbstractFilteringService$InstrumentedFilter.getResponse(AbstractFilteringService.java:220)
at model.rules.filtering.services.stacks.DefaultFilteringTask.execute(DefaultFilteringTask.java:157)
at model.rules.filtering.services.stacks.AbstractFilterStackService.execute(AbstractFilterStackService.java:123)
at model.rules.filtering.services.stacks.AbstractFilterStackService.filter(AbstractFilterStackService.java:227)
at model.rules.filtering.services.stacks.inbound.InboundFilterStackService.filter(InboundFilterStackService.java:254)
at run.smtp.services.spoolers.inbox.InboxService.notifyMessageReady(InboxService.java:425)
at redcondor.net.smtp.spooler.AbstractSpooler.notifyObjectReceived(AbstractSpooler.java:546)
at redcondor.net.smtp.spooler.AbstractSpooler.notifyObjectReceived(AbstractSpooler.java:62)
at redcondor.services.threaded.WorkerThread.pumpObject(WorkerThread.java:291)
at redcondor.services.threaded.WorkerThread.pumpObject(WorkerThread.java:244)
at redcondor.services.threaded.WorkerPool.notifyWorking(WorkerPool.java:209)
at redcondor.services.threaded.WorkerPool$Worker.run(WorkerPool.java:451)
Caused by: java.io.IOException: Value is not an integer: +256
at org.apache.pdfbox.cos.COSNumber.get(COSNumber.java:96)
at org.apache.pdfbox.pdfparser.PDFStreamParser.parseNextToken(PDFStreamParser.java:347)
at org.apache.pdfbox.pdfparser.PDFStreamParser.access$000(PDFStreamParser.java:46)
at org.apache.pdfbox.pdfparser.PDFStreamParser$1.tryNext(PDFStreamParser.java:175)
... 37 more
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators:
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira