Zack Honig created PDFBOX-2125:
----------------------------------

             Summary: Solr throws exception error
                 Key: PDFBOX-2125
                 URL: https://issues.apache.org/jira/browse/PDFBOX-2125
             Project: PDFBox
          Issue Type: Bug
          Components: PDModel, Text extraction
    Affects Versions: 1.8.4
         Environment: Java; Windows 7; Tika; Solr;
            Reporter: Zack Honig
             Fix For: 1.8.4


When a PDF document is uploaded and processed through Solr, the following error is thrown:

54440 [qtp1311760211-12] ERROR org.apache.solr.core.SolrCore  – org.apache.solr.
common.SolrException: org.apache.tika.exception.TikaException: Unexpected Runtim
eException from org.apache.tika.parser.pdf.PDFParser@4d711a77
        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(Extr
actingDocumentLoader.java:225)
        at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(Co
ntentStreamHandlerBase.java:74)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandl
erBase.java:135)
        at org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handle
Request(RequestHandlers.java:241)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:1952)
        at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter
.java:774)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilte
r.java:418)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilte
r.java:207)
        at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(Servlet
Handler.java:1419)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java
:455)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.j
ava:137)
        at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.jav
a:522)
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandl
er.java:231)
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandl
er.java:1075)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:
384)
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandle
r.java:193)
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandle
r.java:1009)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.j
ava:135)
        at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(Cont
extHandlerCollection.java:255)
        at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerColl
ection.java:154)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper
.java:116)
        at org.eclipse.jetty.server.Server.handle(Server.java:368)
        at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(Abstrac
tHttpConnection.java:489)
        at org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(Blockin
gHttpConnection.java:53)
        at org.eclipse.jetty.server.AbstractHttpConnection.headerComplete(Abstra
ctHttpConnection.java:942)
        at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.header
Complete(AbstractHttpConnection.java:1004)
        at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:647)
        at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:235)

        at org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpCo
nnection.java:72)
        at org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(So
cketConnector.java:264)
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPoo
l.java:608)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool
.java:543)
        at java.lang.Thread.run(Unknown Source)
Caused by: org.apache.tika.exception.TikaException: Unexpected RuntimeException
from org.apache.tika.parser.pdf.PDFParser@4d711a77
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:244
)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242
)
        at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:1
24)
        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(Extr
actingDocumentLoader.java:219)
        ... 32 more
Caused by: java.lang.ArrayIndexOutOfBoundsException
        at java.lang.System.arraycopy(Native Method)
        at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.computeE
ncryptedKey(StandardSecurityHandler.java:573)
        at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.computeU
serPassword(StandardSecurityHandler.java:611)
        at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.isUserPa
ssword(StandardSecurityHandler.java:796)
        at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.prepareF
orDecryption(StandardSecurityHandler.java:214)
        at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.decryptD
ocument(StandardSecurityHandler.java:156)
        at org.apache.pdfbox.pdmodel.PDDocument.openProtection(PDDocument.java:1
600)
        at org.apache.pdfbox.pdmodel.PDDocument.decrypt(PDDocument.java:946)
        at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java
:329)
        at org.apache.tika.parser.pdf.PDF2XHTML.process(PDF2XHTML.java:106)
        at org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:143)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242
)
        ... 35 more

54456 [qtp1311760211-12] ERROR org.apache.solr.servlet.SolrDispatchFilter  – nul
l:org.apache.solr.common.SolrException: org.apache.tika.exception.TikaException:
 Unexpected RuntimeException from org.apache.tika.parser.pdf.PDFParser@4d711a77
        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(Extr
actingDocumentLoader.java:225)
        at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(Co
ntentStreamHandlerBase.java:74)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandl
erBase.java:135)
        at org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handle
Request(RequestHandlers.java:241)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:1952)
        at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter
.java:774)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilte
r.java:418)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilte
r.java:207)
        at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(Servlet
Handler.java:1419)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java
:455)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.j
ava:137)
        at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.jav
a:522)
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandl
er.java:231)
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandl
er.java:1075)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:
384)
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandle
r.java:193)
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandle
r.java:1009)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.j
ava:135)
        at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(Cont
extHandlerCollection.java:255)
        at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerColl
ection.java:154)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper
.java:116)
        at org.eclipse.jetty.server.Server.handle(Server.java:368)
        at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(Abstrac
tHttpConnection.java:489)
        at org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(Blockin
gHttpConnection.java:53)
        at org.eclipse.jetty.server.AbstractHttpConnection.headerComplete(Abstra
ctHttpConnection.java:942)
        at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.header
Complete(AbstractHttpConnection.java:1004)
        at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:647)
        at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:235)

        at org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpCo
nnection.java:72)
        at org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(So
cketConnector.java:264)
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPoo
l.java:608)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool
.java:543)
        at java.lang.Thread.run(Unknown Source)
Caused by: org.apache.tika.exception.TikaException: Unexpected RuntimeException
from org.apache.tika.parser.pdf.PDFParser@4d711a77
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:244
)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242
)
        at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:1
24)
        at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(Extr
actingDocumentLoader.java:219)
        ... 32 more
Caused by: java.lang.ArrayIndexOutOfBoundsException
        at java.lang.System.arraycopy(Native Method)
        at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.computeE
ncryptedKey(StandardSecurityHandler.java:573)
        at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.computeU
serPassword(StandardSecurityHandler.java:611)
        at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.isUserPa
ssword(StandardSecurityHandler.java:796)
        at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.prepareF
orDecryption(StandardSecurityHandler.java:214)
        at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.decryptD
ocument(StandardSecurityHandler.java:156)
        at org.apache.pdfbox.pdmodel.PDDocument.openProtection(PDDocument.java:1
600)
        at org.apache.pdfbox.pdmodel.PDDocument.decrypt(PDDocument.java:946)
        at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java
:329)
        at org.apache.tika.parser.pdf.PDF2XHTML.process(PDF2XHTML.java:106)
        at org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:143)
        at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242
)
        ... 35 more



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to