[ https://issues.apache.org/jira/browse/TIKA-1046?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14602757#comment-14602757 ]
Tim Allison commented on TIKA-1046: ----------------------------------- Many thanks to [~kiwiwings] for fixing this in POI. I'll add a temporary catch block at the Tika level that we can remove with an update to POI 3.13-beta1. > Get "java.util.zip.ZipException: unknown compression method" when indexing > ppt97-file containing wmf-image > ---------------------------------------------------------------------------------------------------------- > > Key: TIKA-1046 > URL: https://issues.apache.org/jira/browse/TIKA-1046 > Project: Tika > Issue Type: Bug > Components: parser > Reporter: Olof Jonasson > Attachments: ppt2000_working.ppt, ppt2010_working.ppt, > ppt97_failing.ppt > > > With solr4.0 and tika1.2 we get an exception when trying to index a powerpoint > file that contains a specific .wmf-image. > As it seems, the powerpoint file must be created in Office97 (or older?) to > generate the error, since re-saving the file in Office2000 or Office2010 > makes the problem go away. > Full stacktrace from the solr-server below: > 2012-dec-19 14:39:46 org.apache.solr.common.SolrException log > ALLVARLIG: org.apache.solr.common.SolrException: > org.apache.tika.exception.TikaException: Unexpected RuntimeException from > org.apache.tika.parser.microsoft.OfficeParser@12f195 > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:225) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:129) > at > org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:240) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:1699) > at > org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:455) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:276) > at > 
org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:235) > at > org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206) > at > org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233) > at > org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191) > at > org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:563) > at > org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127) > at > org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102) > at > org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109) > at > org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:293) > at > org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:859) > at > org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.process(Http11Protocol.java:602) > at > org.apache.tomcat.util.net.JIoEndpoint$Worker.run(JIoEndpoint.java:489) > at java.lang.Thread.run(Thread.java:662) > Caused by: org.apache.tika.exception.TikaException: Unexpected > RuntimeException from org.apache.tika.parser.microsoft.OfficeParser@12f195 > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:244) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:219) > ... 
19 more > Caused by: org.apache.poi.hslf.exceptions.HSLFException: > java.util.zip.ZipException: unknown compression method > at org.apache.poi.hslf.blip.WMF.getData(WMF.java:65) > at > org.apache.tika.parser.microsoft.HSLFExtractor.handleSlideEmbeddedPictures(HSLFExtractor.java:204) > at > org.apache.tika.parser.microsoft.HSLFExtractor.parse(HSLFExtractor.java:162) > at > org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:189) > at > org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:161) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242) > ... 23 more > Caused by: java.util.zip.ZipException: unknown compression method > at java.util.zip.InflaterInputStream.read(InflaterInputStream.java:147) > at java.io.FilterInputStream.read(FilterInputStream.java:90) > at org.apache.poi.hslf.blip.WMF.getData(WMF.java:59) > ... 28 more > 2012-dec-19 14:39:46 org.apache.solr.common.SolrException log > ALLVARLIG: null:org.apache.solr.common.SolrException: > org.apache.tika.exception.TikaException: Unexpected RuntimeException from > org.apache.tika.parser.microsoft.OfficeParser@12f195 > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:225) > at > org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:129) > at > org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:240) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:1699) > at > org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:455) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:276) > at > org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:235) > at > 
org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206) > at > org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233) > at > org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191) > at > org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:563) > at > org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127) > at > org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102) > at > org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109) > at > org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:293) > at > org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:859) > at > org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.process(Http11Protocol.java:602) > at > org.apache.tomcat.util.net.JIoEndpoint$Worker.run(JIoEndpoint.java:489) > at java.lang.Thread.run(Thread.java:662) > Caused by: org.apache.tika.exception.TikaException: Unexpected > RuntimeException from org.apache.tika.parser.microsoft.OfficeParser@12f195 > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:244) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242) > at > org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120) > at > org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:219) > ... 
19 more > Caused by: org.apache.poi.hslf.exceptions.HSLFException: > java.util.zip.ZipException: unknown compression method > at org.apache.poi.hslf.blip.WMF.getData(WMF.java:65) > at > org.apache.tika.parser.microsoft.HSLFExtractor.handleSlideEmbeddedPictures(HSLFExtractor.java:204) > at > org.apache.tika.parser.microsoft.HSLFExtractor.parse(HSLFExtractor.java:162) > at > org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:189) > at > org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:161) > at > org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242) > ... 23 more > Caused by: java.util.zip.ZipException: unknown compression method > at java.util.zip.InflaterInputStream.read(InflaterInputStream.java:147) > at java.io.FilterInputStream.read(FilterInputStream.java:90) > at org.apache.poi.hslf.blip.WMF.getData(WMF.java:59) > ... 28 more -- This message was sent by Atlassian JIRA (v6.3.4#6332)