Hi, we've got some Word DOC files that are causing POI to throw the error below. When we open the docs in Word 2010, a bar at the top indicates that they are in "Protected View." Any thoughts on fixing this without editing each affected Word document?
curl "http://localhost:8983/solr/update/extract?extractOnly=true&fmap.content=text" -F "[email protected]" <?xml version="1.0" encoding="UTF-8"?> <response> <lst name="responseHeader"><int name="status">500</int><int name="QTime">6</int> </lst><lst name="error"><str name="msg">org.apache.tika.exception.TikaException: TIKA-198: Illegal IOException from org.apache.tika.parser.microsoft.OfficeParser@228b96e1</str><str name="trace">org.apache.solr.common.SolrException: org.apache.tika.exception.TikaException: TIKA-198: Illegal IOException from org.apache.tika.parser.microsoft.OfficeParser@228b96e1 at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:230) at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74) at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:129) at org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:240) at org.apache.solr.core.SolrCore.execute(SolrCore.java:1656) at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:454) at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:275) at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1337) at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:484) at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:119) at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:524) at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:233) at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1065) at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:413) at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:192) at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:999) at 
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:117) at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:250) at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:149) at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:111) at org.eclipse.jetty.server.Server.handle(Server.java:351) at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:454) at org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:47) at org.eclipse.jetty.server.AbstractHttpConnection.headerComplete(AbstractHttpConnection.java:890) at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.headerComplete(AbstractHttpConnection.java:944) at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:642) at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:230) at org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:66) at org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:254) at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:599) at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:534) at java.lang.Thread.run(Unknown Source) Caused by: org.apache.tika.exception.TikaException: TIKA-198: Illegal IOException from org.apache.tika.parser.microsoft.OfficeParser@228b96e1 at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:248) at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242) at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120) at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:224) ... 
31 more Caused by: java.io.IOException: Duplicate name "#1;CompObj" at org.apache.poi.poifs.property.DirectoryProperty.addChild(DirectoryProperty.java:266) at org.apache.poi.poifs.property.PropertyTableBase.populatePropertyTree(PropertyTableBase.java:115) at org.apache.poi.poifs.property.PropertyTableBase.<init>(PropertyTableBase.java:63) at org.apache.poi.poifs.property.NPropertyTable.<init>(NPropertyTable.java:66) at org.apache.poi.poifs.filesystem.NPOIFSFileSystem.readCoreContents(NPOIFSFileSystem.java:379) at org.apache.poi.poifs.filesystem.NPOIFSFileSystem.<init>(NPOIFSFileSystem.java:202) at org.apache.poi.poifs.filesystem.NPOIFSFileSystem.<init>(NPOIFSFileSystem.java:184) at org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:155) at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242) ... 34 more </str><int name="code">500</int></lst> </response> Warm regards, Alex
