Author: thorsten
Date: Fri Apr 30 11:28:26 2010
New Revision: 939642
URL: http://svn.apache.org/viewvc?rev=939642&view=rev
Log:
DROIDS-81
Reporter: Richard Frovarp
Patch: Richard Frovarp
review: thorsten
Added:
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
Modified:
incubator/droids/trunk/droids-tika/pom.xml
Modified: incubator/droids/trunk/droids-tika/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/pom.xml?rev=939642&r1=939641&r2=939642&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/pom.xml (original)
+++ incubator/droids/trunk/droids-tika/pom.xml Fri Apr 30 11:28:26 2010
@@ -46,9 +46,14 @@
<version>${pom.version}</version>
</dependency>
<dependency>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika</artifactId>
- <version>0.2</version>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>0.6</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parsers</artifactId>
+ <version>0.6</version>
</dependency>
</dependencies>
Added:
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java?rev=939642&view=auto
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java (added)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java Fri Apr 30 11:28:26 2010
@@ -0,0 +1,45 @@
+package org.apache.droids.tika;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.droids.api.ContentEntity;
+import org.apache.droids.api.Link;
+import org.apache.droids.api.Parse;
+import org.apache.droids.api.Parser;
+import org.apache.droids.exception.DroidsException;
+import org.apache.droids.helper.Loggable;
+import org.apache.droids.parse.ParseImpl;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.SAXException;
+
+public class TikaDocumentParser extends Loggable implements Parser {
+
+ @Override
+ public Parse parse(ContentEntity entity, Link link) throws DroidsException,
+ IOException {
+ org.apache.tika.parser.Parser parser = new AutoDetectParser();
+ Metadata metadata = new Metadata();
+ BodyContentHandler handler = new BodyContentHandler();
+
+ InputStream instream = entity.obtainContent();
+ try {
+ parser.parse(instream, handler, metadata, new ParseContext());
+ ParseImpl parse = new ParseImpl(handler.toString(),null);
+
+ return parse;
+
+ } catch (SAXException ex) {
+ throw new DroidsException("Failure parsing document " + link.getId(), ex);
+ } catch (TikaException ex) {
+ throw new DroidsException("Failure parsing document " + link.getId(), ex);
+ } finally {
+ instream.close();
+ }
+ }
+
+}