Author: btellier
Date: Mon Jun 29 08:46:59 2015
New Revision: 1688147

URL: http://svn.apache.org/r1688147
Log:
MAILBOX-245 Profile to not load tika Jar

Added:
    
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/
    
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java
    
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/ParsedContent.java
    
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/TextExtractor.java
    
james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/extractor/
    
james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractorTest.java
    james/mailbox/trunk/store/src/test/resources/
    james/mailbox/trunk/store/src/test/resources/documents/
    james/mailbox/trunk/store/src/test/resources/documents/Text.txt
    james/mailbox/trunk/store/src/test/resources/documents/writter.docx
    james/mailbox/trunk/tika/
    james/mailbox/trunk/tika/pom.xml
    james/mailbox/trunk/tika/src/
    james/mailbox/trunk/tika/src/main/
    james/mailbox/trunk/tika/src/main/java/
    james/mailbox/trunk/tika/src/main/java/org/
    james/mailbox/trunk/tika/src/main/java/org/apache/
    james/mailbox/trunk/tika/src/main/java/org/apache/james/
    james/mailbox/trunk/tika/src/main/java/org/apache/james/mailbox/
    james/mailbox/trunk/tika/src/main/java/org/apache/james/mailbox/tika/
    
james/mailbox/trunk/tika/src/main/java/org/apache/james/mailbox/tika/extractor/
    
james/mailbox/trunk/tika/src/main/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractor.java
    james/mailbox/trunk/tika/src/test/
    james/mailbox/trunk/tika/src/test/java/
    james/mailbox/trunk/tika/src/test/java/org/
    james/mailbox/trunk/tika/src/test/java/org/apache/
    james/mailbox/trunk/tika/src/test/java/org/apache/james/
    james/mailbox/trunk/tika/src/test/java/org/apache/james/mailbox/
    james/mailbox/trunk/tika/src/test/java/org/apache/james/mailbox/tika/
    
james/mailbox/trunk/tika/src/test/java/org/apache/james/mailbox/tika/extractor/
    
james/mailbox/trunk/tika/src/test/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractorTest.java
    james/mailbox/trunk/tika/src/test/resources/
    james/mailbox/trunk/tika/src/test/resources/documents/
    james/mailbox/trunk/tika/src/test/resources/documents/PDF.pdf
    james/mailbox/trunk/tika/src/test/resources/documents/Text.txt
    james/mailbox/trunk/tika/src/test/resources/documents/calc.ods
    james/mailbox/trunk/tika/src/test/resources/documents/calc.xlsx
    james/mailbox/trunk/tika/src/test/resources/documents/fake.txt
    james/mailbox/trunk/tika/src/test/resources/documents/slides.odp
    james/mailbox/trunk/tika/src/test/resources/documents/slides.pptx
    james/mailbox/trunk/tika/src/test/resources/documents/writter.docx
    james/mailbox/trunk/tika/src/test/resources/documents/writter.odt
Removed:
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/extractor/
    
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java
    
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/extractor/
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/PDF.pdf
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/Text.txt
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/calc.ods
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/calc.xlsx
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/fake.txt
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/slides.odp
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/slides.pptx
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/writter.docx
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/writter.odt
Modified:
    james/mailbox/trunk/elasticsearch/pom.xml
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
    
james/mailbox/trunk/elasticsearch/src/main/resources/META-INF/spring/mailbox-elasticsearch.xml
    
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
    
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
    james/mailbox/trunk/pom.xml

Modified: james/mailbox/trunk/elasticsearch/pom.xml
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/pom.xml?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- james/mailbox/trunk/elasticsearch/pom.xml (original)
+++ james/mailbox/trunk/elasticsearch/pom.xml Mon Jun 29 08:46:59 2015
@@ -29,7 +29,7 @@
 
     <artifactId>apache-james-mailbox-elasticsearch</artifactId>
     <description>Apache James Mailbox IMAP search implementation using 
ElasticSearch</description>
-    <name>Apache James :: Mailbox :: ElasticSearch</name>
+    <name>Apache James :: Mailbox :: Tika</name>
 
     <properties>
         <javax.mail.groupId>javax.mail</javax.mail.groupId>
@@ -52,6 +52,11 @@
             <scope>test</scope>
         </dependency>
         <dependency>
+            <groupId>org.apache.james</groupId>
+            <artifactId>apache-james-mailbox-tika</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
             <artifactId>jackson-databind</artifactId>
         </dependency>
@@ -103,16 +108,6 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>org.apache.tika</groupId>
-            <artifactId>tika-core</artifactId>
-            <version>1.7</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.tika</groupId>
-            <artifactId>tika-parsers</artifactId>
-            <version>1.7</version>
-        </dependency>
-        <dependency>
             <groupId>org.assertj</groupId>
             <artifactId>assertj-core</artifactId>
             <version>3.0.0</version>

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
 Mon Jun 29 08:46:59 2015
@@ -23,8 +23,8 @@ import com.fasterxml.jackson.annotation.
 import com.google.common.base.Preconditions;
 import com.google.common.base.Throwables;
 import com.google.common.collect.Multimap;
-import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
 import org.apache.james.mailbox.elasticsearch.query.DateResolutionFormater;
+import org.apache.james.mailbox.store.extractor.TextExtractor;
 import org.apache.james.mailbox.store.mail.model.MailboxId;
 import org.apache.james.mailbox.store.mail.model.Message;
 import org.apache.james.mailbox.store.mail.model.Property;

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
 Mon Jun 29 08:46:59 2015
@@ -26,7 +26,7 @@ import com.fasterxml.jackson.databind.Ob
 import com.fasterxml.jackson.datatype.guava.GuavaModule;
 import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
 import com.google.common.base.Preconditions;
-import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
+import org.apache.james.mailbox.store.extractor.TextExtractor;
 import org.apache.james.mailbox.store.mail.model.Message;
 
 public class MessageToElasticSearchJson {
@@ -41,6 +41,10 @@ public class MessageToElasticSearchJson
         this.mapper.registerModule(new Jdk8Module());
     }
 
+    public MessageToElasticSearchJson(TextExtractor textExtractor) {
+        this(textExtractor, ZoneId.systemDefault());
+    }
+
     public String convertToJson(Message<?> message) throws 
JsonProcessingException {
         Preconditions.checkNotNull(message);
         return mapper.writeValueAsString(IndexableMessage.from(message, 
textExtractor));

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
 Mon Jun 29 08:46:59 2015
@@ -26,9 +26,9 @@ import com.google.common.collect.Immutab
 import com.google.common.collect.Lists;
 import com.google.common.collect.Multimap;
 import org.apache.commons.io.FilenameUtils;
-import 
org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor;
-import org.apache.james.mailbox.elasticsearch.json.extractor.ParsedContent;
-import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
+import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
+import org.apache.james.mailbox.store.extractor.ParsedContent;
+import org.apache.james.mailbox.store.extractor.TextExtractor;
 import org.apache.james.mime4j.stream.Field;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -119,7 +119,7 @@ public class MimePart {
             Optional<ParsedContent> parsedContent = 
parseContent(textExtractor);
             return new MimePart(
                 headerCollectionBuilder.build(),
-                parsedContent.map(ParsedContent::getTextualContent)
+                parsedContent.map( x -> 
Optional.ofNullable(x.getTextualContent()))
                     .orElse(Optional.empty())
                 ,
                 mediaType,
@@ -128,15 +128,24 @@ public class MimePart {
                 fileExtension,
                 contentDisposition,
                 children,
-                parsedContent.map(ParsedContent::getMetadata)
-                    .orElse(ImmutableMultimap.<String, 
String>builder().build())
+                parsedContent
+                    .map(x -> x.getMetadata()
+                        .entrySet()
+                        .stream()
+                        .reduce(ImmutableMultimap.<String, String>builder(),
+                            (builder, entry) -> builder.putAll(entry.getKey(), 
entry.getValue()),
+                            (builder1, builder2) -> 
builder1.putAll(builder2.build())).build())
+                    .orElse(ImmutableMultimap.of())
             );
         }
 
         private Optional<ParsedContent> parseContent(TextExtractor 
textExtractor) {
             if (bodyContent.isPresent()) {
                 try {
-                    return 
Optional.of(textExtractor.extractContent(bodyContent.get(), 
computeContentType(), fileName));
+                    return Optional.of(textExtractor.extractContent(
+                        bodyContent.get(),
+                        computeContentType().orElse(null),
+                        fileName.orElse(null)));
                 } catch (Exception e) {
                     LOGGER.warn("Failed parsing attachment", e);
                 }

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
 Mon Jun 29 08:46:59 2015
@@ -19,7 +19,7 @@
 
 package org.apache.james.mailbox.elasticsearch.json;
 
-import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
+import org.apache.james.mailbox.store.extractor.TextExtractor;
 import org.apache.james.mime4j.stream.Field;
 
 import java.io.InputStream;

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
 Mon Jun 29 08:46:59 2015
@@ -20,7 +20,7 @@
 package org.apache.james.mailbox.elasticsearch.json;
 
 import com.google.common.base.Preconditions;
-import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
+import org.apache.james.mailbox.store.extractor.TextExtractor;
 import org.apache.james.mailbox.store.mail.model.MailboxId;
 import org.apache.james.mailbox.store.mail.model.Message;
 import org.apache.james.mime4j.MimeException;

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
 Mon Jun 29 08:46:59 2015
@@ -19,7 +19,7 @@
 
 package org.apache.james.mailbox.elasticsearch.json;
 
-import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
+import org.apache.james.mailbox.store.extractor.TextExtractor;
 import org.apache.james.mime4j.stream.Field;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

Modified: 
james/mailbox/trunk/elasticsearch/src/main/resources/META-INF/spring/mailbox-elasticsearch.xml
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/resources/META-INF/spring/mailbox-elasticsearch.xml?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/resources/META-INF/spring/mailbox-elasticsearch.xml
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/resources/META-INF/spring/mailbox-elasticsearch.xml
 Mon Jun 29 08:46:59 2015
@@ -39,7 +39,9 @@
         <constructor-arg index="0" ref="elasticsearch-clientprovider-2"/>
     </bean>
 
-    <bean id="elasticsearch-json" 
class="org.apache.james.mailbox.elasticsearch.json.MessageToElasticSearchJson"/>
+    <bean id="elasticsearch-json" 
class="org.apache.james.mailbox.elasticsearch.json.MessageToElasticSearchJson">
+        <constructor-arg index="0" ref="text-extractor"/>
+    </bean>
 
     <bean id="elasticsearch-searcher" 
class="org.apache.james.mailbox.elasticsearch.search.ElasticSearchSearcher">
         <constructor-arg index="0" ref="elasticsearch-clientprovider-2"/>
@@ -65,4 +67,6 @@
 
     <bean id="criterion-converter" 
class="org.apache.james.mailbox.elasticsearch.query.CriterionConverter"/>
 
+    <bean id="text-extractor" 
class="org.apache.james.mailbox.tika.extractor.TikaTextExtractor"/>
+
 </beans>

Modified: 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
 Mon Jun 29 08:46:59 2015
@@ -30,7 +30,7 @@ import org.apache.james.mailbox.acl.Simp
 import org.apache.james.mailbox.acl.UnionMailboxACLResolver;
 import 
org.apache.james.mailbox.elasticsearch.events.ElasticSearchListeningMessageSearchIndex;
 import org.apache.james.mailbox.elasticsearch.json.MessageToElasticSearchJson;
-import 
org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor;
+import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
 import org.apache.james.mailbox.elasticsearch.query.CriterionConverter;
 import org.apache.james.mailbox.elasticsearch.query.QueryConverter;
 import org.apache.james.mailbox.elasticsearch.search.ElasticSearchSearcher;

Modified: 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
 Mon Jun 29 08:46:59 2015
@@ -22,8 +22,8 @@ package org.apache.james.mailbox.elastic
 import com.google.common.base.Throwables;
 import org.apache.commons.io.IOUtils;
 import org.apache.james.mailbox.FlagsBuilder;
-import 
org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor;
-import org.apache.james.mailbox.elasticsearch.json.extractor.TikaTextExtractor;
+import org.apache.james.mailbox.tika.extractor.TikaTextExtractor;
+import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
 import org.apache.james.mailbox.store.TestId;
 import org.apache.james.mailbox.store.mail.model.Message;
 import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder;

Modified: james/mailbox/trunk/pom.xml
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/pom.xml?rev=1688147&r1=1688146&r2=1688147&view=diff
==============================================================================
--- james/mailbox/trunk/pom.xml (original)
+++ james/mailbox/trunk/pom.xml Mon Jun 29 08:46:59 2015
@@ -60,6 +60,7 @@
         <module>memory</module>
         <module>store</module>
         <module>spring</module>
+        <module>tika</module>
         <module>tool</module>
         <module>zoo-seq-provider</module>
     </modules>
@@ -201,11 +202,16 @@
                 <artifactId>apache-james-mailbox-cassandra</artifactId>
                 <version>${project.version}</version>
             </dependency>
-           <dependency>
+            <dependency>
                 <groupId>org.apache.james</groupId>
                 <artifactId>apache-james-mailbox-elasticsearch</artifactId>
                 <version>${project.version}</version>
             </dependency>
+            <dependency>
+                <groupId>org.apache.james</groupId>
+                <artifactId>apache-james-mailbox-tika</artifactId>
+                <version>${project.version}</version>
+            </dependency>
 
            <dependency>
                 <groupId>org.apache.james</groupId>
@@ -601,6 +607,30 @@
 
     <profiles>
         <profile>
+            <id>exclude-tika</id>
+            <activation>
+                <activeByDefault>false</activeByDefault>
+            </activation>
+
+            <dependencies>
+                <dependency>
+                    <groupId>org.apache.james</groupId>
+                    <artifactId>apache-james-mailbox-elasticsearch</artifactId>
+                    <type>pom</type>
+                    <exclusions>
+                        <exclusion>
+                            <groupId>org.apache.tika</groupId>
+                            <artifactId>tika-core</artifactId>
+                        </exclusion>
+                        <exclusion>
+                            <groupId>org.apache.tika</groupId>
+                            <artifactId>tika-parsers</artifactId>
+                        </exclusion>
+                    </exclusions>
+                </dependency>
+            </dependencies>
+        </profile>
+        <profile>
             <id>geronimo</id>
             <properties>
                 
<javax.mail.groupId>org.apache.geronimo.javamail</javax.mail.groupId>

Added: 
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java?rev=1688147&view=auto
==============================================================================
--- 
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java
 (added)
+++ 
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractor.java
 Mon Jun 29 08:46:59 2015
@@ -0,0 +1,43 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.store.extractor;
+
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+
+/**
+ * A default text extractor that is directly based on the input file provided.
+ * 
+ * Requires less computation than TikaTextExtractor, but the result is not as good.
+ */
+public class DefaultTextExtractor implements TextExtractor {
+
+    @Override
+    public ParsedContent extractContent(InputStream inputStream, String 
contentType, String fileName) throws Exception {
+        if(contentType != null && contentType.startsWith("text/") ) {
+            return new ParsedContent(IOUtils.toString(inputStream), new 
HashMap<String, List<String>>());
+        } else {
+            return new ParsedContent(null, new HashMap<String, 
List<String>>());
+        }
+    }
+}

Added: 
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/ParsedContent.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/ParsedContent.java?rev=1688147&view=auto
==============================================================================
--- 
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/ParsedContent.java
 (added)
+++ 
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/ParsedContent.java
 Mon Jun 29 08:46:59 2015
@@ -0,0 +1,43 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.store.extractor;
+
+import java.util.List;
+import java.util.Map;
+
+public class ParsedContent {
+
+    private final String textualContent;
+    private final Map<String, List<String>> metadata;
+
+    public ParsedContent(String textualContent, Map<String, List<String>> 
metadata) {
+        this.textualContent = textualContent;
+        this.metadata = metadata;
+    }
+
+    public String getTextualContent() {
+        return textualContent;
+    }
+
+    public  Map<String, List<String>> getMetadata() {
+        return metadata;
+    }
+    
+}

Added: 
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/TextExtractor.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/TextExtractor.java?rev=1688147&view=auto
==============================================================================
--- 
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/TextExtractor.java
 (added)
+++ 
james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/extractor/TextExtractor.java
 Mon Jun 29 08:46:59 2015
@@ -0,0 +1,28 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.store.extractor;
+
+import java.io.InputStream;
+
+public interface TextExtractor {
+
+    ParsedContent extractContent(InputStream inputStream, String contentType, 
String fileName) throws Exception;
+
+}

Added: 
james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractorTest.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractorTest.java?rev=1688147&view=auto
==============================================================================
--- 
james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractorTest.java
 (added)
+++ 
james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/extractor/DefaultTextExtractorTest.java
 Mon Jun 29 08:46:59 2015
@@ -0,0 +1,56 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.store.extractor;
+
+import java.io.InputStream;
+
+import org.junit.Before;
+import org.junit.Test;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Checks the textual content reported by {@link DefaultTextExtractor} for a
+ * plain text fixture and for a docx fixture.
+ */
+public class DefaultTextExtractorTest {
+    private TextExtractor textExtractor;
+
+    @Before
+    public void setUp() {
+        textExtractor = new DefaultTextExtractor();
+    }
+
+    /** The text/plain fixture must come back with its full text. */
+    @Test
+    public void textTest() throws Exception {
+        InputStream textDocument =
+            ClassLoader.getSystemResourceAsStream("documents/Text.txt");
+        assertThat(textDocument).isNotNull();
+
+        ParsedContent parsed =
+            textExtractor.extractContent(textDocument, "text/plain", "Text.txt");
+
+        assertThat(parsed.getTextualContent())
+            .isEqualTo("This is some awesome text text.\n\n");
+    }
+
+    /** The docx fixture must yield no textual content at all. */
+    @Test
+    public void textMicrosoftWorldTest() throws Exception {
+        InputStream wordDocument =
+            ClassLoader.getSystemResourceAsStream("documents/writter.docx");
+        assertThat(wordDocument).isNotNull();
+
+        ParsedContent parsed = textExtractor.extractContent(
+            wordDocument,
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "writter.docx");
+
+        assertThat(parsed.getTextualContent()).isNull();
+    }
+}

Added: james/mailbox/trunk/store/src/test/resources/documents/Text.txt
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/test/resources/documents/Text.txt?rev=1688147&view=auto
==============================================================================
--- james/mailbox/trunk/store/src/test/resources/documents/Text.txt (added)
+++ james/mailbox/trunk/store/src/test/resources/documents/Text.txt Mon Jun 29 
08:46:59 2015
@@ -0,0 +1,2 @@
+This is some awesome text text.
+

Added: james/mailbox/trunk/store/src/test/resources/documents/writter.docx
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/test/resources/documents/writter.docx?rev=1688147&view=auto
==============================================================================
    (empty)

Added: james/mailbox/trunk/tika/pom.xml
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/pom.xml?rev=1688147&view=auto
==============================================================================
--- james/mailbox/trunk/tika/pom.xml (added)
+++ james/mailbox/trunk/tika/pom.xml Mon Jun 29 08:46:59 2015
@@ -0,0 +1,230 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements. See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership. The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied. See the License for the
+    specific language governing permissions and limitations
+    under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <artifactId>apache-james-mailbox</artifactId>
+        <groupId>org.apache.james</groupId>
+        <version>0.6-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>apache-james-mailbox-tika</artifactId>
+    <description>Apache James Mailbox project for optional Tika dependency, to extract attachment textual content before indexation</description>
+    <name>Apache James :: Mailbox :: Tika</name>
+
+    <properties>
+        <!-- Single place to bump both Tika artifacts together. -->
+        <tika.version>1.7</tika.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.james</groupId>
+            <artifactId>apache-james-mailbox-store</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${tika.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-parsers</artifactId>
+            <version>${tika.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <!-- Only the tests of this module use AssertJ. -->
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <profiles>
+        <!-- Declares no configuration of its own in this module. -->
+        <profile>
+            <id>remove-tika</id>
+            <activation>
+                <activeByDefault>false</activeByDefault>
+            </activation>
+        </profile>
+        <!-- Excludes every file from the jar built for this module. -->
+        <profile>
+            <id>exclude-tika</id>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-jar-plugin</artifactId>
+                        <configuration>
+                            <excludes>
+                                <exclude>**/**</exclude>
+                            </excludes>
+                        </configuration>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+        <!-- On a JDK older than 1.8 the executions listed below are unbound (phase "none"). -->
+        <profile>
+            <id>disable-build-for-older-jdk</id>
+            <activation>
+                <jdk>(,1.8)</jdk>
+            </activation>
+            <build>
+                <plugins>
+                    <plugin>
+                        <artifactId>maven-jar-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>default-jar</id>
+                                <phase>none</phase>
+                            </execution>
+                            <execution>
+                                <id>jar</id>
+                                <phase>none</phase>
+                            </execution>
+                            <execution>
+                                <id>test-jar</id>
+                                <phase>none</phase>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <artifactId>maven-compiler-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>default-compile</id>
+                                <phase>none</phase>
+                            </execution>
+                            <execution>
+                                <id>default-testCompile</id>
+                                <phase>none</phase>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <artifactId>maven-surefire-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>default-test</id>
+                                <phase>none</phase>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <artifactId>maven-source-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>attach-sources</id>
+                                <phase>none</phase>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <artifactId>maven-install-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>default-install</id>
+                                <phase>none</phase>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <artifactId>maven-resources-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>default-resources</id>
+                                <phase>none</phase>
+                            </execution>
+                            <execution>
+                                <id>default-testResources</id>
+                                <phase>none</phase>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <artifactId>maven-site-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>attach-descriptor</id>
+                                <phase>none</phase>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+        <!-- On JDK 1.8 or newer the module is compiled with source/target 1.8. -->
+        <profile>
+            <id>build-for-jdk-8</id>
+            <activation>
+                <jdk>[1.8,)</jdk>
+            </activation>
+            <build>
+                <plugins>
+                    <plugin>
+                        <artifactId>maven-assembly-plugin</artifactId>
+                        <configuration>
+                            <!-- No Main-Class manifest entry: this module ships no entry point. -->
+                            <descriptorRefs>
+                                <descriptorRef>jar-with-dependencies</descriptorRef>
+                            </descriptorRefs>
+                        </configuration>
+                    </plugin>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-compiler-plugin</artifactId>
+                        <configuration>
+                            <source>1.8</source>
+                            <target>1.8</target>
+                        </configuration>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+        <!-- Unbinds the check_java_6 animal-sniffer execution for this module. -->
+        <profile>
+            <id>disable-animal-sniffer</id>
+            <activation>
+                <jdk>[1.6,)</jdk>
+            </activation>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.codehaus.mojo</groupId>
+                        <artifactId>animal-sniffer-maven-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>check_java_6</id>
+                                <phase>none</phase>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+
+</project>

Added: 
james/mailbox/trunk/tika/src/main/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractor.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/main/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractor.java?rev=1688147&view=auto
==============================================================================
--- 
james/mailbox/trunk/tika/src/main/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractor.java
 (added)
+++ 
james/mailbox/trunk/tika/src/main/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractor.java
 Mon Jun 29 08:46:59 2015
@@ -0,0 +1,98 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.tika.extractor;
+
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.james.mailbox.store.extractor.ParsedContent;
+import org.apache.james.mailbox.store.extractor.TextExtractor;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+
+/**
+ * {@link TextExtractor} that hands documents to a Tika {@link AutoDetectParser}
+ * and returns the extracted body text together with the metadata collected
+ * while parsing.
+ */
+public class TikaTextExtractor implements TextExtractor {
+
+    private final Parser parser;
+
+    public TikaTextExtractor() {
+        parser = new AutoDetectParser();
+    }
+
+    /**
+     * Parses the document and returns its text and metadata.
+     *
+     * This method does not close {@code inputStream} itself.
+     *
+     * @param inputStream stream providing the bytes of the document
+     * @param contentType declared content type, handed to the parser as
+     *                    metadata; skipped when {@code null}
+     * @param fileName declared file name, handed to the parser as metadata;
+     *                 skipped when {@code null}
+     * @return the text written by the body handler and one list of values per
+     *         metadata name
+     * @throws Exception whatever the underlying parser throws
+     */
+    @Override
+    public ParsedContent extractContent(InputStream inputStream, String contentType, String fileName)
+            throws Exception {
+        Metadata metadata = createInitializedMetadata(contentType, fileName);
+
+        StringWriter stringWriter = new StringWriter();
+        BodyContentHandler bodyContentHandler = new BodyContentHandler(stringWriter);
+        parser.parse(inputStream, bodyContentHandler, metadata, new ParseContext());
+
+        return new ParsedContent(stringWriter.toString(), convertMetadataToMultimap(metadata));
+    }
+
+    /** Builds the metadata given to the parser from the declared, possibly absent, values. */
+    private Metadata createInitializedMetadata(String contentType, String fileName) {
+        Metadata metadata = new Metadata();
+        if (contentType != null) {
+            metadata.set(Metadata.CONTENT_TYPE, contentType);
+        }
+        if (fileName != null) {
+            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
+        }
+        return metadata;
+    }
+
+    /**
+     * Copies every metadata name and its values into a map.
+     *
+     * A mutable container must be filled with {@code collect}: {@code reduce}
+     * requires an identity value that is never modified, which a shared
+     * {@code HashMap} mutated by the accumulator is not.
+     */
+    private Map<String, List<String>> convertMetadataToMultimap(Metadata metadata) {
+        return Arrays.stream(metadata.names())
+            .collect(
+                HashMap::new,
+                (multimap, name) -> multimap.put(name, ImmutableList.copyOf(metadata.getValues(name))),
+                Map::putAll);
+    }
+
+}

Added: 
james/mailbox/trunk/tika/src/test/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractorTest.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/test/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractorTest.java?rev=1688147&view=auto
==============================================================================
--- 
james/mailbox/trunk/tika/src/test/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractorTest.java
 (added)
+++ 
james/mailbox/trunk/tika/src/test/java/org/apache/james/mailbox/tika/extractor/TikaTextExtractorTest.java
 Mon Jun 29 08:46:59 2015
@@ -0,0 +1,125 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.tika.extractor;
+
+import java.io.InputStream;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.apache.james.mailbox.store.extractor.TextExtractor;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TikaTextExtractorTest {
+    
+    private TextExtractor textExtractor;
+    
+    @Before
+    public void setUp() {
+        textExtractor = new TikaTextExtractor();
+    }
+    
+    @Test
+    public void textTest() throws Exception {
+        InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/Text.txt");
+        assertThat(inputStream).isNotNull();
+        assertThat(textExtractor.extractContent(inputStream, "text/plain", 
"Text.txt").getTextualContent())
+            .isEqualTo("This is some awesome text text.\n\n\n");
+    }
+
+    @Test
+    public void textMicrosoftWorldTest() throws Exception {
+        InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/writter.docx");
+        assertThat(inputStream).isNotNull();
+        assertThat(textExtractor.extractContent(inputStream, 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", 
"writter.docx").getTextualContent())
+            .isEqualTo("This is an awesome document on libroffice writter 
!\n");
+    }
+
+    @Test
+    public void textOdtTest() throws Exception {
+        InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/writter.odt");
+        assertThat(inputStream).isNotNull();
+        assertThat(textExtractor.extractContent(inputStream, 
"application/vnd.oasis.opendocument.text", "writter.odt").getTextualContent())
+            .isEqualTo("This is an awesome document on libroffice writter 
!\n");
+    }
+
+    @Test
+    public void documentWithBadDeclaredMetadataShouldBeWellHandled() throws 
Exception {
+        InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/fake.txt");
+        assertThat(inputStream).isNotNull();
+        assertThat(textExtractor.extractContent(inputStream, 
"application/vnd.oasis.opendocument.text", "writter.odt").getTextualContent())
+            .isEqualTo("This is an awesome document on libroffice writter 
!\n");
+    }
+    
+    @Test
+    public void slidePowerPointTest() throws Exception {
+        InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/slides.pptx");
+        assertThat(inputStream).isNotNull();
+        assertThat(textExtractor.extractContent(inputStream, 
"application/vnd.openxmlformats-officedocument.presentationml.presentation", 
"slides.pptx").getTextualContent())
+            .isEqualTo("James is awesome\nIt manages attachments so well 
!\n");
+    }
+
+    @Test
+    public void slideOdpTest() throws Exception {
+        InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/slides.odp");
+        assertThat(inputStream).isNotNull();
+        assertThat(textExtractor.extractContent(inputStream, 
"application/vnd.oasis.opendocument.presentation", 
"slides.odp").getTextualContent())
+            .isEqualTo("James is awesome\n\nIt manages attachments so well 
!\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n");
+    }
+    
+    @Test
+    public void pdfTest() throws Exception {
+        InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/PDF.pdf");
+        assertThat(inputStream).isNotNull();
+        assertThat(textExtractor.extractContent(inputStream, 
"application/pdf", "PDF.pdf").getTextualContent())
+            .isEqualTo("\nThis is an awesome document on libroffice writter 
!\n\n\n");
+    }
+    
+    @Test
+    public void odsTest() throws Exception {
+        InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/calc.ods");
+        assertThat(inputStream).isNotNull();
+        assertThat(textExtractor.extractContent(inputStream, 
"application/vnd.oasis.opendocument.spreadsheet", 
"calc.ods").getTextualContent())
+            .isEqualTo("\tThis is an aesome LibreOffice document !\n" +
+                "\n" +
+                "\n" +
+                "???\n" +
+                "Page \n" +
+                "??? (???)\n" +
+                "00/00/0000, 00:00:00\n" +
+                "Page  / \n");
+    }
+    
+    @Test
+    public void excelTest() throws Exception {
+        InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/calc.xlsx");
+        assertThat(inputStream).isNotNull();
+        assertThat(textExtractor.extractContent(inputStream, 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", 
"calc.xlsx").getTextualContent())
+            .isEqualTo("Feuille1\n" +
+                "\tThis is an aesome LibreOffice document !\n" +
+                "\n" +
+                "&A\t\n" +
+                "\n" +
+                "Page &P\t\n" +
+                "\n" +
+                "\n");
+    }
+    
+}

Added: james/mailbox/trunk/tika/src/test/resources/documents/PDF.pdf
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/test/resources/documents/PDF.pdf?rev=1688147&view=auto
==============================================================================
    (empty)

Added: james/mailbox/trunk/tika/src/test/resources/documents/Text.txt
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/test/resources/documents/Text.txt?rev=1688147&view=auto
==============================================================================
--- james/mailbox/trunk/tika/src/test/resources/documents/Text.txt (added)
+++ james/mailbox/trunk/tika/src/test/resources/documents/Text.txt Mon Jun 29 
08:46:59 2015
@@ -0,0 +1,2 @@
+This is some awesome text text.
+

Added: james/mailbox/trunk/tika/src/test/resources/documents/calc.ods
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/test/resources/documents/calc.ods?rev=1688147&view=auto
==============================================================================
    (empty)

Added: james/mailbox/trunk/tika/src/test/resources/documents/calc.xlsx
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/test/resources/documents/calc.xlsx?rev=1688147&view=auto
==============================================================================
    (empty)

Added: james/mailbox/trunk/tika/src/test/resources/documents/fake.txt
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/test/resources/documents/fake.txt?rev=1688147&view=auto
==============================================================================
    (empty)

Added: james/mailbox/trunk/tika/src/test/resources/documents/slides.odp
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/test/resources/documents/slides.odp?rev=1688147&view=auto
==============================================================================
    (empty)

Added: james/mailbox/trunk/tika/src/test/resources/documents/slides.pptx
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/test/resources/documents/slides.pptx?rev=1688147&view=auto
==============================================================================
    (empty)

Added: james/mailbox/trunk/tika/src/test/resources/documents/writter.docx
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/test/resources/documents/writter.docx?rev=1688147&view=auto
==============================================================================
    (empty)

Added: james/mailbox/trunk/tika/src/test/resources/documents/writter.odt
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/tika/src/test/resources/documents/writter.odt?rev=1688147&view=auto
==============================================================================
    (empty)



---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org
For additional commands, e-mail: server-dev-h...@james.apache.org

Reply via email to