Author: btellier
Date: Mon Jun 29 08:45:43 2015
New Revision: 1688146

URL: http://svn.apache.org/r1688146
Log:
 MAILBOX-245 Use text extractor on JSON generation

Added:
    
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java
    
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml
    
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json
Modified:
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
    
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
    
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
    
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json
    
james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
 Mon Jun 29 08:45:43 2015
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.
 import com.google.common.base.Preconditions;
 import com.google.common.base.Throwables;
 import com.google.common.collect.Multimap;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
 import org.apache.james.mailbox.elasticsearch.query.DateResolutionFormater;
 import org.apache.james.mailbox.store.mail.model.MailboxId;
 import org.apache.james.mailbox.store.mail.model.Message;
@@ -41,11 +42,11 @@ import java.util.stream.Collectors;
 
 public class IndexableMessage {
 
-    public static IndexableMessage from(Message<? extends MailboxId> message) {
+    public static IndexableMessage from(Message<? extends MailboxId> message, TextExtractor textExtractor) {
         Preconditions.checkNotNull(message.getMailboxId());
         IndexableMessage indexableMessage = new IndexableMessage();
         try {
-            MimePart parsingResult = new MimePartParser(message).parse();
+            MimePart parsingResult = new MimePartParser(message, textExtractor).parse();
             indexableMessage.bodyText = parsingResult.locateFirstTextualBody();
             indexableMessage.setFlattenedAttachments(parsingResult);
             
indexableMessage.copyHeaderFields(parsingResult.getHeaderCollection(), 
getSanitizedInternalDate(message));

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java
 Mon Jun 29 08:45:43 2015
@@ -66,6 +66,7 @@ public interface JsonMessageConstants {
         String CONTENT_DISPOSITION = "contentDisposition";
         String FILENAME = "fileName";
         String FILE_EXTENSION = "fileExtension";
+        String FILE_METADATA = "fileMetadata";
     }
 
     interface Property {

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
 Mon Jun 29 08:45:43 2015
@@ -26,21 +26,24 @@ import com.fasterxml.jackson.databind.Ob
 import com.fasterxml.jackson.datatype.guava.GuavaModule;
 import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
 import com.google.common.base.Preconditions;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
 import org.apache.james.mailbox.store.mail.model.Message;
 
 public class MessageToElasticSearchJson {
 
-    private ObjectMapper mapper;
+    private final ObjectMapper mapper;
+    private final TextExtractor textExtractor;
 
-    public MessageToElasticSearchJson() {
-        mapper = new ObjectMapper();
-        mapper.registerModule(new GuavaModule());
-        mapper.registerModule(new Jdk8Module());
+    public MessageToElasticSearchJson(TextExtractor textExtractor) {
+        this.textExtractor = textExtractor;
+        this.mapper = new ObjectMapper();
+        this.mapper.registerModule(new GuavaModule());
+        this.mapper.registerModule(new Jdk8Module());
     }
 
     public String convertToJson(Message<?> message) throws JsonProcessingException {
         Preconditions.checkNotNull(message);
-        return mapper.writeValueAsString(IndexableMessage.from(message));
+        return mapper.writeValueAsString(IndexableMessage.from(message, textExtractor));
     }
 
     public String getUpdatedJsonMessagePart(Flags flags, long modSeq) throws JsonProcessingException {

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
 Mon Jun 29 08:45:43 2015
@@ -21,15 +21,18 @@ package org.apache.james.mailbox.elastic
 
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableMultimap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Multimap;
 import org.apache.commons.io.FilenameUtils;
-import org.apache.commons.io.IOUtils;
+import org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor;
+import org.apache.james.mailbox.elasticsearch.json.extractor.ParsedContent;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
 import org.apache.james.mime4j.stream.Field;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
 import java.io.InputStream;
 import java.util.List;
 import java.util.Optional;
@@ -47,7 +50,7 @@ public class MimePart {
         private Optional<String> fileName;
         private Optional<String> fileExtension;
         private Optional<String> contentDisposition;
-
+        private TextExtractor textExtractor;
 
         private Builder() {
             children = Lists.newArrayList();
@@ -58,6 +61,7 @@ public class MimePart {
             this.fileName = Optional.empty();
             this.fileExtension = Optional.empty();
             this.contentDisposition = Optional.empty();
+            this.textExtractor = new DefaultTextExtractor();
         }
 
         @Override
@@ -104,35 +108,50 @@ public class MimePart {
         }
 
         @Override
+        public MimePartContainerBuilder using(TextExtractor textExtractor) {
+            Preconditions.checkArgument(textExtractor != null, "Provided text extractor should not be null");
+            this.textExtractor = textExtractor;
+            return this;
+        }
+
+        @Override
         public MimePart build() {
+            Optional<ParsedContent> parsedContent = parseContent(textExtractor);
             return new MimePart(
-                    headerCollectionBuilder.build(),
-                    decodeContent(),
-                    mediaType,
-                    subType,
-                    fileName,
-                    fileExtension,
-                    contentDisposition,
-                    children
+                headerCollectionBuilder.build(),
+                parsedContent.map(ParsedContent::getTextualContent)
+                    .orElse(Optional.empty())
+                ,
+                mediaType,
+                subType,
+                fileName,
+                fileExtension,
+                contentDisposition,
+                children,
+                parsedContent.map(ParsedContent::getMetadata)
+                    .orElse(ImmutableMultimap.<String, String>builder().build())
             );
         }
 
-        private boolean isTextualMimePart() {
-            return mediaType.isPresent()
-                && mediaType.get().equalsIgnoreCase("text");
-        }
-
-        private Optional<String> decodeContent() {
-            if (bodyContent.isPresent() && isTextualMimePart()) {
+        private Optional<ParsedContent> parseContent(TextExtractor textExtractor) {
+            if (bodyContent.isPresent()) {
                 try {
-                    return Optional.of(IOUtils.toString(bodyContent.get()));
-                } catch (IOException e) {
-                    LOGGER.warn("Can not decode body content", e);
+                    return Optional.of(textExtractor.extractContent(bodyContent.get(), computeContentType(), fileName));
+                } catch (Exception e) {
+                    LOGGER.warn("Failed parsing attachment", e);
                 }
             }
             return Optional.empty();
         }
 
+        private Optional<String> computeContentType() {
+            if (mediaType.isPresent() && subType.isPresent()) {
+                return Optional.of(mediaType.get() + "/" + subType.get());
+            } else {
+                return Optional.empty();
+            }
+        }
+
     }
     
     public static Builder builder() {
@@ -149,10 +168,11 @@ public class MimePart {
     private final Optional<String> fileExtension;
     private final Optional<String> contentDisposition;
     private final List<MimePart> attachments;
+    private final ImmutableMultimap<String, String> metadata;
 
     private MimePart(HeaderCollection headerCollection, Optional<String> bodyTextContent, Optional<String> mediaType,
                     Optional<String> subType, Optional<String> fileName, Optional<String> fileExtension,
-                    Optional<String> contentDisposition, List<MimePart> attachments) {
+                    Optional<String> contentDisposition, List<MimePart> attachments, Multimap<String, String> metadata) {
         this.headerCollection = headerCollection;
         this.mediaType = mediaType;
         this.subType = subType;
@@ -161,6 +181,7 @@ public class MimePart {
         this.contentDisposition = contentDisposition;
         this.attachments = attachments;
         this.bodyTextContent = bodyTextContent;
+        this.metadata = ImmutableMultimap.copyOf(metadata);
     }
 
     @JsonIgnore
@@ -208,6 +229,11 @@ public class MimePart {
         return bodyTextContent;
     }
 
+    @JsonProperty(JsonMessageConstants.Attachment.FILE_METADATA)
+    public ImmutableMultimap<String, String> getMetadata() {
+        return metadata;
+    }
+
     @JsonIgnore
     public Optional<String> locateFirstTextualBody() {
         return Stream.concat(

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
 Mon Jun 29 08:45:43 2015
@@ -19,6 +19,7 @@
 
 package org.apache.james.mailbox.elasticsearch.json;
 
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
 import org.apache.james.mime4j.stream.Field;
 
 import java.io.InputStream;
@@ -27,6 +28,8 @@ public interface MimePartContainerBuilde
 
     MimePart build();
 
+    MimePartContainerBuilder using(TextExtractor textExtractor);
+
     MimePartContainerBuilder addToHeaders(Field field);
 
     MimePartContainerBuilder addBodyContent(InputStream bodyContent);

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
 Mon Jun 29 08:45:43 2015
@@ -20,6 +20,7 @@
 package org.apache.james.mailbox.elasticsearch.json;
 
 import com.google.common.base.Preconditions;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
 import org.apache.james.mailbox.store.mail.model.MailboxId;
 import org.apache.james.mailbox.store.mail.model.Message;
 import org.apache.james.mime4j.MimeException;
@@ -36,13 +37,15 @@ import java.util.LinkedList;
 public class MimePartParser {
 
     private final Message<? extends MailboxId> message;
+    private final TextExtractor textExtractor;
     private final MimeTokenStream stream;
     private final Deque<MimePartContainerBuilder> builderStack;
     private MimePart result;
     private MimePartContainerBuilder currentlyBuildMimePart;
 
-    public MimePartParser(Message<? extends MailboxId> message) {
+    public MimePartParser(Message<? extends MailboxId> message, TextExtractor textExtractor) {
         this.message = message;
+        this.textExtractor = textExtractor;
         this.builderStack = new LinkedList<>();
         this.currentlyBuildMimePart = new RootMimePartContainerBuilder();
         this.stream = new MimeTokenStream(
@@ -94,7 +97,7 @@ public class MimePartParser {
     }
     
     private void closeMimePart() {
-        MimePart bodyMimePart = currentlyBuildMimePart.build();
+        MimePart bodyMimePart = currentlyBuildMimePart.using(textExtractor).build();
         if (!builderStack.isEmpty()) {
             builderStack.peek().addChild(bodyMimePart);
         } else {

Modified: 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
 Mon Jun 29 08:45:43 2015
@@ -19,6 +19,7 @@
 
 package org.apache.james.mailbox.elasticsearch.json;
 
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
 import org.apache.james.mime4j.stream.Field;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -36,6 +37,10 @@ public class RootMimePartContainerBuilde
         return rootMimePart;
     }
 
+    @Override public MimePartContainerBuilder using(TextExtractor textExtractor) {
+        return this;
+    }
+
     @Override
     public MimePartContainerBuilder addToHeaders(Field field) {
         LOGGER.warn("Trying to add headers to the Root MimePart container");

Modified: 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
 Mon Jun 29 08:45:43 2015
@@ -30,6 +30,7 @@ import org.apache.james.mailbox.acl.Simp
 import org.apache.james.mailbox.acl.UnionMailboxACLResolver;
 import 
org.apache.james.mailbox.elasticsearch.events.ElasticSearchListeningMessageSearchIndex;
 import org.apache.james.mailbox.elasticsearch.json.MessageToElasticSearchJson;
+import 
org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor;
 import org.apache.james.mailbox.elasticsearch.query.CriterionConverter;
 import org.apache.james.mailbox.elasticsearch.query.QueryConverter;
 import org.apache.james.mailbox.elasticsearch.search.ElasticSearchSearcher;
@@ -158,10 +159,10 @@ public class ElasticSearchIntegrationTes
             IndexCreationFactory.createIndex(new 
TestingClientProvider(embeddedElasticSearch.getNode()))
         );
         MailboxSessionMapperFactory<InMemoryId> mapperFactory = new 
InMemoryMailboxSessionMapperFactory();
-        elasticSearchListeningMessageSearchIndex = new 
ElasticSearchListeningMessageSearchIndex<InMemoryId>(mapperFactory,
+        elasticSearchListeningMessageSearchIndex = new 
ElasticSearchListeningMessageSearchIndex<>(mapperFactory,
             new ElasticSearchIndexer(clientProvider),
-            new ElasticSearchSearcher<InMemoryId>(clientProvider, new 
QueryConverter(new CriterionConverter())),
-            new MessageToElasticSearchJson());
+            new ElasticSearchSearcher<>(clientProvider, new QueryConverter(new 
CriterionConverter())),
+            new MessageToElasticSearchJson(new DefaultTextExtractor()));
         storeMailboxManager = new StoreMailboxManager<>(
             mapperFactory,
             new MockAuthenticator(),

Modified: 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
 Mon Jun 29 08:45:43 2015
@@ -22,6 +22,8 @@ package org.apache.james.mailbox.elastic
 import com.google.common.base.Throwables;
 import org.apache.commons.io.IOUtils;
 import org.apache.james.mailbox.FlagsBuilder;
+import 
org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TikaTextExtractor;
 import org.apache.james.mailbox.store.TestId;
 import org.apache.james.mailbox.store.mail.model.Message;
 import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder;
@@ -53,11 +55,9 @@ public class MessageToElasticSearchJsonT
 
     private Date date;
     private PropertyBuilder propertyBuilder;
-    private MessageToElasticSearchJson messageToElasticSearchJson;
 
     @Before
     public void setUp() throws Exception {
-        messageToElasticSearchJson = new MessageToElasticSearchJson();
         date = formatter.parse("07-06-2015");
         propertyBuilder = new PropertyBuilder();
         propertyBuilder.setMediaType("plain");
@@ -68,6 +68,7 @@ public class MessageToElasticSearchJsonT
 
     @Test
     public void spamEmailShouldBeWellConvertedToJson() throws IOException {
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor());
         Message<TestId> spamMail = new SimpleMessage<>(date,
             SIZE,
             BODY_START_OCTET,
@@ -83,6 +84,7 @@ public class MessageToElasticSearchJsonT
 
     @Test
     public void htmlEmailShouldBeWellConvertedToJson() throws IOException {
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor());
         Message<TestId> htmlMail = new SimpleMessage<>(date,
             SIZE,
             BODY_START_OCTET,
@@ -99,6 +101,7 @@ public class MessageToElasticSearchJsonT
 
     @Test
     public void pgpSignedEmailShouldBeWellConvertedToJson() throws IOException 
{
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor());
         Message<TestId> pgpSignedMail = new SimpleMessage<>(date,
             SIZE,
             BODY_START_OCTET,
@@ -115,6 +118,7 @@ public class MessageToElasticSearchJsonT
 
     @Test
     public void simpleEmailShouldBeWellConvertedToJson() throws IOException {
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor());
         Message<TestId> mail = new SimpleMessage<>(date,
             SIZE,
             BODY_START_OCTET,
@@ -131,6 +135,7 @@ public class MessageToElasticSearchJsonT
 
     @Test
     public void recursiveEmailShouldBeWellConvertedToJson() throws IOException 
{
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor());
         Message<TestId> recursiveMail = new SimpleMessage<>(date,
             SIZE,
             BODY_START_OCTET,
@@ -147,6 +152,7 @@ public class MessageToElasticSearchJsonT
 
     @Test
     public void emailWithNoInternalDateShouldUseNowDate() throws IOException {
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor());
         Message<TestId> mailWithNoInternalDate = new SimpleMessage<>(null,
             SIZE,
             BODY_START_OCTET,
@@ -164,6 +170,7 @@ public class MessageToElasticSearchJsonT
 
     @Test(expected = NullPointerException.class)
     public void emailWithNoMailboxIdShouldThrow() throws IOException {
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor());
         Message<TestId> mailWithNoMailboxId;
         try {
             mailWithNoMailboxId = new SimpleMessage<>(date,
@@ -183,19 +190,38 @@ public class MessageToElasticSearchJsonT
 
     @Test
     public void getUpdatedJsonMessagePartShouldBehaveWellOnEmptyFlags() throws 
Exception {
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor());
         
assertThatJson(messageToElasticSearchJson.getUpdatedJsonMessagePart(new 
Flags(), MOD_SEQ))
             
.isEqualTo("{\"modSeq\":42,\"isAnswered\":false,\"isDeleted\":false,\"isDraft\":false,\"isFlagged\":false,\"isRecent\":false,\"userFlags\":[],\"isUnread\":true}");
     }
 
     @Test
     public void getUpdatedJsonMessagePartShouldBehaveWellOnNonEmptyFlags() 
throws Exception {
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor());
         
assertThatJson(messageToElasticSearchJson.getUpdatedJsonMessagePart(new 
FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.FLAGGED).add("user").build(), 
MOD_SEQ))
             
.isEqualTo("{\"modSeq\":42,\"isAnswered\":false,\"isDeleted\":true,\"isDraft\":false,\"isFlagged\":true,\"isRecent\":false,\"userFlags\":[\"user\"],\"isUnread\":true}");
     }
 
     @Test(expected = NullPointerException.class)
     public void getUpdatedJsonMessagePartShouldThrowIfFlagsIsNull() throws 
Exception {
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor());
         messageToElasticSearchJson.getUpdatedJsonMessagePart(null, MOD_SEQ);
     }
 
+    @Test
+    public void spamEmailShouldBeWellConvertedToJsonWithApacheTika() throws 
IOException {
+        MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new TikaTextExtractor());
+        Message<TestId> spamMail = new SimpleMessage<>(date,
+            SIZE,
+            BODY_START_OCTET,
+            new 
SharedByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("documents/nonTextual.eml"))),
+            new Flags(),
+            propertyBuilder,
+            MAILBOX_ID);
+        spamMail.setModSeq(MOD_SEQ);
+        assertThatJson(messageToElasticSearchJson.convertToJson(spamMail))
+            .when(IGNORING_ARRAY_ORDER)
+            
.isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("documents/nonTextual.json")));
+    }
+
 }

Added: 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java?rev=1688146&view=auto
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java
 (added)
+++ 
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java
 Mon Jun 29 08:45:43 2015
@@ -0,0 +1,79 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.elasticsearch.json;
+
+import javax.mail.Flags;
+import javax.mail.util.SharedByteArrayInputStream;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TikaTextExtractor;
+import org.apache.james.mailbox.store.TestId;
+import org.apache.james.mailbox.store.mail.model.Message;
+import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder;
+import org.apache.james.mailbox.store.mail.model.impl.SimpleMessage;
+import org.junit.Before;
+import org.junit.Test;
+import static net.javacrumbs.jsonunit.core.Option.IGNORING_ARRAY_ORDER;
+import static net.javacrumbs.jsonunit.fluent.JsonFluentAssert.assertThatJson;
+
+public class MessageToElasticSearchJsonUsingTika {
+
+    public static final int SIZE = 25;
+    public static final int BODY_START_OCTET = 100;
+    public static final TestId MAILBOX_ID = TestId.of(18L);
+    public static final long MOD_SEQ = 42L;
+    public static final long UID = 25L;
+
+    private SimpleDateFormat formatter = new SimpleDateFormat("dd-MM-yyyy");
+
+    private Date date;
+    private PropertyBuilder propertyBuilder;
+    private MessageToElasticSearchJson messageToElasticSearchJson;
+
+    @Before
+    public void setUp() throws Exception {
+        messageToElasticSearchJson = new MessageToElasticSearchJson(new 
TikaTextExtractor());
+        date = formatter.parse("07-06-2015");
+        propertyBuilder = new PropertyBuilder();
+        propertyBuilder.setMediaType("plain");
+        propertyBuilder.setSubType("text");
+        propertyBuilder.setTextualLineCount(18L);
+        propertyBuilder.setContentDescription("An e-mail");
+    }
+
+    @Test
+    public void spamEmailShouldBeWellConvertedToJson() throws IOException {
+        Message<TestId> spamMail = new SimpleMessage<>(date,
+            SIZE,
+            BODY_START_OCTET,
+            new 
SharedByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("documents/nonTextual.eml"))),
+            new Flags(),
+            propertyBuilder,
+            MAILBOX_ID);
+        spamMail.setModSeq(MOD_SEQ);
+        assertThatJson(messageToElasticSearchJson.convertToJson(spamMail))
+            .when(IGNORING_ARRAY_ORDER)
+            
.isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("documents/nonTextual.json")));
+    }
+
+}

Modified: 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json 
(original)
+++ 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json 
Mon Jun 29 08:45:43 2015
@@ -121,6 +121,7 @@
       "subtype":"plain",
       "fileName":null,
       "fileExtension":null,
+      "fileMetadata":{},
       "contentDisposition":null,
       "headers":{
         "content-transfer-encoding":[
@@ -136,6 +137,7 @@
       "mediaType":"text",
       "subtype":"html",
       "fileName":null,
+      "fileMetadata":{},
       "fileExtension":null,
       "contentDisposition":null,
       "headers":{

Added: 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml?rev=1688146&view=auto
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml 
(added)
+++ 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml 
Mon Jun 29 08:45:43 2015
@@ -0,0 +1,196 @@
+Return-Path: <btell...@linagora.com>
+Received: from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53])
+        by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA;
+        Thu, 18 Jun 2015 12:43:28 +0200
+X-Sieve: CMU Sieve 2.2
+Received: from [10.75.9.154] (unknown [92.103.166.6])
+       (using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits))
+       (No client certificate requested)
+       by alderaan.linagora.com (Postfix) with ESMTPSA id 0EB1078A
+       for <btell...@linagora.com>; Thu, 18 Jun 2015 12:43:28 +0200 (CEST)
+To: btell...@linagora.com
+From: Benoit Tellier <btell...@linagora.com>
+Subject: Test message
+Message-ID: <5582a0ce.4020...@linagora.com>
+Date: Thu, 18 Jun 2015 12:43:26 +0200
+User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101
+ Thunderbird/38.0.1
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="------------030000010109090603040500"
+
+This is a multi-part message in MIME format.
+--------------030000010109090603040500
+Content-Type: text/plain; charset=utf-8
+Content-Transfer-Encoding: 7bit
+
+This mail have a non textual attachment !
+
+--------------030000010109090603040500
+Content-Type: application/vnd.oasis.opendocument.text;
+ name="toto.odt"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment;
+ filename="toto.odt"
+
+UEsDBBQAAAgAAGNV0kZexjIMJwAAACcAAAAIAAAAbWltZXR5cGVhcHBsaWNhdGlvbi92bmQu
+b2FzaXMub3BlbmRvY3VtZW50LnRleHRQSwMEFAAACAAAY1XSRnPWCvM4AwAAOAMAABgAAABU
+aHVtYm5haWxzL3RodW1ibmFpbC5wbmeJUE5HDQoaCgAAAA1JSERSAAAAxgAAAQAIAwAAAN+D
++XIAAAEyUExURXJoXXN0dXZ4e3h3eHl7fnx8gXqCh32IgXuJnoB7fI+Fa4+IbIODe4iEe5+Q
+eISDhICFiISJhIaMiY2Gg4uJgomMjIGJkIuOkY6Uj4SRm4+WkomZnJGKgJGUjJuTh5uWj5GV
+kZGVnJaYkpWbmpmVlZmblJ2en4OYqZSYoJSfqpyeoI6gs5SipJygopKhsZapvKeajKGemKqd
+k6KjnaqilKmrn7SllL2tnaampaarraquo6msrqmusKq2urSwqLu5tLy5uaO2yKu5w77EtrXI
+1sC5p8+8rcHGt83BtdfGttnKus3NysLO3svP0tbRzdrQy9rYztrb2tvg5Nru9+bf3Ozg0PLj
+yfLl0PXo0/vt0uTn7ujn5+jp7Ovw6OL2/en4/fLr5/336vP7/v7+/gAAAP///4uCFGkAAAHB
+SURBVHja7dPbThMBEAbgYgsaQmkEVhfbxR5YDKUIslBUWuTogZI0HGxq0gBt3/8dIE00ek+C
+gW8uZv6Zu+9iUsNHUSkMDAwMDAwMDAwMDAyMJ8o4Of137+z/vfV+h7Nh+a6X+8t/rqN2MxjF
+y9H4fv1wjN1c5eOXdOpdshTNFQuvTrbGk2pzIq6/DJbGnk+udOY/ZYNqsBcnM4WpSqVbquxk
+e7nCRPZ4pZ8Oxhe+vZh+3RzbqC1uzlxvXz0Y4+fpbC7qlVqZ8moUhfH5j8/PkupBMahHYb4V
+Ti7/mm+EH1a/HkXFIG7n33RrpUY4DOOzjeR9v9SefXtYy2YOovX84mZ00R78t7/RWfPiGBgY
+GBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgY
+GBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgY
+GBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgY
+GBgYGBgYGBgYGBgYGBj3VrfW1XHYvlYYzQAAAABJRU5ErkJgglBLAwQUAAgICABjVdJGAAAA
+AAAAAAAAAAAACwAAAGNvbnRlbnQueG1spVfNjts2EL73KVQFyE1La50AWWXtIEAQoMAuUHS3
+Ra5cirLZSqRKUpb9Nn2WPlmGpEVTtuVl4oNliPPNzMfh/FD3n7ZNnWyoVEzwRZrfzNKEciJK
+xleL9M/nr9mH9NPyl3tRVYzQohSkayjXGRFcw38C2lwVTrpIO8kLgRVTBccNVYUmhWgpH7SK
+EF1YX25F6V0drW7BobamWx2rbLAjXfwS79mCQ+1S4j5W2WAhqKF6JWKVt6rOKgFRb1qs2RGL
+bc34P4t0rXVbINT3/U0/vxFyhfK7uztkpZ4w8bi2k7VFlQTRmhpnCuU3ORqwDdU4lp/BhpR4
+17xQGR0arPHJqarNKjojNquJ0JA1ltG5YcHj452X8cc7L0PdBuv1xJl8QI8gtI/Hh0MuyCbW
+l8GOQkUka6O36dChvhDCUzUKrkAt3dvZ7B1y7wG6vwjvJdNUBnByEU5wTXzERXMuaIDLESAy
+ujFp6hPfBEJNKNwiJ/ZgVU6a/vb48ETWtMEHMHsdnDGuNOaHyEhzCJM7fY8kbYXUPjBVfMOE
+07r13Na6qafL3UgH6EqW5Vko0JkjKH0ovGzDaP8mHXXyywlxd5QQti2+pmJBYd+8qJDPkMH4
+MoYUOTR5ufJzqBIdh03A7NoHkG5bKpkR4dqqFSMLYdbX4idM7mdXYGHU0Bmth47ht3TWjBBZ
+oyCBoFBEWwTa4/kgm22cOVMcoqyOLR41CqLUXJ/Lh+c/kJFlZjzCANh7Cq4Ft+lyuAO4/qGQ
+X6jgLpBVmNCspKRWy3vXy/1y4t4N70X6VVL6hLnK0wTa9gBqWL07yNJkZMEIsxXlsF/oJKpn
+SqXospcHBuPHHmLyBGrVGWdvcSvUx2OgW71IQIoG8xGiZZpAs99gyWyW/wA5t9vXuQEugpqL
+zRXUvtC/8V/dZVoBJobSTmnaXMMpyIqfS5goBmgqmffruNNw7pqRzNrxWW6fI76/597XnmaL
+JV5J3K4HASyYS7R9yZzWE0ySEssyHQybUsxaqGUqNaMq8Z25gKKEJgOjqJqRlzwNJN5RdoQx
+Gwz4TpN/PiFvePwQqfc5JacO0WQc94IXUe78i/GzvLeXdUX/7eCLxJ/H6WJil0qm2hrvMtFp
+uPHSrIa7Asw+6H1W7Lb3W113SruiMhyvMvY85M51Vkx8rzXyxX1c2LBPR611KmHaQbIuP+99
+t5ifAiAhlj1VoqHJMCETSIANK2mZwHyxGgn8JCYmqv//9+tAAewNdNpDArijRaNTRxOfl8vv
+UEsHCLswXn60AwAAnw4AAFBLAwQUAAgICABjVdJGAAAAAAAAAAAAAAAADAAAAHNldHRpbmdz
+LnhtbL1aW3PaOBR+31+R4T0lIZcmTJKOIaWlpYEB0sz2TdgH0CLreCQ5wL/fIxloFnBLjbVP
+DL6cTzqX71zkuw+LWJy8gtIc5X3l/N1Z5QRkiBGXk/vK87B1elP58PDXHY7HPIR6hGEagzSn
+GoyhR/QJvS51Pbt9X0mVrCPTXNcli0HXTVjHBOT6tfrbp+sOLLuyEFzO7itTY5J6tTqfz9/N
+L96hmlTPb29vq+7u+tEQ5ZhPDoXKnn4LhYgbIPtCthgHVjs7u6xm/ysnq0W+UU2t8rDWw3r7
+D3crgOznlBuIrW5OVpft0u4rBFl/5TDfaK2y773/vvOdng8UsCEmlfUds0zojkA5qTyc3VV3
+RRwutgNj40PuC4/MdK/gy9rFxdVxwj8Dn0z3L/v8/e3tRTHpgynO+xCRj0FzyuQE9BbCCFEA
+k5UHo1IohtGWDYVzDd8wgjzpYyb0weJPY5acchnBAqJdZe13MPcOhYZaHqbydrS1VG0Ut8q2
+vlwrbspc36udvb+9Li43L1RqV2fnRaVqPhJQfrA4saWHtpPaz40RG4EFY2Qlu4HGYJwbgAV9
+4gdiPCRR2942RWWOIqQOW2JqmijSWG4HdVnSG4iz0qJ6Vy8tFhpU+9d+U3DxbT0AAaGBqKXo
+QoGV77n4llzybq/4av8DlBsPz6bZhVQxQ7n5T9JqjxjMtNjiaXfjK3arHigkkBKNw99j/uO8
+y8lvIikThYdk5MT32AQaLJxNFKZym+jLAnFcZJG87UKhTsiVPUSfkz8gjxDwBUe5GzgW4WOc
+mOUvVXQEwiAdGUUM0hJLHciQnBKiwP3zABYkiaCdKDZRLJl+Y2rWQhUzM8SnNB6BC67yUYds
+1KUaneAmXHqQ/5GWHg2Wmm60KCJ9KM5B+BL+M8ataayqHhWbO7YsH+xZjqhMnzEqFTY294Gz
+MvpY4NwHsWDSoWZgkLCQ1j+Y0i5musWVNvZy+YCDmAnRZInugQophRIZXF/74pvfWb/4NqjO
+ErQJcIzWBCGsw5UP09c86iNuUz5tzTYQF5c31wW7zGe5IMIK4Qco/LgwHWCRH8pqCp4EmnpN
+S82g1sT8ouhRReTsqjIfYWOBE4h6PDSp8oLwiE9oyJWtfOtp3dE/uittevMA1kEW9clKKMXS
+g/j2RJJZiGoodUYNwSwJoLIU0GQiTIWr/XzpsA9U1NoACoxRFpf41KYIX3hfUm34eGl3p1+4
+ofQtUyYals19MRGpNJz5SnvPGmz1AWoIC/NCBUnirfzQfbC+8Er9fJsanHwbFee8IHLZe5WR
+hji0GdZSrAcad7L7OP8KsD2eKMkwXRE5B6CqDajnCr3a/00m92P+wNjxUGZ5gtwUwLotO1z7
+CNhAUN3jNEiNSZPJEET5bpDZyWflTk5NaRaUZOI3ubb4LloCmTGuHsXYtQh+2izlKuw117iC
+zkdG0h02AvG4OuLwRcx9sIcekDsUODqnbmrpLGra0qebbZiAahEITQ81t4nbD5jTn2NQXxMP
+UFZp9qCLNJeNIHOm9FM+mZ5SmYcitRsu6NrsFb5nR2Bd2RSofRRyTYof2s0jo0hlGpoYx2xn
+JnX4eG5LnP0dYKrC4gO/bmrs8VCHAkP8zUFE2qfHNlFSj/OzdOlKcl3y2/9N83uG8a69KjjO
+XB2uUV9tKADJk77CTr1O4NeXDS6ZWh5iENsE7m8Ar2pXNzclTBT7w44vfgA1AJPuFFd/qIJA
+8IkkqhkYTNak5mUYYo1mGdRHOpCUaUwDFXm7vyJtVT07Wl6hBGZgmPJQqW+6egqlhMjXEmfp
+Z1tu0kp5Tdm49dZtvNWXt0OIBFRLYbw3IMrgNzKHCVKDz0nEjIfZXctmA58ATk+feUTp3uYD
+L1XlVyrCA82Z7KUyNKmvwUZWHNEmetT5wxQFRb0vmE+2BftFa3nEtNYsqbp7wkcYs1R46e+i
+7GC2Ox5r8DMdcc5qA28IcSK8OO43ZqYNymk29btc9auW5eii+KjzXGpFZplK8o9yC35B4pZn
+BwSeTkFtff5J4IhtukK7mSI+k3smX9355q2a9zXgw79QSwcIGlhvBQQGAABPKAAAUEsDBBQA
+CAgIAGNV0kYAAAAAAAAAAAAAAAAIAAAAbWV0YS54bWyNk82OmzAUhfd9CsTMFoyBScACRuqi
+q6laqanUXeTYd4inYCPbDOnb10BImSSLLjn3Oz7HPxTPp7bx3kEboWTp4zDyPZBMcSHr0v+5
++xJk/nP1qVCvr4IB4Yr1LUgbtGCp56zSkHlU+r2WRFEjDJG0BUMsI6oDuVjImiZT0KycGiF/
+l/7R2o4gNAxDOCSh0jXCeZ6jabqgnF24rtfNRHGGoIExwSAcYrSwY8P/LTWy60pKqUvQiM+l
+p7g4ilI0fy90rTlv7m3AsQlyDamlwbuA4cH3zttfHXjsV8vpjjWqYirDNFDriMCZoYoj/BRE
+mwBnOxyTFJP4KcT5NsnzbLMp0B1HwRm5Z01ItA3TLM/yJMdZgRZsTgUurLv4gPd6Wqv6vsNf
+0/jHOeJm/NHF/rAGTBVf0Wd5ZmuQ4MxKVy/ioOHbtHGUhmmYhPHji5D9af8r2+w3qbcC9p1W
+b8AsSqM2evzci4YHS8y/FeeEyxM11pU0VjBv0i09NBAw1Utb+u6mJ1G0tL4R1WHMula7FYkv
+mqa1pt3xejAozRdte9bY0dHMgl4G6TKR7taGo7BgOspcyA2IfVQV6MMrQff+yOovUEsHCM3d
+MirAAQAAzwMAAFBLAwQUAAgICABjVdJGAAAAAAAAAAAAAAAACgAAAHN0eWxlcy54bWztWluv
+2zYSft9fYajovsmSfDm+NCcFtrvFFki6wCbb14KWKIsNJQok5Ut+/Q5JUaJlyUfJyRqFsXk4
+gTkfh8OPM8MhqTc/nnI6OWAuCCuevWgaehNcxCwhxf7Z+8/Hn/219+Pbv7xhaUpivE1YXOW4
+kL6QZ4rFBDoXYmuEz17Fiy1DgohtgXIstjLeshIXttPWRW/1UKZFKxvbXYPd3hKf5NjOCnvR
+F+3Gj6zBbu+Eo+PYzgoLnLrdUza280lQP2V+zPISSdKx4kRJ8enZy6Qst0FwPB6nx/mU8X0Q
+bTabQEsbg+MGV1acalQSB5hiNZgIomkUWGyOJRprn8K6JhVVvsN8NDVIoqtVFYf9aI847Aeo
+iTPER/uGBl8u7zwZv7zzxO2bI5kNrMk6eA9C/ef9u9YXeD52LIW9oCrmpBw9TYN2+zPGGlNV
+BxOg2txZGC4C89tBH2/Cj5xIzB14fBMeIxo3jLO8jzTARQEgfHxQbmrRXE16UPMy4LhkXDaG
+pOMTFLAza8IrkzkdDi8ltdA9T5JeKJgzDyDUwNH9A8HH77yLzHl7ATadBdBp6KUuGuTmqZsd
+ojBQmCZsYEnapMr3TdpPWVXAJGCrqAnEpxJzokSI6m7bCw2ul1H2FSrrvcLRcBHeQsxlH98f
+/x0oma/SPSS0Wouzy828t3ZLSxlsZymKsZ/gmIq3b0wqapon5rcy7tn7mWP8ARUi8iaQdSwo
+J/TcyrzJhQYl9Pe4gElBIIgjEcILbo/yjkD21JxMPkC3tGewv6KSiR+6QNN60wDOclRcIEoi
+Y8hVB8SJdpovMM7M9mXbADfCNMPNK0z7O/4D/VbdNsvBjDHpLCTOX2OT4xVf5zCjLAiGnLlu
+N+WatTTBKapoXcRZzbVRe47KjMSexda//ZJD1uCSQNGnJiIkZ58wbLuUwT7/3XzxtEQLb6Ky
+yDYllDaS1WyTxuDBKdseQZXPSqljsGC++l13ERlK2NEHawWW/unZC6dRtI5I0Ss/X8sl7N0+
+lDrYFyWKodDyM8bJZ6ayiEFHi1vog5pb3IOF3WG03itsn9aabgrTORKZ+aZ8TREVji+ViCPN
+/AXvWqTwPqokU4OAg5EEMwNFtMyQHUDbseMYQWkIi0ViaSVqf1bG5SyB7pT7cnfhXqRIsNoU
+VZnvzsYaaW2ELQY8iJVC+d+w2Q1c2X01m0pgoKFQi6sHr91G8gpfGDWUGMGttFyQzyCPZqXU
+bRQV+wrtoSnluiGGbUZycJy//aNhCEuoU/xPmBd6dr1j+lAtoKKbWlqMGtdionC6LBuarQlW
++jmzktoWK/jp1+tRVTlL8WkgR+hBG4iZdGfQRpqR7rCN6JdfvXbpLnLCmETRrLV302mB++xc
+ZrjQy+ZTlCRAurZFJwFKctKYP9K3y6qIZWUUqiQCs4R5wyK+7PzWaf2EQBYo1CDhdLGZLdvQ
+vIyPEths4/L/TvwndGLXx7DNUl3H4zhHpPDVIc963+wKVFYi60BeESGmBnfSJcWu75jrhB3j
+KiCUs8E2AZ5DUSmUK792YJ+zY2dwaOmE5ieMS1+yPZaZOq+r0HtpYHdA49AfIJASxBNvMEPY
+xaNICDAPgqgNqWt9/8QocYJ5UB00NFdRfr8phYpXF/ARGn6fhb/vWHLuM+ulXJYjDokGKCv1
+nv70pBNHK9gxKdUhNpyG67mWmVNLDC4P/1eIdrfTZhl0NVDoagDRIzqLl9LOQE7RgWZTyv++
+Ju9mr0UbyKMTUD1qjfryQt32HCqWrbw1eij79Vp/O5HVpowCWXsb8KDFDWLY5jZ5KqvbsH0h
+XPtCAHbDkqKzEyQTV/yaEPzq6BoMrM1qNjawdCGQYbLPpNpjwu/H0/QOCoSvmf+NDEOVyhEx
+feGRs1EeGX1Dl7RXFON4+gnpE903dBUghqMvzMQ2274qE+vXAXN3rU8+4lKinclebYfD2Xmg
+nDNtqgcUtnB8hEP26EU3rNXSns53copx9ZtjcSOvbR7tVb/A+fP0DX2KaH03feobrf6fPY71
+ZFglzdH+ivh/GYnXAVJ8wLSGGzpUA0yrqbSq3FcPEwgSbcOzSnh11y7RrogJostvWDkdk9je
+TaAdiMFx9oW6D+5T24HUunVjCrU0O+LE351N6oUi2HMGb46CdnyVK+Z1EtGrSvQlyLPnt+11
+cqE4lRbeMtw7XZAOMTmS49mDcbwY4HjRz/HiHhzPH4zj5QDHy36Ol/fgePFgHD8NcPzUz/HT
+PThePhjHqwGOV/0cr+7B8dODcbwe4Hjdz/H6HhyvHozjzQDHm36ON/fgeP1QHEe9DEd9/Eb3
+YHfzWOxOB/id9jM8vQvHUfhgJM8GSJ71kzx7JcmXIpf5gkks4CxZpGRf1VfGjcCvD9UpY1L9
+7luEqJ6reWs/IFqpp7a60XYUzuT1Q5vbx5y11Uuc0me/F1LzHW8hLpIhA0m/gVa9YqS1oG+Y
+wZsC85WCfh/YPDkPi3301FpaGtTS1jJSxFx/mKnKOucjD62t/bZDPdqATjj6W4G95NjDcqMz
+LO/FCf99mUdeD6hzhaUlR5Ko7xjXzblBtzYXqtHg5GrtQJ70GSfqa656mRmXHBHpXd/hrdar
+zhGwvcO7ljUl35WEG+takTHm5ncPxh/9HJ2a2akL6PbbnhogcGnVGWrCaRiuHB7su7K/w0CH
+7qBB8826B4RS9Xrbi2lD8tkTjJLmRgslf1RCGr8w3mLaOcR1bdRs+X37YGq+wwn1P8/9VqNv
+5e2EM4zUs6T+EbgsOI3XilofvXbKWpAj0ehoRqsblaabL4uuzY4zO7HRUR/0f7f+9r9QSwcI
+XUUkGToIAAD3LgAAUEsDBBQACAgIAGNV0kYAAAAAAAAAAAAAAAAMAAAAbWFuaWZlc3QucmRm
+zZPNboMwEITvPIVlzthALwUFcijKuWqfwDWGWAUv8poS3r6Ok1ZRpKrqn9TjrkYz3460m+1h
+HMiLsqjBVDRjKSXKSGi16Ss6uy65pds62ti2Kx+aHfFqg6WfKrp3bio5X5aFLTcMbM+zoih4
+mvM8T7wiwdU4cUgMxrSOCAkejUJp9eR8GjnO4glmV1F066CQefcgPYvdOqmgsgphtlK9h7Yg
+kYFAjQlMyoR0gxy6TkvFM5bzUTnBoe3ix2C904OiPGDwK47P2N6IDKblXuC9sO5cg998lWh6
+7mN6ddPF8d8jlGCcMu5P6rs7ef/n/i7P/xnir7R2RGxAzqNn+pDntPIfVUevUEsHCLT3aNIF
+AQAAgwMAAFBLAwQUAAAIAABjVdJGAAAAAAAAAAAAAAAAGgAAAENvbmZpZ3VyYXRpb25zMi9w
+b3B1cG1lbnUvUEsDBBQAAAgAAGNV0kYAAAAAAAAAAAAAAAAaAAAAQ29uZmlndXJhdGlvbnMy
+L3N0YXR1c2Jhci9QSwMEFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAABDb25maWd1cmF0aW9u
+czIvdG9vbGJhci9QSwMEFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAABDb25maWd1cmF0aW9u
+czIvbWVudWJhci9QSwMEFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAABDb25maWd1cmF0aW9u
+czIvZmxvYXRlci9QSwMEFAAICAgAY1XSRgAAAAAAAAAAAAAAACcAAABDb25maWd1cmF0aW9u
+czIvYWNjZWxlcmF0b3IvY3VycmVudC54bWwDAFBLBwgAAAAAAgAAAAAAAABQSwMEFAAACAAA
+Y1XSRgAAAAAAAAAAAAAAABoAAABDb25maWd1cmF0aW9uczIvdG9vbHBhbmVsL1BLAwQUAAAI
+AABjVdJGAAAAAAAAAAAAAAAAHAAAAENvbmZpZ3VyYXRpb25zMi9wcm9ncmVzc2Jhci9QSwME
+FAAACAAAY1XSRgAAAAAAAAAAAAAAAB8AAABDb25maWd1cmF0aW9uczIvaW1hZ2VzL0JpdG1h
+cHMvUEsDBBQACAgIAGNV0kYAAAAAAAAAAAAAAAAVAAAATUVUQS1JTkYvbWFuaWZlc3QueG1s
+tZTBbsMgDIbvfYqI6xTYeppQ0h4q7Qm6B2DESZHARGCq9u1HqrXJNGVqtO5mY/P/nzBQbU/O
+FkcI0Xis2Qt/ZgWg9o3Brmbv+7fylW03q8opNC1EktegyPsw3tKapYDSq2iiROUgStLS94CN
+18kBkvzeLy9Ot2wCsGabVTH6tcZCmfeH89jdJmvLXtGhZmJOZFx20BhV0rmHmqm+t0Yrym3i
+iA2/APMpJyc4ERNLGPaH5D5QGRsFXUPeYzfDYJzqQAz1RS7aIw18+RxnhAdyMZQX6UYgysOO
+Dxd2QOrxtHS28A+sX2s8NO0dVyd3PS322HlsTZfCRSKuhdIaLOTUB6FTCL8P929edz6HmHBA
+4MlwPVUYzCvx4w/YfAJQSwcIHYDzWRwBAAA+BAAAUEsBAhQAFAAACAAAY1XSRl7GMgwnAAAA
+JwAAAAgAAAAAAAAAAAAAAAAAAAAAAG1pbWV0eXBlUEsBAhQAFAAACAAAY1XSRnPWCvM4AwAA
+OAMAABgAAAAAAAAAAAAAAAAATQAAAFRodW1ibmFpbHMvdGh1bWJuYWlsLnBuZ1BLAQIUABQA
+CAgIAGNV0ka7MF5+tAMAAJ8OAAALAAAAAAAAAAAAAAAAALsDAABjb250ZW50LnhtbFBLAQIU
+ABQACAgIAGNV0kYaWG8FBAYAAE8oAAAMAAAAAAAAAAAAAAAAAKgHAABzZXR0aW5ncy54bWxQ
+SwECFAAUAAgICABjVdJGzd0yKsABAADPAwAACAAAAAAAAAAAAAAAAADmDQAAbWV0YS54bWxQ
+SwECFAAUAAgICABjVdJGXUUkGToIAAD3LgAACgAAAAAAAAAAAAAAAADcDwAAc3R5bGVzLnht
+bFBLAQIUABQACAgIAGNV0ka092jSBQEAAIMDAAAMAAAAAAAAAAAAAAAAAE4YAABtYW5pZmVz
+dC5yZGZQSwECFAAUAAAIAABjVdJGAAAAAAAAAAAAAAAAGgAAAAAAAAAAAAAAAACNGQAAQ29u
+ZmlndXJhdGlvbnMyL3BvcHVwbWVudS9QSwECFAAUAAAIAABjVdJGAAAAAAAAAAAAAAAAGgAA
+AAAAAAAAAAAAAADFGQAAQ29uZmlndXJhdGlvbnMyL3N0YXR1c2Jhci9QSwECFAAUAAAIAABj
+VdJGAAAAAAAAAAAAAAAAGAAAAAAAAAAAAAAAAAD9GQAAQ29uZmlndXJhdGlvbnMyL3Rvb2xi
+YXIvUEsBAhQAFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAAAAAAAAAAAAAAAAMxoAAENvbmZp
+Z3VyYXRpb25zMi9tZW51YmFyL1BLAQIUABQAAAgAAGNV0kYAAAAAAAAAAAAAAAAYAAAAAAAA
+AAAAAAAAAGkaAABDb25maWd1cmF0aW9uczIvZmxvYXRlci9QSwECFAAUAAgICABjVdJGAAAA
+AAIAAAAAAAAAJwAAAAAAAAAAAAAAAACfGgAAQ29uZmlndXJhdGlvbnMyL2FjY2VsZXJhdG9y
+L2N1cnJlbnQueG1sUEsBAhQAFAAACAAAY1XSRgAAAAAAAAAAAAAAABoAAAAAAAAAAAAAAAAA
+9hoAAENvbmZpZ3VyYXRpb25zMi90b29scGFuZWwvUEsBAhQAFAAACAAAY1XSRgAAAAAAAAAA
+AAAAABwAAAAAAAAAAAAAAAAALhsAAENvbmZpZ3VyYXRpb25zMi9wcm9ncmVzc2Jhci9QSwEC
+FAAUAAAIAABjVdJGAAAAAAAAAAAAAAAAHwAAAAAAAAAAAAAAAABoGwAAQ29uZmlndXJhdGlv
+bnMyL2ltYWdlcy9CaXRtYXBzL1BLAQIUABQACAgIAGNV0kYdgPNZHAEAAD4EAAAVAAAAAAAA
+AAAAAAAAAKUbAABNRVRBLUlORi9tYW5pZmVzdC54bWxQSwUGAAAAABEAEQBwBAAABB0AAAAA
+
+--------------030000010109090603040500--

Added: 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json?rev=1688146&view=auto
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json 
(added)
+++ 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json 
Mon Jun 29 08:45:43 2015
@@ -0,0 +1,156 @@
+{
+  "id":0,
+  "mailboxId":"18",
+  "modSeq":42,
+  "size":25,
+  "date":"2015-06-07T00:00:00+0200",
+  "mediaType":"plain",
+  "subtype":"text",
+  "userFlags":[],
+  "headers":{
+    "date":[
+      "Thu, 18 Jun 2015 12:43:26 +0200"
+    ],
+    "mime-version":[
+      "1.0"
+    ],
+    "x-sieve":[
+      "CMU Sieve 2.2"
+    ],
+    "return-path":[
+      "<btell...@linagora.com>"
+    ],
+    "subject":[
+      "Test message"
+    ],
+    "message-id":[
+      "<5582a0ce.4020...@linagora.com>"
+    ],
+    "received":[
+      "from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53])\t by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA;\t Thu, 18 Jun 2015 12:43:28 +0200","from [10.75.9.154] (unknown [92.103.166.6])\t(using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits))\t(No client certificate requested)\tby alderaan.linagora.com (Postfix) with ESMTPSA id 0EB1078A\tfor <btell...@linagora.com>; Thu, 18 Jun 2015 12:43:28 +0200 (CEST)"
+    ],
+    "from":[
+      "Benoit Tellier <btell...@linagora.com>"
+    ],
+    "content-type":[
+      "multipart/mixed; boundary=\"------------030000010109090603040500\""
+    ],
+    "to":[
+      "btell...@linagora.com"
+    ],
+    "user-agent":[
+      "Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.0.1"
+    ]
+  },
+  "from":[
+    {
+      "name":"Benoit Tellier",
+      "address":"btell...@linagora.com"
+    }
+  ],
+  "to":[
+    {
+      "name":"btell...@linagora.com",
+      "address":"btell...@linagora.com"
+    }
+  ],
+  "cc":[],
+  "bcc":[],
+  "subject":["Test message"],
+  "sentDate":"2015-06-18T12:43:26+0200",
+  "properties":[
+    {
+      "namespace":"http://james.apache.org/rfc2045/Content-Type",
+      "localName":"type",
+      "value":"plain"
+    },
+    {
+      "namespace":"http://james.apache.org/rfc2045/Content-Type",
+      "localName":"subtype",
+      "value":"text"
+    },
+    {
+      "namespace":"http://james.apache.org/rfc2045",
+      "localName":"Content-Description",
+      "value":"An e-mail"
+    }
+  ],
+  "attachments":[
+    {
+      "mediaType":"text",
+      "subtype":"plain",
+      "fileName":null,
+      "fileExtension":null,
+      "contentDisposition":null,
+      "fileMetadata":{
+        "X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.txt.TXTParser"],
+        "Content-Encoding":["ISO-8859-1"],
+        "Content-Type":["text/plain; charset=ISO-8859-1"]
+      },
+      "headers":{
+        "content-transfer-encoding":["7bit"],
+        "content-type":["text/plain; charset=utf-8"]
+      },
+      "textContent":"This mail have a non textual attachment !\n\n"
+    },
+    {
+      "mediaType":"application",
+      "subtype":"vnd.oasis.opendocument.text",
+      "fileName":"toto.odt",
+      "fileExtension":"odt",
+      "contentDisposition":"attachment",
+      "fileMetadata":{
+        "date":["2015-06-18T12:43:07.489893918"],
+        "meta:paragraph-count":["1"],
+        "meta:word-count":["7"],
+        "Table-Count":["0"],
+        "generator":["LibreOffice/4.4.3.2$Linux_X86_64 LibreOffice_project/40m0$Build-2"],
+        "Word-Count":["7"],
+        "dcterms:created":["2015-06-18T12:41:25.197399866"],
+        "dcterms:modified":["2015-06-18T12:43:07.489893918"],
+        "Last-Modified":["2015-06-18T12:43:07.489893918"],
+        "nbPara":["1"],
+        "Last-Save-Date":["2015-06-18T12:43:07.489893918"],
+        "meta:object-count":["0"],
+        "meta:character-count":["47"],
+        "Paragraph-Count":["1"],
+        "nbImg":["0"],
+        "meta:save-date":["2015-06-18T12:43:07.489893918"],
+        "modified":["2015-06-18T12:43:07.489893918"],
+        "Edit-Time":["PT1M42S"],
+        "meta:image-count":["0"],
+        "Image-Count":["0"],
+        "nbCharacter":["47"],
+        "nbObject":["0"],
+        "nbPage":["1"],
+        "Object-Count":["0"],
+        "nbWord":["7"],
+        "Content-Type":["application/vnd.oasis.opendocument.text"],
+        "X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.odf.OpenDocumentParser"],
+        "meta:creation-date":["2015-06-18T12:41:25.197399866"],
+        "meta:table-count":["0"],
+        "Creation-Date":["2015-06-18T12:41:25.197399866"],
+        "xmpTPg:NPages":["1"],
+        "resourceName":["toto.odt"],
+        "Character Count":["47"],
+        "editing-cycles":["2"],
+        "Page-Count":["1"],
+        "nbTab":["0"],
+        "meta:page-count":["1"]
+      },
+      "headers":{
+        "content-transfer-encoding":["base64"],
+        "content-disposition":["attachment; filename=\"toto.odt\""],
+        "content-type":["application/vnd.oasis.opendocument.text; name=\"toto.odt\""]
+      },
+      "textContent":"Awesome document provided for text extraction !\n"}
+  ],
+  "textBody":"This mail have a non textual attachment !\n\n",
+  "isAnswered":false,
+  "isDeleted":false,
+  "isDraft":false,
+  "isFlagged":false,
+  "isRecent":false,
+  "hasAttachment":true,
+  "isUnread":true
+}

Modified: 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json
 (original)
+++ 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json
 Mon Jun 29 08:45:43 2015
@@ -95,6 +95,7 @@
       "textContent": "Forward as attachment !\n\n\n",
       "mediaType": "text",
       "subtype": "plain",
+      "fileMetadata":{},
       "fileName": null,
       "fileExtension": null,
       "contentDisposition": null,
@@ -111,6 +112,7 @@
       "textContent": null,
       "mediaType": null,
       "subtype": null,
+      "fileMetadata":{},
       "fileName": null,
       "fileExtension": null,
       "contentDisposition": null,
@@ -131,6 +133,7 @@
       "mediaType": null,
       "subtype": null,
       "fileName": null,
+      "fileMetadata":{},
       "fileExtension": null,
       "contentDisposition": null,
       "headers": {
@@ -176,6 +179,7 @@
       "subtype": "plain",
       "fileName": null,
       "fileExtension": null,
+      "fileMetadata":{},
       "contentDisposition": null,
       "headers": {
         "content-transfer-encoding": [
@@ -188,6 +192,7 @@
     },
     {
       "textContent": null,
+      "fileMetadata":{},
       "mediaType": "application",
       "subtype": "vnd.oasis.opendocument.spreadsheet",
       "fileName": "Sprint-2015-05-18.ods",

Modified: 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json
URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
--- 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json 
(original)
+++ 
james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json 
Mon Jun 29 08:45:43 2015
@@ -127,6 +127,7 @@
          "mediaType": "text",
          "subtype": "plain",
          "fileName": null,
+         "fileMetadata":{},
          "fileExtension": null,
          "contentDisposition": "inline",
          "headers": {
@@ -147,6 +148,7 @@
          "subtype": "rfc822-headers",
          "fileName": "header",
          "fileExtension": "",
+         "fileMetadata":{},
          "contentDisposition": "inline",
          "headers": {
                "content-disposition": [
@@ -168,6 +170,7 @@
          "mediaType": "text",
          "subtype": "plain",
          "fileName": null,
+         "fileMetadata":{},
          "fileExtension": null,
          "contentDisposition": "inline",
          "headers": {



---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org
For additional commands, e-mail: server-dev-h...@james.apache.org

Reply via email to