Author: btellier
Date: Mon Jun 29 08:45:43 2015
New Revision: 1688146
URL: http://svn.apache.org/r1688146
Log:
MAILBOX-245 Use text extractor on JSON generation
Added:
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json
Modified:
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json
james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json
james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json
Modified:
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
(original)
+++
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
Mon Jun 29 08:45:43 2015
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Multimap;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
import org.apache.james.mailbox.elasticsearch.query.DateResolutionFormater;
import org.apache.james.mailbox.store.mail.model.MailboxId;
import org.apache.james.mailbox.store.mail.model.Message;
@@ -41,11 +42,11 @@ import java.util.stream.Collectors;
public class IndexableMessage {
- public static IndexableMessage from(Message<? extends MailboxId> message) {
+ public static IndexableMessage from(Message<? extends MailboxId> message,
TextExtractor textExtractor) {
Preconditions.checkNotNull(message.getMailboxId());
IndexableMessage indexableMessage = new IndexableMessage();
try {
- MimePart parsingResult = new MimePartParser(message).parse();
+ MimePart parsingResult = new MimePartParser(message,
textExtractor).parse();
indexableMessage.bodyText = parsingResult.locateFirstTextualBody();
indexableMessage.setFlattenedAttachments(parsingResult);
indexableMessage.copyHeaderFields(parsingResult.getHeaderCollection(),
getSanitizedInternalDate(message));
Modified:
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java
(original)
+++
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java
Mon Jun 29 08:45:43 2015
@@ -66,6 +66,7 @@ public interface JsonMessageConstants {
String CONTENT_DISPOSITION = "contentDisposition";
String FILENAME = "fileName";
String FILE_EXTENSION = "fileExtension";
+ String FILE_METADATA = "fileMetadata";
}
interface Property {
Modified:
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
(original)
+++
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
Mon Jun 29 08:45:43 2015
@@ -26,21 +26,24 @@ import com.fasterxml.jackson.databind.Ob
import com.fasterxml.jackson.datatype.guava.GuavaModule;
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import com.google.common.base.Preconditions;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
import org.apache.james.mailbox.store.mail.model.Message;
public class MessageToElasticSearchJson {
- private ObjectMapper mapper;
+ private final ObjectMapper mapper;
+ private final TextExtractor textExtractor;
- public MessageToElasticSearchJson() {
- mapper = new ObjectMapper();
- mapper.registerModule(new GuavaModule());
- mapper.registerModule(new Jdk8Module());
+ public MessageToElasticSearchJson(TextExtractor textExtractor) {
+ this.textExtractor = textExtractor;
+ this.mapper = new ObjectMapper();
+ this.mapper.registerModule(new GuavaModule());
+ this.mapper.registerModule(new Jdk8Module());
}
public String convertToJson(Message<?> message) throws
JsonProcessingException {
Preconditions.checkNotNull(message);
- return mapper.writeValueAsString(IndexableMessage.from(message));
+ return mapper.writeValueAsString(IndexableMessage.from(message,
textExtractor));
}
public String getUpdatedJsonMessagePart(Flags flags, long modSeq) throws
JsonProcessingException {
Modified:
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
(original)
+++
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
Mon Jun 29 08:45:43 2015
@@ -21,15 +21,18 @@ package org.apache.james.mailbox.elastic
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import org.apache.commons.io.FilenameUtils;
-import org.apache.commons.io.IOUtils;
+import
org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor;
+import org.apache.james.mailbox.elasticsearch.json.extractor.ParsedContent;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
import org.apache.james.mime4j.stream.Field;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Optional;
@@ -47,7 +50,7 @@ public class MimePart {
private Optional<String> fileName;
private Optional<String> fileExtension;
private Optional<String> contentDisposition;
-
+ private TextExtractor textExtractor;
private Builder() {
children = Lists.newArrayList();
@@ -58,6 +61,7 @@ public class MimePart {
this.fileName = Optional.empty();
this.fileExtension = Optional.empty();
this.contentDisposition = Optional.empty();
+ this.textExtractor = new DefaultTextExtractor();
}
@Override
@@ -104,35 +108,50 @@ public class MimePart {
}
@Override
+ public MimePartContainerBuilder using(TextExtractor textExtractor) {
+ Preconditions.checkArgument(textExtractor != null, "Provided text
extractor should not be null");
+ this.textExtractor = textExtractor;
+ return this;
+ }
+
+ @Override
public MimePart build() {
+ Optional<ParsedContent> parsedContent =
parseContent(textExtractor);
return new MimePart(
- headerCollectionBuilder.build(),
- decodeContent(),
- mediaType,
- subType,
- fileName,
- fileExtension,
- contentDisposition,
- children
+ headerCollectionBuilder.build(),
+ parsedContent.map(ParsedContent::getTextualContent)
+ .orElse(Optional.empty())
+ ,
+ mediaType,
+ subType,
+ fileName,
+ fileExtension,
+ contentDisposition,
+ children,
+ parsedContent.map(ParsedContent::getMetadata)
+ .orElse(ImmutableMultimap.<String,
String>builder().build())
);
}
- private boolean isTextualMimePart() {
- return mediaType.isPresent()
- && mediaType.get().equalsIgnoreCase("text");
- }
-
- private Optional<String> decodeContent() {
- if (bodyContent.isPresent() && isTextualMimePart()) {
+ private Optional<ParsedContent> parseContent(TextExtractor
textExtractor) {
+ if (bodyContent.isPresent()) {
try {
- return Optional.of(IOUtils.toString(bodyContent.get()));
- } catch (IOException e) {
- LOGGER.warn("Can not decode body content", e);
+ return
Optional.of(textExtractor.extractContent(bodyContent.get(),
computeContentType(), fileName));
+ } catch (Exception e) {
+ LOGGER.warn("Failed parsing attachment", e);
}
}
return Optional.empty();
}
+ private Optional<String> computeContentType() {
+ if (mediaType.isPresent() && subType.isPresent()) {
+ return Optional.of(mediaType.get() + "/" + subType.get());
+ } else {
+ return Optional.empty();
+ }
+ }
+
}
public static Builder builder() {
@@ -149,10 +168,11 @@ public class MimePart {
private final Optional<String> fileExtension;
private final Optional<String> contentDisposition;
private final List<MimePart> attachments;
+ private final ImmutableMultimap<String, String> metadata;
private MimePart(HeaderCollection headerCollection, Optional<String>
bodyTextContent, Optional<String> mediaType,
Optional<String> subType, Optional<String> fileName,
Optional<String> fileExtension,
- Optional<String> contentDisposition, List<MimePart>
attachments) {
+ Optional<String> contentDisposition, List<MimePart>
attachments, Multimap<String, String> metadata) {
this.headerCollection = headerCollection;
this.mediaType = mediaType;
this.subType = subType;
@@ -161,6 +181,7 @@ public class MimePart {
this.contentDisposition = contentDisposition;
this.attachments = attachments;
this.bodyTextContent = bodyTextContent;
+ this.metadata = ImmutableMultimap.copyOf(metadata);
}
@JsonIgnore
@@ -208,6 +229,11 @@ public class MimePart {
return bodyTextContent;
}
+ @JsonProperty(JsonMessageConstants.Attachment.FILE_METADATA)
+ public ImmutableMultimap<String, String> getMetadata() {
+ return metadata;
+ }
+
@JsonIgnore
public Optional<String> locateFirstTextualBody() {
return Stream.concat(
Modified:
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
(original)
+++
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
Mon Jun 29 08:45:43 2015
@@ -19,6 +19,7 @@
package org.apache.james.mailbox.elasticsearch.json;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
import org.apache.james.mime4j.stream.Field;
import java.io.InputStream;
@@ -27,6 +28,8 @@ public interface MimePartContainerBuilde
MimePart build();
+ MimePartContainerBuilder using(TextExtractor textExtractor);
+
MimePartContainerBuilder addToHeaders(Field field);
MimePartContainerBuilder addBodyContent(InputStream bodyContent);
Modified:
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
(original)
+++
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
Mon Jun 29 08:45:43 2015
@@ -20,6 +20,7 @@
package org.apache.james.mailbox.elasticsearch.json;
import com.google.common.base.Preconditions;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
import org.apache.james.mailbox.store.mail.model.MailboxId;
import org.apache.james.mailbox.store.mail.model.Message;
import org.apache.james.mime4j.MimeException;
@@ -36,13 +37,15 @@ import java.util.LinkedList;
public class MimePartParser {
private final Message<? extends MailboxId> message;
+ private final TextExtractor textExtractor;
private final MimeTokenStream stream;
private final Deque<MimePartContainerBuilder> builderStack;
private MimePart result;
private MimePartContainerBuilder currentlyBuildMimePart;
- public MimePartParser(Message<? extends MailboxId> message) {
+ public MimePartParser(Message<? extends MailboxId> message, TextExtractor
textExtractor) {
this.message = message;
+ this.textExtractor = textExtractor;
this.builderStack = new LinkedList<>();
this.currentlyBuildMimePart = new RootMimePartContainerBuilder();
this.stream = new MimeTokenStream(
@@ -94,7 +97,7 @@ public class MimePartParser {
}
private void closeMimePart() {
- MimePart bodyMimePart = currentlyBuildMimePart.build();
+ MimePart bodyMimePart =
currentlyBuildMimePart.using(textExtractor).build();
if (!builderStack.isEmpty()) {
builderStack.peek().addChild(bodyMimePart);
} else {
Modified:
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
(original)
+++
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
Mon Jun 29 08:45:43 2015
@@ -19,6 +19,7 @@
package org.apache.james.mailbox.elasticsearch.json;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor;
import org.apache.james.mime4j.stream.Field;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -36,6 +37,10 @@ public class RootMimePartContainerBuilde
return rootMimePart;
}
+ @Override public MimePartContainerBuilder using(TextExtractor
textExtractor) {
+ return this;
+ }
+
@Override
public MimePartContainerBuilder addToHeaders(Field field) {
LOGGER.warn("Trying to add headers to the Root MimePart container");
Modified:
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
(original)
+++
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
Mon Jun 29 08:45:43 2015
@@ -30,6 +30,7 @@ import org.apache.james.mailbox.acl.Simp
import org.apache.james.mailbox.acl.UnionMailboxACLResolver;
import
org.apache.james.mailbox.elasticsearch.events.ElasticSearchListeningMessageSearchIndex;
import org.apache.james.mailbox.elasticsearch.json.MessageToElasticSearchJson;
+import
org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor;
import org.apache.james.mailbox.elasticsearch.query.CriterionConverter;
import org.apache.james.mailbox.elasticsearch.query.QueryConverter;
import org.apache.james.mailbox.elasticsearch.search.ElasticSearchSearcher;
@@ -158,10 +159,10 @@ public class ElasticSearchIntegrationTes
IndexCreationFactory.createIndex(new
TestingClientProvider(embeddedElasticSearch.getNode()))
);
MailboxSessionMapperFactory<InMemoryId> mapperFactory = new
InMemoryMailboxSessionMapperFactory();
- elasticSearchListeningMessageSearchIndex = new
ElasticSearchListeningMessageSearchIndex<InMemoryId>(mapperFactory,
+ elasticSearchListeningMessageSearchIndex = new
ElasticSearchListeningMessageSearchIndex<>(mapperFactory,
new ElasticSearchIndexer(clientProvider),
- new ElasticSearchSearcher<InMemoryId>(clientProvider, new
QueryConverter(new CriterionConverter())),
- new MessageToElasticSearchJson());
+ new ElasticSearchSearcher<>(clientProvider, new QueryConverter(new
CriterionConverter())),
+ new MessageToElasticSearchJson(new DefaultTextExtractor()));
storeMailboxManager = new StoreMailboxManager<>(
mapperFactory,
new MockAuthenticator(),
Modified:
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
(original)
+++
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
Mon Jun 29 08:45:43 2015
@@ -22,6 +22,8 @@ package org.apache.james.mailbox.elastic
import com.google.common.base.Throwables;
import org.apache.commons.io.IOUtils;
import org.apache.james.mailbox.FlagsBuilder;
+import
org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TikaTextExtractor;
import org.apache.james.mailbox.store.TestId;
import org.apache.james.mailbox.store.mail.model.Message;
import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder;
@@ -53,11 +55,9 @@ public class MessageToElasticSearchJsonT
private Date date;
private PropertyBuilder propertyBuilder;
- private MessageToElasticSearchJson messageToElasticSearchJson;
@Before
public void setUp() throws Exception {
- messageToElasticSearchJson = new MessageToElasticSearchJson();
date = formatter.parse("07-06-2015");
propertyBuilder = new PropertyBuilder();
propertyBuilder.setMediaType("plain");
@@ -68,6 +68,7 @@ public class MessageToElasticSearchJsonT
@Test
public void spamEmailShouldBeWellConvertedToJson() throws IOException {
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new DefaultTextExtractor());
Message<TestId> spamMail = new SimpleMessage<>(date,
SIZE,
BODY_START_OCTET,
@@ -83,6 +84,7 @@ public class MessageToElasticSearchJsonT
@Test
public void htmlEmailShouldBeWellConvertedToJson() throws IOException {
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new DefaultTextExtractor());
Message<TestId> htmlMail = new SimpleMessage<>(date,
SIZE,
BODY_START_OCTET,
@@ -99,6 +101,7 @@ public class MessageToElasticSearchJsonT
@Test
public void pgpSignedEmailShouldBeWellConvertedToJson() throws IOException
{
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new DefaultTextExtractor());
Message<TestId> pgpSignedMail = new SimpleMessage<>(date,
SIZE,
BODY_START_OCTET,
@@ -115,6 +118,7 @@ public class MessageToElasticSearchJsonT
@Test
public void simpleEmailShouldBeWellConvertedToJson() throws IOException {
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new DefaultTextExtractor());
Message<TestId> mail = new SimpleMessage<>(date,
SIZE,
BODY_START_OCTET,
@@ -131,6 +135,7 @@ public class MessageToElasticSearchJsonT
@Test
public void recursiveEmailShouldBeWellConvertedToJson() throws IOException
{
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new DefaultTextExtractor());
Message<TestId> recursiveMail = new SimpleMessage<>(date,
SIZE,
BODY_START_OCTET,
@@ -147,6 +152,7 @@ public class MessageToElasticSearchJsonT
@Test
public void emailWithNoInternalDateShouldUseNowDate() throws IOException {
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new DefaultTextExtractor());
Message<TestId> mailWithNoInternalDate = new SimpleMessage<>(null,
SIZE,
BODY_START_OCTET,
@@ -164,6 +170,7 @@ public class MessageToElasticSearchJsonT
@Test(expected = NullPointerException.class)
public void emailWithNoMailboxIdShouldThrow() throws IOException {
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new DefaultTextExtractor());
Message<TestId> mailWithNoMailboxId;
try {
mailWithNoMailboxId = new SimpleMessage<>(date,
@@ -183,19 +190,38 @@ public class MessageToElasticSearchJsonT
@Test
public void getUpdatedJsonMessagePartShouldBehaveWellOnEmptyFlags() throws
Exception {
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new DefaultTextExtractor());
assertThatJson(messageToElasticSearchJson.getUpdatedJsonMessagePart(new
Flags(), MOD_SEQ))
.isEqualTo("{\"modSeq\":42,\"isAnswered\":false,\"isDeleted\":false,\"isDraft\":false,\"isFlagged\":false,\"isRecent\":false,\"userFlags\":[],\"isUnread\":true}");
}
@Test
public void getUpdatedJsonMessagePartShouldBehaveWellOnNonEmptyFlags()
throws Exception {
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new DefaultTextExtractor());
assertThatJson(messageToElasticSearchJson.getUpdatedJsonMessagePart(new
FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.FLAGGED).add("user").build(),
MOD_SEQ))
.isEqualTo("{\"modSeq\":42,\"isAnswered\":false,\"isDeleted\":true,\"isDraft\":false,\"isFlagged\":true,\"isRecent\":false,\"userFlags\":[\"user\"],\"isUnread\":true}");
}
@Test(expected = NullPointerException.class)
public void getUpdatedJsonMessagePartShouldThrowIfFlagsIsNull() throws
Exception {
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new DefaultTextExtractor());
messageToElasticSearchJson.getUpdatedJsonMessagePart(null, MOD_SEQ);
}
+ @Test
+ public void spamEmailShouldBeWellConvertedToJsonWithApacheTika() throws
IOException {
+ MessageToElasticSearchJson messageToElasticSearchJson = new
MessageToElasticSearchJson(new TikaTextExtractor());
+ Message<TestId> spamMail = new SimpleMessage<>(date,
+ SIZE,
+ BODY_START_OCTET,
+ new
SharedByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("documents/nonTextual.eml"))),
+ new Flags(),
+ propertyBuilder,
+ MAILBOX_ID);
+ spamMail.setModSeq(MOD_SEQ);
+ assertThatJson(messageToElasticSearchJson.convertToJson(spamMail))
+ .when(IGNORING_ARRAY_ORDER)
+
.isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("documents/nonTextual.json")));
+ }
+
}
Added:
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java?rev=1688146&view=auto
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java
(added)
+++
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java
Mon Jun 29 08:45:43 2015
@@ -0,0 +1,79 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.elasticsearch.json;
+
+import javax.mail.Flags;
+import javax.mail.util.SharedByteArrayInputStream;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.james.mailbox.elasticsearch.json.extractor.TikaTextExtractor;
+import org.apache.james.mailbox.store.TestId;
+import org.apache.james.mailbox.store.mail.model.Message;
+import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder;
+import org.apache.james.mailbox.store.mail.model.impl.SimpleMessage;
+import org.junit.Before;
+import org.junit.Test;
+import static net.javacrumbs.jsonunit.core.Option.IGNORING_ARRAY_ORDER;
+import static net.javacrumbs.jsonunit.fluent.JsonFluentAssert.assertThatJson;
+
+public class MessageToElasticSearchJsonUsingTika {
+
+ public static final int SIZE = 25;
+ public static final int BODY_START_OCTET = 100;
+ public static final TestId MAILBOX_ID = TestId.of(18L);
+ public static final long MOD_SEQ = 42L;
+ public static final long UID = 25L;
+
+ private SimpleDateFormat formatter = new SimpleDateFormat("dd-MM-yyyy");
+
+ private Date date;
+ private PropertyBuilder propertyBuilder;
+ private MessageToElasticSearchJson messageToElasticSearchJson;
+
+ @Before
+ public void setUp() throws Exception {
+ messageToElasticSearchJson = new MessageToElasticSearchJson(new
TikaTextExtractor());
+ date = formatter.parse("07-06-2015");
+ propertyBuilder = new PropertyBuilder();
+ propertyBuilder.setMediaType("plain");
+ propertyBuilder.setSubType("text");
+ propertyBuilder.setTextualLineCount(18L);
+ propertyBuilder.setContentDescription("An e-mail");
+ }
+
+ @Test
+ public void spamEmailShouldBeWellConvertedToJson() throws IOException {
+ Message<TestId> spamMail = new SimpleMessage<>(date,
+ SIZE,
+ BODY_START_OCTET,
+ new
SharedByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("documents/nonTextual.eml"))),
+ new Flags(),
+ propertyBuilder,
+ MAILBOX_ID);
+ spamMail.setModSeq(MOD_SEQ);
+ assertThatJson(messageToElasticSearchJson.convertToJson(spamMail))
+ .when(IGNORING_ARRAY_ORDER)
+
.isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("documents/nonTextual.json")));
+ }
+
+}
Modified:
james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json
(original)
+++
james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json
Mon Jun 29 08:45:43 2015
@@ -121,6 +121,7 @@
"subtype":"plain",
"fileName":null,
"fileExtension":null,
+ "fileMetadata":{},
"contentDisposition":null,
"headers":{
"content-transfer-encoding":[
@@ -136,6 +137,7 @@
"mediaType":"text",
"subtype":"html",
"fileName":null,
+ "fileMetadata":{},
"fileExtension":null,
"contentDisposition":null,
"headers":{
Added:
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml?rev=1688146&view=auto
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml
(added)
+++
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml
Mon Jun 29 08:45:43 2015
@@ -0,0 +1,196 @@
+Return-Path: <[email protected]>
+Received: from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53])
+ by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA;
+ Thu, 18 Jun 2015 12:43:28 +0200
+X-Sieve: CMU Sieve 2.2
+Received: from [10.75.9.154] (unknown [92.103.166.6])
+ (using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits))
+ (No client certificate requested)
+ by alderaan.linagora.com (Postfix) with ESMTPSA id 0EB1078A
+ for <[email protected]>; Thu, 18 Jun 2015 12:43:28 +0200 (CEST)
+To: [email protected]
+From: Benoit Tellier <[email protected]>
+Subject: Test message
+Message-ID: <[email protected]>
+Date: Thu, 18 Jun 2015 12:43:26 +0200
+User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101
+ Thunderbird/38.0.1
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="------------030000010109090603040500"
+
+This is a multi-part message in MIME format.
+--------------030000010109090603040500
+Content-Type: text/plain; charset=utf-8
+Content-Transfer-Encoding: 7bit
+
+This mail have a non textual attachment !
+
+--------------030000010109090603040500
+Content-Type: application/vnd.oasis.opendocument.text;
+ name="toto.odt"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment;
+ filename="toto.odt"
+
+UEsDBBQAAAgAAGNV0kZexjIMJwAAACcAAAAIAAAAbWltZXR5cGVhcHBsaWNhdGlvbi92bmQu
+b2FzaXMub3BlbmRvY3VtZW50LnRleHRQSwMEFAAACAAAY1XSRnPWCvM4AwAAOAMAABgAAABU
+aHVtYm5haWxzL3RodW1ibmFpbC5wbmeJUE5HDQoaCgAAAA1JSERSAAAAxgAAAQAIAwAAAN+D
++XIAAAEyUExURXJoXXN0dXZ4e3h3eHl7fnx8gXqCh32IgXuJnoB7fI+Fa4+IbIODe4iEe5+Q
+eISDhICFiISJhIaMiY2Gg4uJgomMjIGJkIuOkY6Uj4SRm4+WkomZnJGKgJGUjJuTh5uWj5GV
+kZGVnJaYkpWbmpmVlZmblJ2en4OYqZSYoJSfqpyeoI6gs5SipJygopKhsZapvKeajKGemKqd
+k6KjnaqilKmrn7SllL2tnaampaarraquo6msrqmusKq2urSwqLu5tLy5uaO2yKu5w77EtrXI
+1sC5p8+8rcHGt83BtdfGttnKus3NysLO3svP0tbRzdrQy9rYztrb2tvg5Nru9+bf3Ozg0PLj
+yfLl0PXo0/vt0uTn7ujn5+jp7Ovw6OL2/en4/fLr5/336vP7/v7+/gAAAP///4uCFGkAAAHB
+SURBVHja7dPbThMBEAbgYgsaQmkEVhfbxR5YDKUIslBUWuTogZI0HGxq0gBt3/8dIE00ek+C
+gW8uZv6Zu+9iUsNHUSkMDAwMDAwMDAwMDAyMJ8o4Of137+z/vfV+h7Nh+a6X+8t/rqN2MxjF
+y9H4fv1wjN1c5eOXdOpdshTNFQuvTrbGk2pzIq6/DJbGnk+udOY/ZYNqsBcnM4WpSqVbquxk
+e7nCRPZ4pZ8Oxhe+vZh+3RzbqC1uzlxvXz0Y4+fpbC7qlVqZ8moUhfH5j8/PkupBMahHYb4V
+Ti7/mm+EH1a/HkXFIG7n33RrpUY4DOOzjeR9v9SefXtYy2YOovX84mZ00R78t7/RWfPiGBgY
+GBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgY
+GBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgY
+GBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgY
+GBgYGBgYGBgYGBgYGBj3VrfW1XHYvlYYzQAAAABJRU5ErkJgglBLAwQUAAgICABjVdJGAAAA
+AAAAAAAAAAAACwAAAGNvbnRlbnQueG1spVfNjts2EL73KVQFyE1La50AWWXtIEAQoMAuUHS3
+Ra5cirLZSqRKUpb9Nn2WPlmGpEVTtuVl4oNliPPNzMfh/FD3n7ZNnWyoVEzwRZrfzNKEciJK
+xleL9M/nr9mH9NPyl3tRVYzQohSkayjXGRFcw38C2lwVTrpIO8kLgRVTBccNVYUmhWgpH7SK
+EF1YX25F6V0drW7BobamWx2rbLAjXfwS79mCQ+1S4j5W2WAhqKF6JWKVt6rOKgFRb1qs2RGL
+bc34P4t0rXVbINT3/U0/vxFyhfK7uztkpZ4w8bi2k7VFlQTRmhpnCuU3ORqwDdU4lp/BhpR4
+17xQGR0arPHJqarNKjojNquJ0JA1ltG5YcHj452X8cc7L0PdBuv1xJl8QI8gtI/Hh0MuyCbW
+l8GOQkUka6O36dChvhDCUzUKrkAt3dvZ7B1y7wG6vwjvJdNUBnByEU5wTXzERXMuaIDLESAy
+ujFp6hPfBEJNKNwiJ/ZgVU6a/vb48ETWtMEHMHsdnDGuNOaHyEhzCJM7fY8kbYXUPjBVfMOE
+07r13Na6qafL3UgH6EqW5Vko0JkjKH0ovGzDaP8mHXXyywlxd5QQti2+pmJBYd+8qJDPkMH4
+MoYUOTR5ufJzqBIdh03A7NoHkG5bKpkR4dqqFSMLYdbX4idM7mdXYGHU0Bmth47ht3TWjBBZ
+oyCBoFBEWwTa4/kgm22cOVMcoqyOLR41CqLUXJ/Lh+c/kJFlZjzCANh7Cq4Ft+lyuAO4/qGQ
+X6jgLpBVmNCspKRWy3vXy/1y4t4N70X6VVL6hLnK0wTa9gBqWL07yNJkZMEIsxXlsF/oJKpn
+SqXospcHBuPHHmLyBGrVGWdvcSvUx2OgW71IQIoG8xGiZZpAs99gyWyW/wA5t9vXuQEugpqL
+zRXUvtC/8V/dZVoBJobSTmnaXMMpyIqfS5goBmgqmffruNNw7pqRzNrxWW6fI76/597XnmaL
+JV5J3K4HASyYS7R9yZzWE0ySEssyHQybUsxaqGUqNaMq8Z25gKKEJgOjqJqRlzwNJN5RdoQx
+Gwz4TpN/PiFvePwQqfc5JacO0WQc94IXUe78i/GzvLeXdUX/7eCLxJ/H6WJil0qm2hrvMtFp
+uPHSrIa7Asw+6H1W7Lb3W113SruiMhyvMvY85M51Vkx8rzXyxX1c2LBPR611KmHaQbIuP+99
+t5ifAiAhlj1VoqHJMCETSIANK2mZwHyxGgn8JCYmqv//9+tAAewNdNpDArijRaNTRxOfl8vv
+UEsHCLswXn60AwAAnw4AAFBLAwQUAAgICABjVdJGAAAAAAAAAAAAAAAADAAAAHNldHRpbmdz
+LnhtbL1aW3PaOBR+31+R4T0lIZcmTJKOIaWlpYEB0sz2TdgH0CLreCQ5wL/fIxloFnBLjbVP
+DL6cTzqX71zkuw+LWJy8gtIc5X3l/N1Z5QRkiBGXk/vK87B1elP58PDXHY7HPIR6hGEagzSn
+GoyhR/QJvS51Pbt9X0mVrCPTXNcli0HXTVjHBOT6tfrbp+sOLLuyEFzO7itTY5J6tTqfz9/N
+L96hmlTPb29vq+7u+tEQ5ZhPDoXKnn4LhYgbIPtCthgHVjs7u6xm/ysnq0W+UU2t8rDWw3r7
+D3crgOznlBuIrW5OVpft0u4rBFl/5TDfaK2y773/vvOdng8UsCEmlfUds0zojkA5qTyc3VV3
+RRwutgNj40PuC4/MdK/gy9rFxdVxwj8Dn0z3L/v8/e3tRTHpgynO+xCRj0FzyuQE9BbCCFEA
+k5UHo1IohtGWDYVzDd8wgjzpYyb0weJPY5acchnBAqJdZe13MPcOhYZaHqbydrS1VG0Ut8q2
+vlwrbspc36udvb+9Li43L1RqV2fnRaVqPhJQfrA4saWHtpPaz40RG4EFY2Qlu4HGYJwbgAV9
+4gdiPCRR2942RWWOIqQOW2JqmijSWG4HdVnSG4iz0qJ6Vy8tFhpU+9d+U3DxbT0AAaGBqKXo
+QoGV77n4llzybq/4av8DlBsPz6bZhVQxQ7n5T9JqjxjMtNjiaXfjK3arHigkkBKNw99j/uO8
+y8lvIikThYdk5MT32AQaLJxNFKZym+jLAnFcZJG87UKhTsiVPUSfkz8gjxDwBUe5GzgW4WOc
+mOUvVXQEwiAdGUUM0hJLHciQnBKiwP3zABYkiaCdKDZRLJl+Y2rWQhUzM8SnNB6BC67yUYds
+1KUaneAmXHqQ/5GWHg2Wmm60KCJ9KM5B+BL+M8ataayqHhWbO7YsH+xZjqhMnzEqFTY294Gz
+MvpY4NwHsWDSoWZgkLCQ1j+Y0i5musWVNvZy+YCDmAnRZInugQophRIZXF/74pvfWb/4NqjO
+ErQJcIzWBCGsw5UP09c86iNuUz5tzTYQF5c31wW7zGe5IMIK4Qco/LgwHWCRH8pqCp4EmnpN
+S82g1sT8ouhRReTsqjIfYWOBE4h6PDSp8oLwiE9oyJWtfOtp3dE/uittevMA1kEW9clKKMXS
+g/j2RJJZiGoodUYNwSwJoLIU0GQiTIWr/XzpsA9U1NoACoxRFpf41KYIX3hfUm34eGl3p1+4
+ofQtUyYals19MRGpNJz5SnvPGmz1AWoIC/NCBUnirfzQfbC+8Er9fJsanHwbFee8IHLZe5WR
+hji0GdZSrAcad7L7OP8KsD2eKMkwXRE5B6CqDajnCr3a/00m92P+wNjxUGZ5gtwUwLotO1z7
+CNhAUN3jNEiNSZPJEET5bpDZyWflTk5NaRaUZOI3ubb4LloCmTGuHsXYtQh+2izlKuw117iC
+zkdG0h02AvG4OuLwRcx9sIcekDsUODqnbmrpLGra0qebbZiAahEITQ81t4nbD5jTn2NQXxMP
+UFZp9qCLNJeNIHOm9FM+mZ5SmYcitRsu6NrsFb5nR2Bd2RSofRRyTYof2s0jo0hlGpoYx2xn
+JnX4eG5LnP0dYKrC4gO/bmrs8VCHAkP8zUFE2qfHNlFSj/OzdOlKcl3y2/9N83uG8a69KjjO
+XB2uUV9tKADJk77CTr1O4NeXDS6ZWh5iENsE7m8Ar2pXNzclTBT7w44vfgA1AJPuFFd/qIJA
+8IkkqhkYTNak5mUYYo1mGdRHOpCUaUwDFXm7vyJtVT07Wl6hBGZgmPJQqW+6egqlhMjXEmfp
+Z1tu0kp5Tdm49dZtvNWXt0OIBFRLYbw3IMrgNzKHCVKDz0nEjIfZXctmA58ATk+feUTp3uYD
+L1XlVyrCA82Z7KUyNKmvwUZWHNEmetT5wxQFRb0vmE+2BftFa3nEtNYsqbp7wkcYs1R46e+i
+7GC2Ox5r8DMdcc5qA28IcSK8OO43ZqYNymk29btc9auW5eii+KjzXGpFZplK8o9yC35B4pZn
+BwSeTkFtff5J4IhtukK7mSI+k3smX9355q2a9zXgw79QSwcIGlhvBQQGAABPKAAAUEsDBBQA
+CAgIAGNV0kYAAAAAAAAAAAAAAAAIAAAAbWV0YS54bWyNk82OmzAUhfd9CsTMFoyBScACRuqi
+q6laqanUXeTYd4inYCPbDOnb10BImSSLLjn3Oz7HPxTPp7bx3kEboWTp4zDyPZBMcSHr0v+5
++xJk/nP1qVCvr4IB4Yr1LUgbtGCp56zSkHlU+r2WRFEjDJG0BUMsI6oDuVjImiZT0KycGiF/
+l/7R2o4gNAxDOCSh0jXCeZ6jabqgnF24rtfNRHGGoIExwSAcYrSwY8P/LTWy60pKqUvQiM+l
+p7g4ilI0fy90rTlv7m3AsQlyDamlwbuA4cH3zttfHXjsV8vpjjWqYirDNFDriMCZoYoj/BRE
+mwBnOxyTFJP4KcT5NsnzbLMp0B1HwRm5Z01ItA3TLM/yJMdZgRZsTgUurLv4gPd6Wqv6vsNf
+0/jHOeJm/NHF/rAGTBVf0Wd5ZmuQ4MxKVy/ioOHbtHGUhmmYhPHji5D9af8r2+w3qbcC9p1W
+b8AsSqM2evzci4YHS8y/FeeEyxM11pU0VjBv0i09NBAw1Utb+u6mJ1G0tL4R1WHMula7FYkv
+mqa1pt3xejAozRdte9bY0dHMgl4G6TKR7taGo7BgOspcyA2IfVQV6MMrQff+yOovUEsHCM3d
+MirAAQAAzwMAAFBLAwQUAAgICABjVdJGAAAAAAAAAAAAAAAACgAAAHN0eWxlcy54bWztWluv
+2zYSft9fYajovsmSfDm+NCcFtrvFFki6wCbb14KWKIsNJQok5Ut+/Q5JUaJlyUfJyRqFsXk4
+gTkfh8OPM8MhqTc/nnI6OWAuCCuevWgaehNcxCwhxf7Z+8/Hn/219+Pbv7xhaUpivE1YXOW4
+kL6QZ4rFBDoXYmuEz17Fiy1DgohtgXIstjLeshIXttPWRW/1UKZFKxvbXYPd3hKf5NjOCnvR
+F+3Gj6zBbu+Eo+PYzgoLnLrdUza280lQP2V+zPISSdKx4kRJ8enZy6Qst0FwPB6nx/mU8X0Q
+bTabQEsbg+MGV1acalQSB5hiNZgIomkUWGyOJRprn8K6JhVVvsN8NDVIoqtVFYf9aI847Aeo
+iTPER/uGBl8u7zwZv7zzxO2bI5kNrMk6eA9C/ef9u9YXeD52LIW9oCrmpBw9TYN2+zPGGlNV
+BxOg2txZGC4C89tBH2/Cj5xIzB14fBMeIxo3jLO8jzTARQEgfHxQbmrRXE16UPMy4LhkXDaG
+pOMTFLAza8IrkzkdDi8ltdA9T5JeKJgzDyDUwNH9A8HH77yLzHl7ATadBdBp6KUuGuTmqZsd
+ojBQmCZsYEnapMr3TdpPWVXAJGCrqAnEpxJzokSI6m7bCw2ul1H2FSrrvcLRcBHeQsxlH98f
+/x0oma/SPSS0Wouzy828t3ZLSxlsZymKsZ/gmIq3b0wqapon5rcy7tn7mWP8ARUi8iaQdSwo
+J/TcyrzJhQYl9Pe4gElBIIgjEcILbo/yjkD21JxMPkC3tGewv6KSiR+6QNN60wDOclRcIEoi
+Y8hVB8SJdpovMM7M9mXbADfCNMPNK0z7O/4D/VbdNsvBjDHpLCTOX2OT4xVf5zCjLAiGnLlu
+N+WatTTBKapoXcRZzbVRe47KjMSexda//ZJD1uCSQNGnJiIkZ58wbLuUwT7/3XzxtEQLb6Ky
+yDYllDaS1WyTxuDBKdseQZXPSqljsGC++l13ERlK2NEHawWW/unZC6dRtI5I0Ss/X8sl7N0+
+lDrYFyWKodDyM8bJZ6ayiEFHi1vog5pb3IOF3WG03itsn9aabgrTORKZ+aZ8TREVji+ViCPN
+/AXvWqTwPqokU4OAg5EEMwNFtMyQHUDbseMYQWkIi0ViaSVqf1bG5SyB7pT7cnfhXqRIsNoU
+VZnvzsYaaW2ELQY8iJVC+d+w2Q1c2X01m0pgoKFQi6sHr91G8gpfGDWUGMGttFyQzyCPZqXU
+bRQV+wrtoSnluiGGbUZycJy//aNhCEuoU/xPmBd6dr1j+lAtoKKbWlqMGtdionC6LBuarQlW
++jmzktoWK/jp1+tRVTlL8WkgR+hBG4iZdGfQRpqR7rCN6JdfvXbpLnLCmETRrLV302mB++xc
+ZrjQy+ZTlCRAurZFJwFKctKYP9K3y6qIZWUUqiQCs4R5wyK+7PzWaf2EQBYo1CDhdLGZLdvQ
+vIyPEths4/L/TvwndGLXx7DNUl3H4zhHpPDVIc963+wKVFYi60BeESGmBnfSJcWu75jrhB3j
+KiCUs8E2AZ5DUSmUK792YJ+zY2dwaOmE5ieMS1+yPZaZOq+r0HtpYHdA49AfIJASxBNvMEPY
+xaNICDAPgqgNqWt9/8QocYJ5UB00NFdRfr8phYpXF/ARGn6fhb/vWHLuM+ulXJYjDokGKCv1
+nv70pBNHK9gxKdUhNpyG67mWmVNLDC4P/1eIdrfTZhl0NVDoagDRIzqLl9LOQE7RgWZTyv++
+Ju9mr0UbyKMTUD1qjfryQt32HCqWrbw1eij79Vp/O5HVpowCWXsb8KDFDWLY5jZ5KqvbsH0h
+XPtCAHbDkqKzEyQTV/yaEPzq6BoMrM1qNjawdCGQYbLPpNpjwu/H0/QOCoSvmf+NDEOVyhEx
+feGRs1EeGX1Dl7RXFON4+gnpE903dBUghqMvzMQ2274qE+vXAXN3rU8+4lKinclebYfD2Xmg
+nDNtqgcUtnB8hEP26EU3rNXSns53copx9ZtjcSOvbR7tVb/A+fP0DX2KaH03feobrf6fPY71
+ZFglzdH+ivh/GYnXAVJ8wLSGGzpUA0yrqbSq3FcPEwgSbcOzSnh11y7RrogJostvWDkdk9je
+TaAdiMFx9oW6D+5T24HUunVjCrU0O+LE351N6oUi2HMGb46CdnyVK+Z1EtGrSvQlyLPnt+11
+cqE4lRbeMtw7XZAOMTmS49mDcbwY4HjRz/HiHhzPH4zj5QDHy36Ol/fgePFgHD8NcPzUz/HT
+PThePhjHqwGOV/0cr+7B8dODcbwe4Hjdz/H6HhyvHozjzQDHm36ON/fgeP1QHEe9DEd9/Eb3
+YHfzWOxOB/id9jM8vQvHUfhgJM8GSJ71kzx7JcmXIpf5gkks4CxZpGRf1VfGjcCvD9UpY1L9
+7luEqJ6reWs/IFqpp7a60XYUzuT1Q5vbx5y11Uuc0me/F1LzHW8hLpIhA0m/gVa9YqS1oG+Y
+wZsC85WCfh/YPDkPi3301FpaGtTS1jJSxFx/mKnKOucjD62t/bZDPdqATjj6W4G95NjDcqMz
+LO/FCf99mUdeD6hzhaUlR5Ko7xjXzblBtzYXqtHg5GrtQJ70GSfqa656mRmXHBHpXd/hrdar
+zhGwvcO7ljUl35WEG+takTHm5ncPxh/9HJ2a2akL6PbbnhogcGnVGWrCaRiuHB7su7K/w0CH
+7qBB8826B4RS9Xrbi2lD8tkTjJLmRgslf1RCGr8w3mLaOcR1bdRs+X37YGq+wwn1P8/9VqNv
+5e2EM4zUs6T+EbgsOI3XilofvXbKWpAj0ehoRqsblaabL4uuzY4zO7HRUR/0f7f+9r9QSwcI
+XUUkGToIAAD3LgAAUEsDBBQACAgIAGNV0kYAAAAAAAAAAAAAAAAMAAAAbWFuaWZlc3QucmRm
+zZPNboMwEITvPIVlzthALwUFcijKuWqfwDWGWAUv8poS3r6Ok1ZRpKrqn9TjrkYz3460m+1h
+HMiLsqjBVDRjKSXKSGi16Ss6uy65pds62ti2Kx+aHfFqg6WfKrp3bio5X5aFLTcMbM+zoih4
+mvM8T7wiwdU4cUgMxrSOCAkejUJp9eR8GjnO4glmV1F066CQefcgPYvdOqmgsgphtlK9h7Yg
+kYFAjQlMyoR0gxy6TkvFM5bzUTnBoe3ix2C904OiPGDwK47P2N6IDKblXuC9sO5cg998lWh6
+7mN6ddPF8d8jlGCcMu5P6rs7ef/n/i7P/xnir7R2RGxAzqNn+pDntPIfVUevUEsHCLT3aNIF
+AQAAgwMAAFBLAwQUAAAIAABjVdJGAAAAAAAAAAAAAAAAGgAAAENvbmZpZ3VyYXRpb25zMi9w
+b3B1cG1lbnUvUEsDBBQAAAgAAGNV0kYAAAAAAAAAAAAAAAAaAAAAQ29uZmlndXJhdGlvbnMy
+L3N0YXR1c2Jhci9QSwMEFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAABDb25maWd1cmF0aW9u
+czIvdG9vbGJhci9QSwMEFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAABDb25maWd1cmF0aW9u
+czIvbWVudWJhci9QSwMEFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAABDb25maWd1cmF0aW9u
+czIvZmxvYXRlci9QSwMEFAAICAgAY1XSRgAAAAAAAAAAAAAAACcAAABDb25maWd1cmF0aW9u
+czIvYWNjZWxlcmF0b3IvY3VycmVudC54bWwDAFBLBwgAAAAAAgAAAAAAAABQSwMEFAAACAAA
+Y1XSRgAAAAAAAAAAAAAAABoAAABDb25maWd1cmF0aW9uczIvdG9vbHBhbmVsL1BLAwQUAAAI
+AABjVdJGAAAAAAAAAAAAAAAAHAAAAENvbmZpZ3VyYXRpb25zMi9wcm9ncmVzc2Jhci9QSwME
+FAAACAAAY1XSRgAAAAAAAAAAAAAAAB8AAABDb25maWd1cmF0aW9uczIvaW1hZ2VzL0JpdG1h
+cHMvUEsDBBQACAgIAGNV0kYAAAAAAAAAAAAAAAAVAAAATUVUQS1JTkYvbWFuaWZlc3QueG1s
+tZTBbsMgDIbvfYqI6xTYeppQ0h4q7Qm6B2DESZHARGCq9u1HqrXJNGVqtO5mY/P/nzBQbU/O
+FkcI0Xis2Qt/ZgWg9o3Brmbv+7fylW03q8opNC1EktegyPsw3tKapYDSq2iiROUgStLS94CN
+18kBkvzeLy9Ot2wCsGabVTH6tcZCmfeH89jdJmvLXtGhZmJOZFx20BhV0rmHmqm+t0Yrym3i
+iA2/APMpJyc4ERNLGPaH5D5QGRsFXUPeYzfDYJzqQAz1RS7aIw18+RxnhAdyMZQX6UYgysOO
+Dxd2QOrxtHS28A+sX2s8NO0dVyd3PS322HlsTZfCRSKuhdIaLOTUB6FTCL8P929edz6HmHBA
+4MlwPVUYzCvx4w/YfAJQSwcIHYDzWRwBAAA+BAAAUEsBAhQAFAAACAAAY1XSRl7GMgwnAAAA
+JwAAAAgAAAAAAAAAAAAAAAAAAAAAAG1pbWV0eXBlUEsBAhQAFAAACAAAY1XSRnPWCvM4AwAA
+OAMAABgAAAAAAAAAAAAAAAAATQAAAFRodW1ibmFpbHMvdGh1bWJuYWlsLnBuZ1BLAQIUABQA
+CAgIAGNV0ka7MF5+tAMAAJ8OAAALAAAAAAAAAAAAAAAAALsDAABjb250ZW50LnhtbFBLAQIU
+ABQACAgIAGNV0kYaWG8FBAYAAE8oAAAMAAAAAAAAAAAAAAAAAKgHAABzZXR0aW5ncy54bWxQ
+SwECFAAUAAgICABjVdJGzd0yKsABAADPAwAACAAAAAAAAAAAAAAAAADmDQAAbWV0YS54bWxQ
+SwECFAAUAAgICABjVdJGXUUkGToIAAD3LgAACgAAAAAAAAAAAAAAAADcDwAAc3R5bGVzLnht
+bFBLAQIUABQACAgIAGNV0ka092jSBQEAAIMDAAAMAAAAAAAAAAAAAAAAAE4YAABtYW5pZmVz
+dC5yZGZQSwECFAAUAAAIAABjVdJGAAAAAAAAAAAAAAAAGgAAAAAAAAAAAAAAAACNGQAAQ29u
+ZmlndXJhdGlvbnMyL3BvcHVwbWVudS9QSwECFAAUAAAIAABjVdJGAAAAAAAAAAAAAAAAGgAA
+AAAAAAAAAAAAAADFGQAAQ29uZmlndXJhdGlvbnMyL3N0YXR1c2Jhci9QSwECFAAUAAAIAABj
+VdJGAAAAAAAAAAAAAAAAGAAAAAAAAAAAAAAAAAD9GQAAQ29uZmlndXJhdGlvbnMyL3Rvb2xi
+YXIvUEsBAhQAFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAAAAAAAAAAAAAAAAMxoAAENvbmZp
+Z3VyYXRpb25zMi9tZW51YmFyL1BLAQIUABQAAAgAAGNV0kYAAAAAAAAAAAAAAAAYAAAAAAAA
+AAAAAAAAAGkaAABDb25maWd1cmF0aW9uczIvZmxvYXRlci9QSwECFAAUAAgICABjVdJGAAAA
+AAIAAAAAAAAAJwAAAAAAAAAAAAAAAACfGgAAQ29uZmlndXJhdGlvbnMyL2FjY2VsZXJhdG9y
+L2N1cnJlbnQueG1sUEsBAhQAFAAACAAAY1XSRgAAAAAAAAAAAAAAABoAAAAAAAAAAAAAAAAA
+9hoAAENvbmZpZ3VyYXRpb25zMi90b29scGFuZWwvUEsBAhQAFAAACAAAY1XSRgAAAAAAAAAA
+AAAAABwAAAAAAAAAAAAAAAAALhsAAENvbmZpZ3VyYXRpb25zMi9wcm9ncmVzc2Jhci9QSwEC
+FAAUAAAIAABjVdJGAAAAAAAAAAAAAAAAHwAAAAAAAAAAAAAAAABoGwAAQ29uZmlndXJhdGlv
+bnMyL2ltYWdlcy9CaXRtYXBzL1BLAQIUABQACAgIAGNV0kYdgPNZHAEAAD4EAAAVAAAAAAAA
+AAAAAAAAAKUbAABNRVRBLUlORi9tYW5pZmVzdC54bWxQSwUGAAAAABEAEQBwBAAABB0AAAAA
+
+--------------030000010109090603040500--
Added:
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json?rev=1688146&view=auto
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json
(added)
+++
james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json
Mon Jun 29 08:45:43 2015
@@ -0,0 +1,156 @@
+{
+ "id":0,
+ "mailboxId":"18",
+ "modSeq":42,
+ "size":25,
+ "date":"2015-06-07T00:00:00+0200",
+ "mediaType":"plain",
+ "subtype":"text",
+ "userFlags":[],
+ "headers":{
+ "date":[
+ "Thu, 18 Jun 2015 12:43:26 +0200"
+ ],
+ "mime-version":[
+ "1.0"
+ ],
+ "x-sieve":[
+ "CMU Sieve 2.2"
+ ],
+ "return-path":[
+ "<[email protected]>"
+ ],
+ "subject":[
+ "Test message"
+ ],
+ "message-id":[
+ "<[email protected]>"
+ ],
+ "received":[
+ "from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53])\t by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA;\t Thu, 18 Jun 2015 12:43:28 +0200","from [10.75.9.154] (unknown [92.103.166.6])\t(using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits))\t(No client certificate requested)\tby alderaan.linagora.com (Postfix) with ESMTPSA id 0EB1078A\tfor <[email protected]>; Thu, 18 Jun 2015 12:43:28 +0200 (CEST)"
+ ],
+ "from":[
+ "Benoit Tellier <[email protected]>"
+ ],
+ "content-type":[
+ "multipart/mixed; boundary=\"------------030000010109090603040500\""
+ ],
+ "to":[
+ "[email protected]"
+ ],
+ "user-agent":[
+ "Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.0.1"
+ ]
+ },
+ "from":[
+ {
+ "name":"Benoit Tellier",
+ "address":"[email protected]"
+ }
+ ],
+ "to":[
+ {
+ "name":"[email protected]",
+ "address":"[email protected]"
+ }
+ ],
+ "cc":[],
+ "bcc":[],
+ "subject":["Test message"],
+ "sentDate":"2015-06-18T12:43:26+0200",
+ "properties":[
+ {
+ "namespace":"http://james.apache.org/rfc2045/Content-Type",
+ "localName":"type",
+ "value":"plain"
+ },
+ {
+ "namespace":"http://james.apache.org/rfc2045/Content-Type",
+ "localName":"subtype",
+ "value":"text"
+ },
+ {
+ "namespace":"http://james.apache.org/rfc2045",
+ "localName":"Content-Description",
+ "value":"An e-mail"
+ }
+ ],
+ "attachments":[
+ {
+ "mediaType":"text",
+ "subtype":"plain",
+ "fileName":null,
+ "fileExtension":null,
+ "contentDisposition":null,
+ "fileMetadata":{
+ "X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.txt.TXTParser"],
+ "Content-Encoding":["ISO-8859-1"],
+ "Content-Type":["text/plain; charset=ISO-8859-1"]
+ },
+ "headers":{
+ "content-transfer-encoding":["7bit"],
+ "content-type":["text/plain; charset=utf-8"]
+ },
+ "textContent":"This mail have a non textual attachment !\n\n"
+ },
+ {
+ "mediaType":"application",
+ "subtype":"vnd.oasis.opendocument.text",
+ "fileName":"toto.odt",
+ "fileExtension":"odt",
+ "contentDisposition":"attachment",
+ "fileMetadata":{
+ "date":["2015-06-18T12:43:07.489893918"],
+ "meta:paragraph-count":["1"],
+ "meta:word-count":["7"],
+ "Table-Count":["0"],
+ "generator":["LibreOffice/4.4.3.2$Linux_X86_64 LibreOffice_project/40m0$Build-2"],
+ "Word-Count":["7"],
+ "dcterms:created":["2015-06-18T12:41:25.197399866"],
+ "dcterms:modified":["2015-06-18T12:43:07.489893918"],
+ "Last-Modified":["2015-06-18T12:43:07.489893918"],
+ "nbPara":["1"],
+ "Last-Save-Date":["2015-06-18T12:43:07.489893918"],
+ "meta:object-count":["0"],
+ "meta:character-count":["47"],
+ "Paragraph-Count":["1"],
+ "nbImg":["0"],
+ "meta:save-date":["2015-06-18T12:43:07.489893918"],
+ "modified":["2015-06-18T12:43:07.489893918"],
+ "Edit-Time":["PT1M42S"],
+ "meta:image-count":["0"],
+ "Image-Count":["0"],
+ "nbCharacter":["47"],
+ "nbObject":["0"],
+ "nbPage":["1"],
+ "Object-Count":["0"],
+ "nbWord":["7"],
+ "Content-Type":["application/vnd.oasis.opendocument.text"],
+ "X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.odf.OpenDocumentParser"],
+ "meta:creation-date":["2015-06-18T12:41:25.197399866"],
+ "meta:table-count":["0"],
+ "Creation-Date":["2015-06-18T12:41:25.197399866"],
+ "xmpTPg:NPages":["1"],
+ "resourceName":["toto.odt"],
+ "Character Count":["47"],
+ "editing-cycles":["2"],
+ "Page-Count":["1"],
+ "nbTab":["0"],
+ "meta:page-count":["1"]
+ },
+ "headers":{
+ "content-transfer-encoding":["base64"],
+ "content-disposition":["attachment; filename=\"toto.odt\""],
+ "content-type":["application/vnd.oasis.opendocument.text; name=\"toto.odt\""]
+ },
+ "textContent":"Awesome document provided for text extraction !\n"}
+ ],
+ "textBody":"This mail have a non textual attachment !\n\n",
+ "isAnswered":false,
+ "isDeleted":false,
+ "isDraft":false,
+ "isFlagged":false,
+ "isRecent":false,
+ "hasAttachment":true,
+ "isUnread":true
+}
Modified:
james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json
(original)
+++
james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json
Mon Jun 29 08:45:43 2015
@@ -95,6 +95,7 @@
"textContent": "Forward as attachment !\n\n\n",
"mediaType": "text",
"subtype": "plain",
+ "fileMetadata":{},
"fileName": null,
"fileExtension": null,
"contentDisposition": null,
@@ -111,6 +112,7 @@
"textContent": null,
"mediaType": null,
"subtype": null,
+ "fileMetadata":{},
"fileName": null,
"fileExtension": null,
"contentDisposition": null,
@@ -131,6 +133,7 @@
"mediaType": null,
"subtype": null,
"fileName": null,
+ "fileMetadata":{},
"fileExtension": null,
"contentDisposition": null,
"headers": {
@@ -176,6 +179,7 @@
"subtype": "plain",
"fileName": null,
"fileExtension": null,
+ "fileMetadata":{},
"contentDisposition": null,
"headers": {
"content-transfer-encoding": [
@@ -188,6 +192,7 @@
},
{
"textContent": null,
+ "fileMetadata":{},
"mediaType": "application",
"subtype": "vnd.oasis.opendocument.spreadsheet",
"fileName": "Sprint-2015-05-18.ods",
Modified:
james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json
URL:
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json?rev=1688146&r1=1688145&r2=1688146&view=diff
==============================================================================
---
james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json
(original)
+++
james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json
Mon Jun 29 08:45:43 2015
@@ -127,6 +127,7 @@
"mediaType": "text",
"subtype": "plain",
"fileName": null,
+ "fileMetadata":{},
"fileExtension": null,
"contentDisposition": "inline",
"headers": {
@@ -147,6 +148,7 @@
"subtype": "rfc822-headers",
"fileName": "header",
"fileExtension": "",
+ "fileMetadata":{},
"contentDisposition": "inline",
"headers": {
"content-disposition": [
@@ -168,6 +170,7 @@
"mediaType": "text",
"subtype": "plain",
"fileName": null,
+ "fileMetadata":{},
"fileExtension": null,
"contentDisposition": "inline",
"headers": {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]