Repository: james-project Updated Branches: refs/heads/master 7bcc6201b -> 868765716
JAMES-1867 Add parameter for indexing attachments or not Project: http://git-wip-us.apache.org/repos/asf/james-project/repo Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/fe5a6a49 Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/fe5a6a49 Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/fe5a6a49 Branch: refs/heads/master Commit: fe5a6a498642cfc1d5b476e2b1d954b3139148ab Parents: 7bcc620 Author: Laura Royet <lro...@linagora.com> Authored: Mon Nov 28 14:01:58 2016 +0100 Committer: Laura Royet <lro...@linagora.com> Committed: Wed Nov 30 15:32:35 2016 +0100 ---------------------------------------------------------------------- .../mailbox/elasticsearch/IndexAttachments.java | 25 +++++++++ .../elasticsearch/json/IndexableMessage.java | 18 +++++-- .../json/MessageToElasticSearchJson.java | 3 +- .../json/IndexableMessageTest.java | 57 +++++++++++++++++--- .../src/test/resources/eml/Toto.eml | 41 ++++++++++++++ 5 files changed, 130 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/james-project/blob/fe5a6a49/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/IndexAttachments.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/IndexAttachments.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/IndexAttachments.java new file mode 100644 index 0000000..6d45eed --- /dev/null +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/IndexAttachments.java @@ -0,0 +1,25 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.mailbox.elasticsearch; + +public enum IndexAttachments { + + NO, YES; +} http://git-wip-us.apache.org/repos/asf/james-project/blob/fe5a6a49/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java index b474c1b..0896140 100644 --- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java @@ -30,6 +30,7 @@ import java.util.stream.Stream; import org.apache.james.mailbox.MailboxSession.User; import org.apache.james.mailbox.MessageUid; +import org.apache.james.mailbox.elasticsearch.IndexAttachments; import org.apache.james.mailbox.elasticsearch.query.DateResolutionFormater; import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mailbox.store.mail.model.MailboxMessage; @@ -41,11 +42,14 @@ import com.github.steveash.guavate.Guavate; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Multimap; public class IndexableMessage { - public static IndexableMessage from(MailboxMessage message, List<User> users, TextExtractor textExtractor, ZoneId zoneId) { + public static IndexableMessage from(MailboxMessage message, List<User> users, TextExtractor textExtractor, + ZoneId zoneId, IndexAttachments indexAttachments) { + Preconditions.checkNotNull(message.getMailboxId()); Preconditions.checkArgument(!users.isEmpty()); IndexableMessage indexableMessage = new IndexableMessage(); @@ -54,7 +58,7 @@ public class IndexableMessage { indexableMessage.users = users.stream().map(User::getUserName).collect(Guavate.toImmutableList()); indexableMessage.bodyText = parsingResult.locateFirstTextBody(); indexableMessage.bodyHtml = parsingResult.locateFirstHtmlBody(); - indexableMessage.setFlattenedAttachments(parsingResult); + indexableMessage.setFlattenedAttachments(parsingResult, indexAttachments); indexableMessage.copyHeaderFields(parsingResult.getHeaderCollection(), getSanitizedInternalDate(message, zoneId)); indexableMessage.generateText(); } catch (IOException | MimeException e) { @@ -64,9 +68,13 @@ public class IndexableMessage { return indexableMessage; } - private void setFlattenedAttachments(MimePart parsingResult) { - attachments = parsingResult.getAttachmentsStream() - .collect(Collectors.toList()); + private void setFlattenedAttachments(MimePart parsingResult, IndexAttachments indexAttachments) { + if (indexAttachments.equals(IndexAttachments.YES)) { + attachments = parsingResult.getAttachmentsStream() + .collect(Collectors.toList()); + } else { + attachments = ImmutableList.of(); + } } private void copyHeaderFields(HeaderCollection headerCollection, ZonedDateTime internalDate) { http://git-wip-us.apache.org/repos/asf/james-project/blob/fe5a6a49/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java index dd27a6e..8e9ca86 100644 --- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java @@ -26,6 +26,7 @@ import javax.inject.Inject; import javax.mail.Flags; import org.apache.james.mailbox.MailboxSession.User; +import org.apache.james.mailbox.elasticsearch.IndexAttachments; import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mailbox.store.mail.model.MailboxMessage; @@ -56,7 +57,7 @@ public class MessageToElasticSearchJson { public String convertToJson(MailboxMessage message, List<User> users) throws JsonProcessingException { Preconditions.checkNotNull(message); - return mapper.writeValueAsString(IndexableMessage.from(message, users, textExtractor, zoneId)); + return mapper.writeValueAsString(IndexableMessage.from(message, users, textExtractor, zoneId, IndexAttachments.NO)); } public String getUpdatedJsonMessagePart(Flags flags, long modSeq) throws JsonProcessingException { http://git-wip-us.apache.org/repos/asf/james-project/blob/fe5a6a49/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java index 2a62f2c..930c21c 100644 --- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java +++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java @@ -29,6 +29,7 @@ import java.time.ZoneId; import javax.mail.Flags; import org.apache.commons.io.IOUtils; +import org.apache.james.mailbox.elasticsearch.IndexAttachments; import org.apache.james.mailbox.mock.MockMailboxSession; import org.apache.james.mailbox.store.TestId; import org.apache.james.mailbox.store.extractor.DefaultTextExtractor; @@ -51,7 +52,7 @@ public class IndexableMessageTest { .thenReturn(new Flags()); IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), - new DefaultTextExtractor(), ZoneId.of("Europe/Paris")); + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); assertThat(indexableMessage.getText()).isEmpty(); } @@ -68,7 +69,7 @@ public class IndexableMessageTest { .thenReturn(new Flags()); IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), - new DefaultTextExtractor(), ZoneId.of("Europe/Paris")); + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); assertThat(indexableMessage.getText()).isEqualTo("Second user us...@james.org First user u...@james.org"); } @@ -85,7 +86,7 @@ public class IndexableMessageTest { .thenReturn(new Flags()); IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), - new DefaultTextExtractor(), ZoneId.of("Europe/Paris")); + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); assertThat(indexableMessage.getText()).isEqualTo("First to u...@james.org Second to us...@james.org"); } @@ -102,7 +103,7 @@ public class IndexableMessageTest { .thenReturn(new Flags()); IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), - new DefaultTextExtractor(), ZoneId.of("Europe/Paris")); + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); assertThat(indexableMessage.getText()).isEqualTo("First cc u...@james.org Second cc us...@james.org"); } @@ -119,7 +120,7 @@ public class IndexableMessageTest { .thenReturn(new Flags()); IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), - new DefaultTextExtractor(), ZoneId.of("Europe/Paris")); + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); assertThat(indexableMessage.getText()).isEqualTo("Second bcc us...@james.org First bcc u...@james.org"); } @@ -136,7 +137,7 @@ public class IndexableMessageTest { .thenReturn(new Flags()); IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), - new DefaultTextExtractor(), ZoneId.of("Europe/Paris")); + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); assertThat(indexableMessage.getText()).isEqualTo("subject1 subject2"); } @@ -153,7 +154,7 @@ public class IndexableMessageTest { .thenReturn(new Flags()); IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), - new DefaultTextExtractor(), ZoneId.of("Europe/Paris")); + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); assertThat(indexableMessage.getText()).isEqualTo("My body"); } @@ -170,7 +171,7 @@ public class IndexableMessageTest { .thenReturn(new Flags()); IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), - new DefaultTextExtractor(), ZoneId.of("Europe/Paris")); + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); assertThat(indexableMessage.getText()).isEqualTo("Ad Min ad...@opush.test " + "a@test a@test B b@test " + @@ -182,4 +183,44 @@ public class IndexableMessageTest { "-- \n" + "Ad Min\n"); } + + @Test + public void attachmentsShouldNotBeenIndexedWhenAsked() throws Exception { + //Given + MailboxMessage mailboxMessage = mock(MailboxMessage.class); + TestId mailboxId = TestId.of(1); + when(mailboxMessage.getMailboxId()) + .thenReturn(mailboxId); + when(mailboxMessage.getFullContent()) + .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/Toto.eml")))); + when(mailboxMessage.createFlags()) + .thenReturn(new Flags()); + + // When + IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); + + // Then + assertThat(indexableMessage.getAttachments()).isEmpty(); + } + + @Test + public void attachmentsShouldBeenIndexedWhenAsked() throws Exception { + //Given + MailboxMessage mailboxMessage = mock(MailboxMessage.class); + TestId mailboxId = TestId.of(1); + when(mailboxMessage.getMailboxId()) + .thenReturn(mailboxId); + when(mailboxMessage.getFullContent()) + .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/Toto.eml")))); + when(mailboxMessage.createFlags()) + .thenReturn(new Flags()); + + // When + IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES); + + // Then + assertThat(indexableMessage.getAttachments()).isNotEmpty(); + } } http://git-wip-us.apache.org/repos/asf/james-project/blob/fe5a6a49/mailbox/elasticsearch/src/test/resources/eml/Toto.eml ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/resources/eml/Toto.eml b/mailbox/elasticsearch/src/test/resources/eml/Toto.eml new file mode 100644 index 0000000..ab2de03 --- /dev/null +++ b/mailbox/elasticsearch/src/test/resources/eml/Toto.eml @@ -0,0 +1,41 @@ +Return-Path: <lro...@linagora.com> +Received: from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53]) + by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA; + Tue, 29 Nov 2016 13:57:56 +0100 +X-Sieve: CMU Sieve 2.2 +Received: from [10.69.0.146] (mne69-10-88-173-78-196.fbx.proxad.net [88.173.78.196]) + (using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits)) + (No client certificate requested) + by alderaan.linagora.com (Postfix) with ESMTPSA id CB0233783 + for <lro...@linagora.com>; Tue, 29 Nov 2016 13:57:56 +0100 (CET) +To: Laura ROYET <lro...@linagora.com> +From: Laura Royet <lro...@linagora.com> +Subject: Toto +Message-ID: <25871149-ccf9-9c7e-1e16-334beeb27...@linagora.com> +Date: Tue, 29 Nov 2016 13:57:56 +0100 +User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 + Thunderbird/45.5.0 +MIME-Version: 1.0 +Content-Type: multipart/mixed; + boundary="------------3F646081DC313215FD6847F4" + +This is a multi-part message in MIME format. +--------------3F646081DC313215FD6847F4 +Content-Type: text/plain; charset=utf-8; format=flowed +Content-Transfer-Encoding: 7bit + + + +-- +Laura Royet + + +--------------3F646081DC313215FD6847F4 +Content-Type: text/plain; charset=UTF-8; + name="Toto.txt" +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; + filename="Toto.txt" + +VG90bwpDb3B5cmlnaHQgwqkgMjAxNiBMSU5BR09SQSAKQ0MgQlktU0EsIEdOVSBGREwK +--------------3F646081DC313215FD6847F4-- --------------------------------------------------------------------- To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org For additional commands, e-mail: server-dev-h...@james.apache.org