MAILBOX-278 Ignore Error thrown by Tika so that the rest of the message can still be parsed
Project: http://git-wip-us.apache.org/repos/asf/james-project/repo Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/6c0947f6 Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/6c0947f6 Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/6c0947f6 Branch: refs/heads/master Commit: 6c0947f6da8f6b8c188578acbe6edac7d588a1fa Parents: 05df700 Author: Raphael Ouazana <raphael.ouaz...@linagora.com> Authored: Tue Jan 31 15:08:09 2017 +0100 Committer: Quynh Nguyen <qngu...@linagora.com> Committed: Tue Feb 7 16:15:23 2017 +0700 ---------------------------------------------------------------------- .../mailbox/elasticsearch/json/MimePart.java | 2 +- .../json/IndexableMessageTest.java | 24 + .../test/resources/eml/bodyMakeTikaToFail.eml | 1272 ++++++++++++++++++ 3 files changed, 1297 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/james-project/blob/6c0947f6/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java index d2416a9..ee623cf 100644 --- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java @@ -147,7 +147,7 @@ public class MimePart { bodyContent.get(), computeContentType().orElse(null), fileName.orElse(null))); - } catch (Exception e) { + } catch (Throwable e) { LOGGER.warn("Failed parsing attachment", e); } } http://git-wip-us.apache.org/repos/asf/james-project/blob/6c0947f6/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java 
---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java index f5ff7fc..cf98e6e 100644 --- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java +++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java @@ -38,6 +38,7 @@ import org.apache.james.mailbox.mock.MockMailboxSession; import org.apache.james.mailbox.model.TestId; import org.apache.james.mailbox.store.extractor.DefaultTextExtractor; import org.apache.james.mailbox.store.mail.model.MailboxMessage; +import org.apache.james.mailbox.tika.extractor.TikaTextExtractor; import org.junit.Test; import com.google.common.collect.ImmutableList; @@ -279,4 +280,27 @@ public class IndexableMessageTest { assertThat(indexableMessage.getText()).contains("first attachment content"); assertThat(indexableMessage.getText()).contains("third attachment content"); } + + @Test + public void messageShouldBeIndexedEvenIfTikaParserThrowsAnError() throws Exception { + //Given + MailboxMessage mailboxMessage = mock(MailboxMessage.class); + TestId mailboxId = TestId.of(1); + when(mailboxMessage.getMailboxId()) + .thenReturn(mailboxId); + when(mailboxMessage.getFullContent()) + .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/bodyMakeTikaToFail.eml")))); + when(mailboxMessage.createFlags()) + .thenReturn(new Flags()); + when(mailboxMessage.getUid()) + .thenReturn(MESSAGE_UID); + + // When + IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), + new TikaTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES); + + // Then + 
assertThat(indexableMessage.getText()).contains("subject should be parsed"); + } + } --------------------------------------------------------------------- To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org For additional commands, e-mail: server-dev-h...@james.apache.org