This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push:
new e857226d8a [ENHANCEMENT] Indexing: infer hasAttachment without calling
MailboxMe… (#2777)
e857226d8a is described below
commit e857226d8a72da6d1e2090eecc6190b671e32f09
Author: Benoit TELLIER <[email protected]>
AuthorDate: Fri Jul 25 21:53:35 2025 +0200
[ENHANCEMENT] Indexing: infer hasAttachment without calling MailboxMe…
(#2777)
---
.../mailbox/opensearch/json/IndexableMessage.java | 4 ++--
.../opensearch/json/IndexableMessageTest.java | 15 +-----------
.../src/test/resources/eml/alternative.json | 2 +-
.../src/test/resources/eml/inlined-mixed.json | 2 +-
.../james/mailbox/store/search/mime/MimePart.java | 27 ++++++++++++++++++----
.../search/mime/MimePartContainerBuilder.java | 3 +++
.../mailbox/store/search/mime/MimePartParser.java | 2 ++
.../search/mime/RootMimePartContainerBuilder.java | 7 ++++++
.../store/src/test/resources/eml/james-3901.json | 2 +-
.../store/src/test/resources/eml/nonTextual.json | 2 +-
.../nonTextualWithoutAttachmentTextContent.json | 2 +-
mailbox/store/src/test/resources/eml/spamMail.json | 2 +-
12 files changed, 43 insertions(+), 27 deletions(-)
diff --git
a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
index 8bfaf646f8..cb2bd7cae7 100644
---
a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
+++
b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
@@ -31,7 +31,6 @@ import java.util.stream.Collectors;
import org.apache.james.mailbox.ModSeq;
import org.apache.james.mailbox.extractor.TextExtractor;
-import org.apache.james.mailbox.model.MessageAttachmentMetadata;
import org.apache.james.mailbox.opensearch.IndexAttachments;
import org.apache.james.mailbox.opensearch.IndexBody;
import org.apache.james.mailbox.opensearch.IndexHeaders;
@@ -138,8 +137,9 @@ public class IndexableMessage {
Optional<String> bodyText =
parsingResult.locateFirstTextBody().map(SearchUtil::removeGreaterThanCharactersAtBeginningOfLine);
Optional<String> bodyHtml =
parsingResult.locateFirstHtmlBody();
- boolean hasAttachment =
MessageAttachmentMetadata.hasNonInlinedAttachment(message.getAttachments());
List<MimePart> attachments =
setFlattenedAttachments(parsingResult, indexAttachments);
+ boolean hasAttachment = attachments.stream()
+ .anyMatch(mimePart -> !mimePart.isInlinedWithCid() &&
mimePart.getContentDisposition().isPresent());
HeaderCollection headerCollection =
parsingResult.getHeaderCollection();
ZonedDateTime internalDate =
getSanitizedInternalDate(message, zoneId);
diff --git
a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java
b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java
index 3836ae127f..726f9585b4 100644
---
a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java
+++
b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java
@@ -37,10 +37,7 @@ import org.apache.james.mailbox.ModSeq;
import org.apache.james.mailbox.extractor.ParsedContent;
import org.apache.james.mailbox.extractor.TextExtractor;
import org.apache.james.mailbox.inmemory.InMemoryMessageId;
-import org.apache.james.mailbox.model.AttachmentMetadata;
-import org.apache.james.mailbox.model.MessageAttachmentMetadata;
import org.apache.james.mailbox.model.MessageId;
-import org.apache.james.mailbox.model.StringBackedAttachmentId;
import org.apache.james.mailbox.model.TestId;
import org.apache.james.mailbox.model.ThreadId;
import org.apache.james.mailbox.opensearch.IndexAttachments;
@@ -90,21 +87,11 @@ class IndexableMessageTest {
when(mailboxMessage.getMessageId())
.thenReturn(messageId);
when(mailboxMessage.getFullContent())
-
.thenReturn(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml"));
+
.thenReturn(ClassLoader.getSystemResourceAsStream("eml/emailWithTextAttachment.eml"));
when(mailboxMessage.createFlags())
.thenReturn(new Flags());
when(mailboxMessage.getUid())
.thenReturn(MESSAGE_UID);
- when(mailboxMessage.getAttachments())
- .thenReturn(ImmutableList.of(MessageAttachmentMetadata.builder()
- .attachment(AttachmentMetadata.builder()
- .messageId(messageId)
- .attachmentId(StringBackedAttachmentId.from("1"))
- .type("text/plain")
- .size(36)
- .build())
- .isInline(false)
- .build()));
// When
IndexableMessage indexableMessage = IndexableMessage.builder()
diff --git a/mailbox/opensearch/src/test/resources/eml/alternative.json
b/mailbox/opensearch/src/test/resources/eml/alternative.json
index 86a63fc739..5cc8f5c0c3 100644
--- a/mailbox/opensearch/src/test/resources/eml/alternative.json
+++ b/mailbox/opensearch/src/test/resources/eml/alternative.json
@@ -15,7 +15,7 @@
"cc":[],
"date":"2015-06-07T00:00:00+0200",
"from":[{"name":"Benoit
TELLIER","address":"[email protected]","domain":"linagora"}],
- "hasAttachment":false,
+ "hasAttachment":true,
"headers":[
{"name":"mime-version","value":"1.0"},
{"name":"subject","value":"Test"},
diff --git a/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json
b/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json
index b7408ee3db..04055bf8e4 100644
--- a/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json
+++ b/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json
@@ -25,7 +25,7 @@
"address": "[email protected]",
"domain":"domain"
}],
- "hasAttachment": false,
+ "hasAttachment": true,
"headers": [{
"name": "date",
"value": "Wed, 26 Jan 2022 12:21:37 +0100"
diff --git
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePart.java
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePart.java
index 6e4732cc6e..3f1bb33c15 100644
---
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePart.java
+++
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePart.java
@@ -32,6 +32,7 @@ import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.james.mailbox.extractor.ParsedContent;
import org.apache.james.mailbox.extractor.TextExtractor;
+import org.apache.james.mailbox.model.Cid;
import org.apache.james.mailbox.model.ContentType;
import org.apache.james.mailbox.model.ContentType.MediaType;
import org.apache.james.mailbox.model.ContentType.SubType;
@@ -58,6 +59,7 @@ public class MimePart {
private Optional<String> fileExtension;
private Optional<String> contentDisposition;
private Optional<Charset> charset;
+ private Optional<Cid> cid;
private Predicate<ContentType> shouldCaryOverContent;
private Builder(Predicate<ContentType> shouldCaryOverContent) {
@@ -71,6 +73,7 @@ public class MimePart {
this.fileExtension = Optional.empty();
this.contentDisposition = Optional.empty();
this.charset = Optional.empty();
+ this.cid = Optional.empty();
}
@Override
@@ -132,6 +135,12 @@ public class MimePart {
}
}
+ @Override
+ public MimePartContainerBuilder addCid(Cid cid) {
+ this.cid = Optional.ofNullable(cid);
+ return this;
+ }
+
@Override
public ParsedMimePart build() {
final Optional<ContentType> contentType = computeContentType();
@@ -145,7 +154,7 @@ public class MimePart {
fileName,
fileExtension,
contentDisposition,
- children);
+ cid, children);
}
}
@@ -159,12 +168,13 @@ public class MimePart {
private final Optional<String> fileName;
private final Optional<String> fileExtension;
private final Optional<String> contentDisposition;
+ private final Optional<Cid> cid;
private final List<ParsedMimePart> attachments;
public ParsedMimePart(HeaderCollection headerCollection,
Optional<InputStream> bodyContent, Optional<Charset> charset,
Optional<MediaType> mediaType,
Optional<SubType> subType, Optional<ContentType>
contentType, Optional<String> fileName, Optional<String> fileExtension,
- Optional<String> contentDisposition,
List<ParsedMimePart> attachments) {
+ Optional<String> contentDisposition,
Optional<Cid> cid, List<ParsedMimePart> attachments) {
this.headerCollection = headerCollection;
this.mediaType = mediaType;
this.subType = subType;
@@ -172,6 +182,7 @@ public class MimePart {
this.fileName = fileName;
this.fileExtension = fileExtension;
this.contentDisposition = contentDisposition;
+ this.cid = cid;
this.attachments = attachments;
this.charset = charset;
@@ -190,7 +201,7 @@ public class MimePart {
return Mono.just(Optional.empty());
})
.map(text -> new MimePart(headerCollection,
text.flatMap(ParsedContent::getTextualContent),
- mediaType, subType, fileName, fileExtension,
contentDisposition, attachments)));
+ mediaType, subType, fileName, fileExtension,
contentDisposition, cid, attachments)));
}
private Mono<ParsedContent> extractText(TextExtractor textExtractor) {
@@ -232,21 +243,27 @@ public class MimePart {
private final Optional<String> fileName;
private final Optional<String> fileExtension;
private final Optional<String> contentDisposition;
+ private final Optional<Cid> cid;
private final List<MimePart> attachments;
private MimePart(HeaderCollection headerCollection, Optional<String>
bodyTextContent, Optional<MediaType> mediaType,
- Optional<SubType> subType, Optional<String> fileName,
Optional<String> fileExtension,
- Optional<String> contentDisposition, List<MimePart>
attachments) {
+ Optional<SubType> subType, Optional<String> fileName,
Optional<String> fileExtension,
+ Optional<String> contentDisposition, Optional<Cid> cid,
List<MimePart> attachments) {
this.headerCollection = headerCollection;
this.mediaType = mediaType;
this.subType = subType;
this.fileName = fileName;
this.fileExtension = fileExtension;
this.contentDisposition = contentDisposition;
+ this.cid = cid;
this.attachments = attachments;
this.bodyTextContent = bodyTextContent;
}
+ public boolean isInlinedWithCid() {
+ return
contentDisposition.map("inline"::equalsIgnoreCase).orElse(false) &&
cid.isPresent();
+ }
+
public List<MimePart> getAttachments() {
return attachments;
}
diff --git
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartContainerBuilder.java
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartContainerBuilder.java
index f3dc788f24..415a058910 100644
---
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartContainerBuilder.java
+++
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartContainerBuilder.java
@@ -22,6 +22,7 @@ package org.apache.james.mailbox.store.search.mime;
import java.io.InputStream;
import java.nio.charset.Charset;
+import org.apache.james.mailbox.model.Cid;
import org.apache.james.mailbox.model.ContentType.MediaType;
import org.apache.james.mailbox.model.ContentType.SubType;
import org.apache.james.mime4j.stream.Field;
@@ -46,4 +47,6 @@ public interface MimePartContainerBuilder {
MimePartContainerBuilder addContentDisposition(String contentDisposition);
+ MimePartContainerBuilder addCid(Cid cid);
+
}
diff --git
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java
index 3dbd62c917..c8f8148032 100644
---
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java
+++
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java
@@ -27,6 +27,7 @@ import java.util.LinkedList;
import java.util.Optional;
import org.apache.james.mailbox.extractor.TextExtractor;
+import org.apache.james.mailbox.model.Cid;
import org.apache.james.mailbox.model.ContentType.MediaType;
import org.apache.james.mailbox.model.ContentType.SubType;
import org.apache.james.mime4j.MimeException;
@@ -130,6 +131,7 @@ public class MimePartParser {
.map(SubType::of)
.ifPresent(currentlyBuildMimePart::addSubType);
currentlyBuildMimePart.addContentDisposition(descriptor.getContentDispositionType());
+ Optional.ofNullable(descriptor.getContentId()).flatMap(v ->
Cid.parser().relaxed().unwrap().parse(v)).ifPresent(currentlyBuildMimePart::addCid);
Optional.ofNullable(descriptor.getContentDispositionFilename())
.or(() ->
Optional.ofNullable(descriptor.getContentTypeParameters().get("name")))
diff --git
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/RootMimePartContainerBuilder.java
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/RootMimePartContainerBuilder.java
index e087c59fbb..54c5203ac2 100644
---
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/RootMimePartContainerBuilder.java
+++
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/RootMimePartContainerBuilder.java
@@ -22,6 +22,7 @@ package org.apache.james.mailbox.store.search.mime;
import java.io.InputStream;
import java.nio.charset.Charset;
+import org.apache.james.mailbox.model.Cid;
import org.apache.james.mailbox.model.ContentType.MediaType;
import org.apache.james.mailbox.model.ContentType.SubType;
import org.apache.james.mime4j.stream.Field;
@@ -85,6 +86,12 @@ public class RootMimePartContainerBuilder implements
MimePartContainerBuilder {
return this;
}
+ @Override
+ public MimePartContainerBuilder addCid(Cid cid) {
+ LOGGER.warn("Trying to add content id to the Root MimePart
container");
+ return this;
+ }
+
@Override
public MimePartContainerBuilder charset(Charset charset) {
LOGGER.warn("Trying to add content charset to the Root MimePart
container");
diff --git a/mailbox/store/src/test/resources/eml/james-3901.json
b/mailbox/store/src/test/resources/eml/james-3901.json
index 421b11279a..c54055fa81 100644
--- a/mailbox/store/src/test/resources/eml/james-3901.json
+++ b/mailbox/store/src/test/resources/eml/james-3901.json
@@ -12,7 +12,7 @@
"cc":[],
"date":"2015-06-07T00:00:00+0200",
"from":[{"name":null,"address":"Drew Fossum","domain":null}],
- "hasAttachment":false,
+ "hasAttachment":true,
"headers":[
{"name":"return-path","value":"<[email protected]>"},
{"name":"received","value":"from 10.2.0.0 (EHLO 617) ([10.2.0.0])
by smtp.upn.integration-open-paas.org (JAMES SMTP Server ) with ESMTP ID
-489272706 for <[email protected]>;
Sat, 28 Nov 2020 10:49:24 +0000 (GMT)"},
diff --git a/mailbox/store/src/test/resources/eml/nonTextual.json
b/mailbox/store/src/test/resources/eml/nonTextual.json
index b57359eff6..85ef80c4e6 100644
--- a/mailbox/store/src/test/resources/eml/nonTextual.json
+++ b/mailbox/store/src/test/resources/eml/nonTextual.json
@@ -19,7 +19,7 @@
"address":"[email protected]",
"domain":"linagora"
}],
- "hasAttachment":false,
+ "hasAttachment":true,
"headers": [{
"name": "return-path",
"value": "<[email protected]>"
diff --git
a/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json
b/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json
index cf5be90ccb..255bdb2063 100644
---
a/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json
+++
b/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json
@@ -19,7 +19,7 @@
"address":"[email protected]",
"domain":"linagora"
}],
- "hasAttachment":false,
+ "hasAttachment":true,
"headers": [{
"name": "return-path",
"value": "<[email protected]>"
diff --git a/mailbox/store/src/test/resources/eml/spamMail.json
b/mailbox/store/src/test/resources/eml/spamMail.json
index 468665807e..d1e212770d 100644
--- a/mailbox/store/src/test/resources/eml/spamMail.json
+++ b/mailbox/store/src/test/resources/eml/spamMail.json
@@ -152,6 +152,6 @@
"isDraft": false,
"isFlagged": false,
"isRecent": false,
- "hasAttachment": false,
+ "hasAttachment": true,
"isUnread": true
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]