This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 27246a391888758ec39b6955f968beb3a2692357 Author: Benoit TELLIER <[email protected]> AuthorDate: Sun Apr 5 19:17:49 2026 +0200 JAMES-4198 Heuristic to skip attachment parsing based on message headers --- .../src/test/resources/eml/alternative.json | 4 +-- .../src/test/resources/eml/alternative_simple.json | 4 +-- .../src/test/resources/eml/inlined-mixed.json | 4 +-- .../apache/james/mailbox/store/MessageStorer.java | 30 ++++++++++++++++++++-- .../apache/james/mailbox/store/ResultUtils.java | 26 ++++++++++++++++++- .../mailbox/store/search/mime/MimePartParser.java | 3 +++ mailbox/store/src/test/resources/eml/htmlMail.json | 4 +-- .../src/test/resources/eml/invalidCharset.json | 4 +-- .../store/src/test/resources/eml/james-3901.json | 4 +-- .../store/src/test/resources/eml/nonTextual.json | 4 +-- .../nonTextualWithoutAttachmentTextContent.json | 4 +-- .../src/test/resources/eml/pgpSignedMail.json | 4 +-- mailbox/store/src/test/resources/eml/spamMail.json | 4 +-- .../src/test/resources/eml/spamMailNoHeaders.json | 4 +-- 14 files changed, 78 insertions(+), 25 deletions(-) diff --git a/mailbox/opensearch/src/test/resources/eml/alternative.json b/mailbox/opensearch/src/test/resources/eml/alternative.json index 5cc8f5c0c3..a71413429a 100644 --- a/mailbox/opensearch/src/test/resources/eml/alternative.json +++ b/mailbox/opensearch/src/test/resources/eml/alternative.json @@ -26,7 +26,7 @@ {"name":"content-type","value":"multipart/mixed; boundary=\"-=Part.17f.732e3d28e1c76db4.18da4b40791.62ef5e3fa995057d=-\""} ], "mailboxId":"18", - "mediaType":"plain", + "mediaType":"multipart", "messageId":"184", "threadId":"184", "modSeq":42, @@ -34,7 +34,7 @@ "saveDate":null, "size":25, "subject":["Test"], - "subtype":"text", + "subtype":"mixed", "to":[{"name":"Benoit TELLIER","address":"[email protected]","domain":"linagora"}], "uid":25, "userFlags":[], diff --git a/mailbox/opensearch/src/test/resources/eml/alternative_simple.json b/mailbox/opensearch/src/test/resources/eml/alternative_simple.json index 50ce79eeec..e4a5e827c4 100644 --- a/mailbox/opensearch/src/test/resources/eml/alternative_simple.json +++ b/mailbox/opensearch/src/test/resources/eml/alternative_simple.json @@ -16,7 +16,7 @@ {"name":"content-type","value":"multipart/alternative; boundary=\"-=Part.17e.48ac92d73c356567.18da4b40791.360a293e2f389efe=-\""} ], "mailboxId":"18", - "mediaType":"plain", + "mediaType":"multipart", "messageId":"184", "threadId":"184", "modSeq":42, @@ -24,7 +24,7 @@ "saveDate":null, "size":25, "subject":["Test"], - "subtype":"text", + "subtype":"alternative", "to":[{"name":"Benoit TELLIER","address":"[email protected]","domain":"linagora"}], "uid":25, "userFlags":[], diff --git a/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json b/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json index 04055bf8e4..6a7f797e1b 100644 --- a/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json +++ b/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json @@ -55,14 +55,14 @@ "value": "8bit" }], "mailboxId": "18", - "mediaType": "plain", + "mediaType": "multipart", "messageId": "184", "threadId": "184", "modSeq": 42, "sentDate": "2022-01-26T12:21:37+0100", "size": 25, "subject": ["My subject"], - "subtype": "text", + "subtype": "mixed", "to": [{ "name": "Alice", "address": "[email protected]", diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/MessageStorer.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/MessageStorer.java index 8b0c973561..aa2693dde4 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/MessageStorer.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/MessageStorer.java @@ -44,8 +44,14 @@ import org.apache.james.mailbox.store.mail.model.MimeMessageId; import org.apache.james.mailbox.store.mail.model.Subject; import org.apache.james.mailbox.store.mail.model.impl.MessageParser; import org.apache.james.mailbox.store.mail.utils.MimeMessageHeadersUtil; +import org.apache.james.mime4j.codec.DecodeMonitor; import org.apache.james.mime4j.dom.Message; +import org.apache.james.mime4j.dom.field.ContentDispositionField; +import org.apache.james.mime4j.dom.field.ContentTypeField; +import org.apache.james.mime4j.field.ContentDispositionFieldLenientImpl; +import org.apache.james.mime4j.field.ContentTypeFieldLenientImpl; import org.apache.james.mime4j.message.HeaderImpl; +import org.apache.james.mime4j.stream.Field; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -104,7 +110,7 @@ public interface MessageStorer { return mapperFactory.getMessageMapper(session) .executeReactive( - storeAttachments(messageId, content, maybeMessage, session) + storeAttachments(messageId, content, maybeMessage, session, headers) .subscribeOn(Schedulers.boundedElastic()) .zipWith(threadIdGuessingAlgorithm.guessThreadIdReactive(messageId, mimeMessageId, inReplyTo, references, subject, session)) .flatMap(Throwing.function((Tuple2<List<MessageAttachmentMetadata>, ThreadId> pair) -> { @@ -118,13 +124,33 @@ public interface MessageStorer { }).sneakyThrow())); } - private Mono<List<MessageAttachmentMetadata>> storeAttachments(MessageId messageId, Content messageContent, Optional<Message> maybeMessage, MailboxSession session) { + private Mono<List<MessageAttachmentMetadata>> storeAttachments(MessageId messageId, Content messageContent, Optional<Message> maybeMessage, MailboxSession session, HeaderImpl headers) { + if (!mayNeedAttachmentParsing(headers)) { + return Mono.just(ImmutableList.of()); + } return Mono.usingWhen(Mono.fromCallable(() -> extractAttachments(messageContent, maybeMessage)), attachments -> attachmentMapperFactory.getAttachmentMapper(session) .storeAttachmentsReactive(attachments.getAttachments(), messageId), parsingResults -> Mono.fromRunnable(parsingResults::dispose).subscribeOn(Schedulers.boundedElastic())); } + private boolean mayNeedAttachmentParsing(HeaderImpl headers) { + Field rawContentType = headers.getField("Content-Type"); + if (rawContentType != null) { + ContentTypeField contentTypeField = ContentTypeFieldLenientImpl.PARSER.parse(rawContentType, DecodeMonitor.SILENT); + if (contentTypeField.getMediaType().equalsIgnoreCase("multipart")) { + return true; + } + } + Field rawDisposition = headers.getField("Content-Disposition"); + if (rawDisposition != null) { + ContentDispositionField dispositionField = ContentDispositionFieldLenientImpl.PARSER.parse(rawDisposition, DecodeMonitor.SILENT); + return ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT + .equalsIgnoreCase(dispositionField.getDispositionType()); + } + return false; + } + private MessageParser.ParsingResult extractAttachments(Content contentIn, Optional<Message> maybeMessage) { return maybeMessage.map(message -> { try { diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/ResultUtils.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/ResultUtils.java index 3c9600c89b..1680f9ac51 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/ResultUtils.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/ResultUtils.java @@ -231,6 +231,30 @@ public class ResultUtils { return result; } + private static boolean isNonMultipart(MailboxMessage message) throws IOException { + return createHeaders(message).stream() + .filter(h -> h.getName().equalsIgnoreCase("Content-Type")) + .map(Header::getValue) + .findFirst() + .map(ct -> !ct.toLowerCase().trim().startsWith("multipart")) + .orElse(true); + } + + private static PartContentBuilder buildHandleSinglePart(int[] path, MailboxMessage message) throws IOException, MimeException { + // CF RFC-3501 section 6.4.5 + // + // Every message has at least one part number. Non-[MIME-IMB] + // messages, and non-multipart [MIME-IMB] messages with no + // encapsulated message, only have a part 1. + if (path.length == 1 && path[0] == 1 && isNonMultipart(message)) { + InputStream stream = message.getFullContent(); + PartContentBuilder result = new PartContentBuilder(); + result.parse(stream); + return result; + } + return build(path, message); + } + private static int[] path(MimePath mimePath) { if (mimePath == null) { return null; @@ -253,7 +277,7 @@ public class ResultUtils { throws IOException, MimeException { int[] path = path(mimePath); if (path != null) { - PartContentBuilder builder = build(path, message); + PartContentBuilder builder = buildHandleSinglePart(path, message); List<Header> headers = builder.getMimeHeaders(); messageResult.setMimeHeaders(mimePath, headers.iterator()); } diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java index c8f8148032..770a6de203 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java @@ -73,6 +73,9 @@ public class MimePartParser { private void processMimePart(MimeTokenStream stream, EntityState state) { switch (state) { case T_START_MULTIPART: + extractMimePartBodyDescription(stream); + stackCurrent(); + break; case T_START_MESSAGE: stackCurrent(); break; diff --git a/mailbox/store/src/test/resources/eml/htmlMail.json b/mailbox/store/src/test/resources/eml/htmlMail.json index e9803bc164..295233d4ea 100644 --- a/mailbox/store/src/test/resources/eml/htmlMail.json +++ b/mailbox/store/src/test/resources/eml/htmlMail.json @@ -7,8 +7,8 @@ "size":25, "date":"2015-06-07T00:00:00+0200", "saveDate": null, - "mediaType":"plain", - "subtype":"text", + "mediaType":"multipart", + "subtype":"alternative", "userFlags":["social","pocket-money"], "mimeMessageID": "<[email protected]>", "headers": [{ diff --git a/mailbox/store/src/test/resources/eml/invalidCharset.json b/mailbox/store/src/test/resources/eml/invalidCharset.json index d65196e233..c0b55b0207 100644 --- a/mailbox/store/src/test/resources/eml/invalidCharset.json +++ b/mailbox/store/src/test/resources/eml/invalidCharset.json @@ -22,7 +22,7 @@ {"name":"content-transfer-encoding","value":"7bit"} ], "mailboxId":"18", - "mediaType":"plain", + "mediaType":"text", "messageId":"184", "threadId": "184", "modSeq":42, @@ -30,7 +30,7 @@ "saveDate": null, "size":25, "subject":["Inline attachment"], - "subtype":"text", + "subtype":"plain", "to":[{ "name":"Antoine DUPRAT", "address":"[email protected]", diff --git a/mailbox/store/src/test/resources/eml/james-3901.json b/mailbox/store/src/test/resources/eml/james-3901.json index 791c790d26..7685a62304 100644 --- a/mailbox/store/src/test/resources/eml/james-3901.json +++ b/mailbox/store/src/test/resources/eml/james-3901.json @@ -32,7 +32,7 @@ {"name":"content-type","value":"multipart/mixed;\tboundary=\"--boundary-LibPST-iamunique-1722682679_-_-\""} ], "mailboxId":"18", - "mediaType":"plain", + "mediaType":"multipart", "messageId":"184", "threadId":"184", "modSeq":42, @@ -40,7 +40,7 @@ "saveDate":null, "size":25, "subject":["Revised Draft"], - "subtype":"text", + "subtype":"mixed", "to":[{"name":null,"address":"Mary Kay Miller","domain":null}], "uid":25, "userFlags":[], diff --git a/mailbox/store/src/test/resources/eml/nonTextual.json b/mailbox/store/src/test/resources/eml/nonTextual.json index 85ef80c4e6..a9e8e7e739 100644 --- a/mailbox/store/src/test/resources/eml/nonTextual.json +++ b/mailbox/store/src/test/resources/eml/nonTextual.json @@ -58,14 +58,14 @@ "value": "multipart/mixed; boundary=\"------------030000010109090603040500\"" }], "mailboxId":"18", - "mediaType":"plain", + "mediaType":"multipart", "messageId":"184", "threadId": "184", "modSeq":42, "sentDate":"2015-06-18T12:43:26+0200", "size":25, "subject":["Test message"], - "subtype":"text", + "subtype":"mixed", "to":[{ "name":null, "address":"[email protected]", diff --git a/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json b/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json index 255bdb2063..c02940adf5 100644 --- a/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json +++ b/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json @@ -58,14 +58,14 @@ "value": "multipart/mixed; boundary=\"------------030000010109090603040500\"" }], "mailboxId":"18", - "mediaType":"plain", + "mediaType":"multipart", "messageId":"184", "threadId": "184", "modSeq":42, "sentDate":"2015-06-18T12:43:26+0200", "size":25, "subject":["Test message"], - "subtype":"text", + "subtype":"mixed", "to":[{ "name":null, "address":"[email protected]", diff --git a/mailbox/store/src/test/resources/eml/pgpSignedMail.json b/mailbox/store/src/test/resources/eml/pgpSignedMail.json index 3395c05e4b..b8c745a0e4 100644 --- a/mailbox/store/src/test/resources/eml/pgpSignedMail.json +++ b/mailbox/store/src/test/resources/eml/pgpSignedMail.json @@ -7,8 +7,8 @@ "size": 25, "date": "2015-06-07T00:00:00+0200", "saveDate": null, - "mediaType": "plain", - "subtype": "text", + "mediaType": "text", + "subtype": "plain", "userFlags": [ "security", "debian" diff --git a/mailbox/store/src/test/resources/eml/spamMail.json b/mailbox/store/src/test/resources/eml/spamMail.json index 2d4eac4b35..c018e89bc4 100644 --- a/mailbox/store/src/test/resources/eml/spamMail.json +++ b/mailbox/store/src/test/resources/eml/spamMail.json @@ -7,8 +7,8 @@ "size": 25, "date": "2015-06-07T00:00:00+0200", "saveDate": null, - "mediaType": "plain", - "subtype": "text", + "mediaType": "multipart", + "subtype": "mixed", "mimeMessageID": "<[email protected]>", "userFlags": [], "headers": [{ diff --git a/mailbox/store/src/test/resources/eml/spamMailNoHeaders.json b/mailbox/store/src/test/resources/eml/spamMailNoHeaders.json index 8d4f8c35b3..a371606b15 100644 --- a/mailbox/store/src/test/resources/eml/spamMailNoHeaders.json +++ b/mailbox/store/src/test/resources/eml/spamMailNoHeaders.json @@ -7,8 +7,8 @@ "size": 25, "date": "2015-06-07T00:00:00+0200", "saveDate": null, - "mediaType": "plain", - "subtype": "text", + "mediaType": "multipart", + "subtype": "mixed", "mimeMessageID": "<[email protected]>", "userFlags": [], "headers": [], --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
