This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git

commit 27246a391888758ec39b6955f968beb3a2692357
Author: Benoit TELLIER <[email protected]>
AuthorDate: Sun Apr 5 19:17:49 2026 +0200

    JAMES-4198 Heuristic to skip attachment parsing based on message headers
---
 .../src/test/resources/eml/alternative.json        |  4 +--
 .../src/test/resources/eml/alternative_simple.json |  4 +--
 .../src/test/resources/eml/inlined-mixed.json      |  4 +--
 .../apache/james/mailbox/store/MessageStorer.java  | 30 ++++++++++++++++++++--
 .../apache/james/mailbox/store/ResultUtils.java    | 26 ++++++++++++++++++-
 .../mailbox/store/search/mime/MimePartParser.java  |  3 +++
 mailbox/store/src/test/resources/eml/htmlMail.json |  4 +--
 .../src/test/resources/eml/invalidCharset.json     |  4 +--
 .../store/src/test/resources/eml/james-3901.json   |  4 +--
 .../store/src/test/resources/eml/nonTextual.json   |  4 +--
 .../nonTextualWithoutAttachmentTextContent.json    |  4 +--
 .../src/test/resources/eml/pgpSignedMail.json      |  4 +--
 mailbox/store/src/test/resources/eml/spamMail.json |  4 +--
 .../src/test/resources/eml/spamMailNoHeaders.json  |  4 +--
 14 files changed, 78 insertions(+), 25 deletions(-)

diff --git a/mailbox/opensearch/src/test/resources/eml/alternative.json b/mailbox/opensearch/src/test/resources/eml/alternative.json
index 5cc8f5c0c3..a71413429a 100644
--- a/mailbox/opensearch/src/test/resources/eml/alternative.json
+++ b/mailbox/opensearch/src/test/resources/eml/alternative.json
@@ -26,7 +26,7 @@
     {"name":"content-type","value":"multipart/mixed; boundary=\"-=Part.17f.732e3d28e1c76db4.18da4b40791.62ef5e3fa995057d=-\""}
   ],
   "mailboxId":"18",
-  "mediaType":"plain",
+  "mediaType":"multipart",
   "messageId":"184",
   "threadId":"184",
   "modSeq":42,
@@ -34,7 +34,7 @@
   "saveDate":null,
   "size":25,
   "subject":["Test"],
-  "subtype":"text",
+  "subtype":"mixed",
   "to":[{"name":"Benoit TELLIER","address":"[email protected]","domain":"linagora"}],
   "uid":25,
   "userFlags":[],
diff --git a/mailbox/opensearch/src/test/resources/eml/alternative_simple.json b/mailbox/opensearch/src/test/resources/eml/alternative_simple.json
index 50ce79eeec..e4a5e827c4 100644
--- a/mailbox/opensearch/src/test/resources/eml/alternative_simple.json
+++ b/mailbox/opensearch/src/test/resources/eml/alternative_simple.json
@@ -16,7 +16,7 @@
  {"name":"content-type","value":"multipart/alternative; boundary=\"-=Part.17e.48ac92d73c356567.18da4b40791.360a293e2f389efe=-\""}
 ],
  "mailboxId":"18",
- "mediaType":"plain",
+ "mediaType":"multipart",
  "messageId":"184",
  "threadId":"184",
  "modSeq":42,
@@ -24,7 +24,7 @@
  "saveDate":null,
  "size":25,
  "subject":["Test"],
- "subtype":"text",
+ "subtype":"alternative",
  "to":[{"name":"Benoit TELLIER","address":"[email protected]","domain":"linagora"}],
  "uid":25,
  "userFlags":[],
diff --git a/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json b/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json
index 04055bf8e4..6a7f797e1b 100644
--- a/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json
+++ b/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json
@@ -55,14 +55,14 @@
         "value": "8bit"
     }],
     "mailboxId": "18",
-    "mediaType": "plain",
+    "mediaType": "multipart",
     "messageId": "184",
     "threadId": "184",
     "modSeq": 42,
     "sentDate": "2022-01-26T12:21:37+0100",
     "size": 25,
     "subject": ["My subject"],
-    "subtype": "text",
+    "subtype": "mixed",
     "to": [{
         "name": "Alice",
         "address": "[email protected]",
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/MessageStorer.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/MessageStorer.java
index 8b0c973561..aa2693dde4 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/MessageStorer.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/MessageStorer.java
@@ -44,8 +44,14 @@ import org.apache.james.mailbox.store.mail.model.MimeMessageId;
 import org.apache.james.mailbox.store.mail.model.Subject;
 import org.apache.james.mailbox.store.mail.model.impl.MessageParser;
 import org.apache.james.mailbox.store.mail.utils.MimeMessageHeadersUtil;
+import org.apache.james.mime4j.codec.DecodeMonitor;
 import org.apache.james.mime4j.dom.Message;
+import org.apache.james.mime4j.dom.field.ContentDispositionField;
+import org.apache.james.mime4j.dom.field.ContentTypeField;
+import org.apache.james.mime4j.field.ContentDispositionFieldLenientImpl;
+import org.apache.james.mime4j.field.ContentTypeFieldLenientImpl;
 import org.apache.james.mime4j.message.HeaderImpl;
+import org.apache.james.mime4j.stream.Field;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -104,7 +110,7 @@ public interface MessageStorer {
 
             return mapperFactory.getMessageMapper(session)
                 .executeReactive(
-                    storeAttachments(messageId, content, maybeMessage, session)
+                    storeAttachments(messageId, content, maybeMessage, session, headers)
                         .subscribeOn(Schedulers.boundedElastic())
                         .zipWith(threadIdGuessingAlgorithm.guessThreadIdReactive(messageId, mimeMessageId, inReplyTo, references, subject, session))
                         .flatMap(Throwing.function((Tuple2<List<MessageAttachmentMetadata>, ThreadId> pair) -> {
@@ -118,13 +124,44 @@ public interface MessageStorer {
                         }).sneakyThrow()));
         }
 
-        private Mono<List<MessageAttachmentMetadata>> storeAttachments(MessageId messageId, Content messageContent, Optional<Message> maybeMessage, MailboxSession session) {
+        private Mono<List<MessageAttachmentMetadata>> storeAttachments(MessageId messageId, Content messageContent, Optional<Message> maybeMessage, MailboxSession session, HeaderImpl headers) {
+            // Header-only heuristic: when it rules attachments out, skip extraction and storage altogether.
+            if (!mayNeedAttachmentParsing(headers)) {
+                return Mono.just(ImmutableList.of());
+            }
             return Mono.usingWhen(Mono.fromCallable(() -> extractAttachments(messageContent, maybeMessage)),
                 attachments -> attachmentMapperFactory.getAttachmentMapper(session)
                     .storeAttachmentsReactive(attachments.getAttachments(), messageId),
                 parsingResults -> Mono.fromRunnable(parsingResults::dispose).subscribeOn(Schedulers.boundedElastic()));
         }
 
+        /**
+         * Heuristic telling, from the supplied headers alone, whether the message may carry
+         * attachments and therefore needs to go through attachment parsing.
+         *
+         * @param headers headers handed over by the caller of storeAttachments
+         * @return true when the Content-Type media type is multipart or, failing that, when the
+         *         Content-Disposition type is attachment; false otherwise
+         */
+        private boolean mayNeedAttachmentParsing(HeaderImpl headers) {
+            Field rawContentType = headers.getField("Content-Type");
+            if (rawContentType != null) {
+                ContentTypeField contentTypeField = ContentTypeFieldLenientImpl.PARSER.parse(rawContentType, DecodeMonitor.SILENT);
+                // Constant-first comparison keeps this null-safe should the lenient parser fail to
+                // extract a media type from a malformed value; same style as the disposition check below.
+                if ("multipart".equalsIgnoreCase(contentTypeField.getMediaType())) {
+                    return true;
+                }
+            }
+            Field rawDisposition = headers.getField("Content-Disposition");
+            if (rawDisposition != null) {
+                ContentDispositionField dispositionField = ContentDispositionFieldLenientImpl.PARSER.parse(rawDisposition, DecodeMonitor.SILENT);
+                return ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT
+                    .equalsIgnoreCase(dispositionField.getDispositionType());
+            }
+            return false;
+        }
+
         private MessageParser.ParsingResult extractAttachments(Content contentIn, Optional<Message> maybeMessage) {
             return maybeMessage.map(message -> {
                 try {
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/ResultUtils.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/ResultUtils.java
index 3c9600c89b..1680f9ac51 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/ResultUtils.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/ResultUtils.java
@@ -231,6 +231,41 @@ public class ResultUtils {
         return result;
     }
 
+    /**
+     * Tells whether the message is anything but multipart, judging from its first
+     * Content-Type header. A message carrying no Content-Type header counts as non-multipart.
+     */
+    private static boolean isNonMultipart(MailboxMessage message) throws IOException {
+        return createHeaders(message).stream()
+            .filter(h -> h.getName().equalsIgnoreCase("Content-Type"))
+            .map(Header::getValue)
+            .findFirst()
+            // regionMatches(ignoreCase = true, ...) compares case-insensitively without depending
+            // on the default locale, unlike the no-arg toLowerCase().
+            .map(ct -> !ct.trim().regionMatches(true, 0, "multipart", 0, "multipart".length()))
+            .orElse(true);
+    }
+
+    /**
+     * Builds the part content for the given MIME path: part 1 of a non-multipart message is
+     * parsed from the full message content, every other case is delegated to
+     * {@code build(path, message)}.
+     */
+    private static PartContentBuilder buildHandleSinglePart(int[] path, MailboxMessage message) throws IOException, MimeException {
+        // CF RFC-3501 section 6.4.5
+        //
+        // Every message has at least one part number.  Non-[MIME-IMB]
+        // messages, and non-multipart [MIME-IMB] messages with no
+        // encapsulated message, only have a part 1.
+        if (path.length == 1 && path[0] == 1 && isNonMultipart(message)) {
+            InputStream stream = message.getFullContent();
+            PartContentBuilder result = new PartContentBuilder();
+            result.parse(stream);
+            return result;
+        }
+        return build(path, message);
+    }
+
     private static int[] path(MimePath mimePath) {
         if (mimePath == null) {
             return null;
@@ -253,7 +288,7 @@ public class ResultUtils {
             throws IOException, MimeException {
         int[] path = path(mimePath);
         if (path != null) {
-            PartContentBuilder builder = build(path, message);
+            PartContentBuilder builder = buildHandleSinglePart(path, message);
             List<Header> headers = builder.getMimeHeaders();
             messageResult.setMimeHeaders(mimePath, headers.iterator());
         }
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java
index c8f8148032..770a6de203 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java
@@ -73,6 +73,9 @@ public class MimePartParser {
     private void processMimePart(MimeTokenStream stream, EntityState state) {
         switch (state) {
             case T_START_MULTIPART:
+                extractMimePartBodyDescription(stream);
+                stackCurrent();
+                break;
             case T_START_MESSAGE:
                 stackCurrent();
                 break;
diff --git a/mailbox/store/src/test/resources/eml/htmlMail.json b/mailbox/store/src/test/resources/eml/htmlMail.json
index e9803bc164..295233d4ea 100644
--- a/mailbox/store/src/test/resources/eml/htmlMail.json
+++ b/mailbox/store/src/test/resources/eml/htmlMail.json
@@ -7,8 +7,8 @@
   "size":25,
   "date":"2015-06-07T00:00:00+0200",
   "saveDate": null,
-  "mediaType":"plain",
-  "subtype":"text",
+  "mediaType":"multipart",
+  "subtype":"alternative",
   "userFlags":["social","pocket-money"],
   "mimeMessageID": "<[email protected]>",
   "headers": [{
diff --git a/mailbox/store/src/test/resources/eml/invalidCharset.json b/mailbox/store/src/test/resources/eml/invalidCharset.json
index d65196e233..c0b55b0207 100644
--- a/mailbox/store/src/test/resources/eml/invalidCharset.json
+++ b/mailbox/store/src/test/resources/eml/invalidCharset.json
@@ -22,7 +22,7 @@
     {"name":"content-transfer-encoding","value":"7bit"}
   ],
   "mailboxId":"18",
-  "mediaType":"plain",
+  "mediaType":"text",
   "messageId":"184",
   "threadId": "184",
   "modSeq":42,
@@ -30,7 +30,7 @@
   "saveDate": null,
   "size":25,
   "subject":["Inline attachment"],
-  "subtype":"text",
+  "subtype":"plain",
   "to":[{
     "name":"Antoine DUPRAT",
     "address":"[email protected]",
diff --git a/mailbox/store/src/test/resources/eml/james-3901.json b/mailbox/store/src/test/resources/eml/james-3901.json
index 791c790d26..7685a62304 100644
--- a/mailbox/store/src/test/resources/eml/james-3901.json
+++ b/mailbox/store/src/test/resources/eml/james-3901.json
@@ -32,7 +32,7 @@
     {"name":"content-type","value":"multipart/mixed;\tboundary=\"--boundary-LibPST-iamunique-1722682679_-_-\""}
   ],
   "mailboxId":"18",
-  "mediaType":"plain",
+  "mediaType":"multipart",
   "messageId":"184",
   "threadId":"184",
   "modSeq":42,
@@ -40,7 +40,7 @@
   "saveDate":null,
   "size":25,
   "subject":["Revised Draft"],
-  "subtype":"text",
+  "subtype":"mixed",
   "to":[{"name":null,"address":"Mary Kay Miller","domain":null}],
   "uid":25,
   "userFlags":[],
diff --git a/mailbox/store/src/test/resources/eml/nonTextual.json b/mailbox/store/src/test/resources/eml/nonTextual.json
index 85ef80c4e6..a9e8e7e739 100644
--- a/mailbox/store/src/test/resources/eml/nonTextual.json
+++ b/mailbox/store/src/test/resources/eml/nonTextual.json
@@ -58,14 +58,14 @@
     "value": "multipart/mixed; boundary=\"------------030000010109090603040500\""
   }],
   "mailboxId":"18",
-  "mediaType":"plain",
+  "mediaType":"multipart",
   "messageId":"184",
   "threadId": "184",
   "modSeq":42,
   "sentDate":"2015-06-18T12:43:26+0200",
   "size":25,
   "subject":["Test message"],
-  "subtype":"text",
+  "subtype":"mixed",
   "to":[{
     "name":null,
     "address":"[email protected]",
diff --git a/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json b/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json
index 255bdb2063..c02940adf5 100644
--- a/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json
+++ b/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json
@@ -58,14 +58,14 @@
     "value": "multipart/mixed; boundary=\"------------030000010109090603040500\""
   }],
   "mailboxId":"18",
-  "mediaType":"plain",
+  "mediaType":"multipart",
   "messageId":"184",
   "threadId": "184",
   "modSeq":42,
   "sentDate":"2015-06-18T12:43:26+0200",
   "size":25,
   "subject":["Test message"],
-  "subtype":"text",
+  "subtype":"mixed",
   "to":[{
     "name":null,
     "address":"[email protected]",
diff --git a/mailbox/store/src/test/resources/eml/pgpSignedMail.json b/mailbox/store/src/test/resources/eml/pgpSignedMail.json
index 3395c05e4b..b8c745a0e4 100644
--- a/mailbox/store/src/test/resources/eml/pgpSignedMail.json
+++ b/mailbox/store/src/test/resources/eml/pgpSignedMail.json
@@ -7,8 +7,8 @@
   "size": 25,
   "date": "2015-06-07T00:00:00+0200",
   "saveDate": null,
-  "mediaType": "plain",
-  "subtype": "text",
+  "mediaType": "text",
+  "subtype": "plain",
   "userFlags": [
        "security",
        "debian"
diff --git a/mailbox/store/src/test/resources/eml/spamMail.json b/mailbox/store/src/test/resources/eml/spamMail.json
index 2d4eac4b35..c018e89bc4 100644
--- a/mailbox/store/src/test/resources/eml/spamMail.json
+++ b/mailbox/store/src/test/resources/eml/spamMail.json
@@ -7,8 +7,8 @@
   "size": 25,
   "date": "2015-06-07T00:00:00+0200",
   "saveDate": null,
-  "mediaType": "plain",
-  "subtype": "text",
+  "mediaType": "multipart",
+  "subtype": "mixed",
   "mimeMessageID": "<[email protected]>",
   "userFlags": [],
   "headers": [{
diff --git a/mailbox/store/src/test/resources/eml/spamMailNoHeaders.json b/mailbox/store/src/test/resources/eml/spamMailNoHeaders.json
index 8d4f8c35b3..a371606b15 100644
--- a/mailbox/store/src/test/resources/eml/spamMailNoHeaders.json
+++ b/mailbox/store/src/test/resources/eml/spamMailNoHeaders.json
@@ -7,8 +7,8 @@
   "size": 25,
   "date": "2015-06-07T00:00:00+0200",
   "saveDate": null,
-  "mediaType": "plain",
-  "subtype": "text",
+  "mediaType": "multipart",
+  "subtype": "mixed",
   "mimeMessageID": "<[email protected]>",
   "userFlags": [],
   "headers": [],


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to