This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch 3.9.x
in repository https://gitbox.apache.org/repos/asf/james-project.git

commit 7daefc80284fc3d494ce7062688b1ab7e3ccc0d1
Author: Rene Cordier <[email protected]>
AuthorDate: Mon Nov 3 15:30:35 2025 +0700

    Limit the subject length for indexation to not go over Lucene raw 
indexation max size limit
---
 .../mailbox/opensearch/json/IndexableMessage.java  |  9 +++++++-
 .../opensearch/OpenSearchIntegrationTest.java      | 27 ++++++++++++++++++++++
 .../james/mailbox/store/search/SearchUtil.java     | 16 +++++++++++++
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git 
a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
 
b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
index cb2bd7cae7..fdd78aa3b5 100644
--- 
a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
+++ 
b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
@@ -27,6 +27,7 @@ import java.time.ZonedDateTime;
 import java.time.format.DateTimeFormatter;
 import java.util.List;
 import java.util.Optional;
+import java.util.Set;
 import java.util.stream.Collectors;
 
 import org.apache.james.mailbox.ModSeq;
@@ -145,7 +146,7 @@ public class IndexableMessage {
                     ZonedDateTime internalDate = 
getSanitizedInternalDate(message, zoneId);
 
                     List<HeaderCollection.Header> headers = 
headerCollection.getHeaders();
-                    Subjects subjects = 
Subjects.from(headerCollection.getSubjectSet());
+                    Subjects subjects = 
Subjects.from(limitSubjectsLength(headerCollection.getSubjectSet()));
                     EMailers from = 
EMailers.from(headerCollection.getFromAddressSet());
                     EMailers to = 
EMailers.from(headerCollection.getToAddressSet());
                     EMailers cc = 
EMailers.from(headerCollection.getCcAddressSet());
@@ -226,6 +227,16 @@ public class IndexableMessage {
                 return ImmutableList.of();
             }
         }
+
+        /**
+         * Returns a new set holding every given subject after it has gone through
+         * {@link SearchUtil#truncateSubjectField(String)}.
+         */
+        private Set<String> limitSubjectsLength(Set<String> subjects) {
+            return subjects.stream()
+                .map(subject -> SearchUtil.truncateSubjectField(subject))
+                .collect(Collectors.toSet());
+        }
     }
 
     public static Builder builder() {
diff --git 
a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java
 
b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java
index d44a2b8ba6..4a3182dafd 100644
--- 
a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java
+++ 
b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java
@@ -306,6 +306,41 @@ class OpenSearchIntegrationTest extends 
AbstractMessageSearchIndexTest {
             .containsExactly(composedMessageId.getUid());
     }
 
+    // Regression test: appends a message whose subject is 50,000 characters long,
+    // i.e. larger than the 32766-byte limit applied to indexed subjects, and checks
+    // that the message still reaches the index and can be searched.
+    @Test
+    void tooLongSubjectShouldNotMakeIndexingFail() throws Exception {
+        MailboxPath mailboxPath = MailboxPath.forUser(USERNAME, INBOX);
+        MailboxSession session = MailboxSessionUtil.create(USERNAME);
+        MessageManager messageManager = 
storeMailboxManager.getMailbox(mailboxPath, session);
+
+        String recipient = "[email protected]";
+        ComposedMessageId composedMessageId = 
messageManager.appendMessage(MessageManager.AppendCommand.from(
+                Message.Builder.of()
+                    .setTo(recipient)
+                    .setSubject(Strings.repeat("0123456789", 5000))
+                    .setBody("0123456789", StandardCharsets.UTF_8)),
+            session).getId();
+
+        // Wait (up to ten seconds) until a match-all query on the index reports 14 hits.
+        // NOTE(review): 14 is presumably the fixture's messages plus the one appended
+        // above - confirm against the test setup, which is not visible in this hunk.
+        CALMLY_AWAIT.atMost(Durations.TEN_SECONDS)
+            .untilAsserted(() -> assertThat(client.search(
+                    new SearchRequest.Builder()
+                        .index(indexName.getValue())
+                        .query(QueryBuilders.matchAll().build().toQuery())
+                        .build())
+                .block()
+                .hits().total().value()).isEqualTo(14));
+
+        // The message must be retrievable through a search on its recipient,
+        // which shows it was indexed despite its oversized subject.
+        
assertThat(Flux.from(messageManager.search(SearchQuery.of(SearchQuery.address(SearchQuery.AddressType.To,
 recipient)), session)).toStream())
+            .containsExactly(composedMessageId.getUid());
+    }
+
     @Test
     void fieldsExceedingLuceneLimitShouldNotBeIgnored() throws Exception {
         MailboxPath mailboxPath = MailboxPath.forUser(USERNAME, INBOX);
diff --git 
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
 
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
index 0a90b132b6..b45f2bf903 100644
--- 
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
+++ 
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
@@ -18,6 +18,8 @@
  ****************************************************************/
 package org.apache.james.mailbox.store.search;
 
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Locale;
 import java.util.Set;
@@ -52,6 +54,7 @@ public class SearchUtil {
     private static final char OPEN_SQUARE_BRACKED = '[';
     private static final char CLOSE_SQUARE_BRACKED = ']';
     private static final char COLON = ':';
+    private static final int MAX_RAW_BYTES = 32766;
 
     /**
      * Return the DISPLAY ADDRESS for the given {@link Mailbox}. 
@@ -491,4 +494,37 @@ public class SearchUtil {
 
         return result.toString();
     }
+
+    /**
+     * Shortens a subject so that its UTF-8 encoding does not exceed
+     * {@code MAX_RAW_BYTES}, the Lucene raw indexation size limit.
+     *
+     * <p>The cut always falls on a character boundary. Slicing through a
+     * multi-byte sequence would make the decoder substitute U+FFFD, which takes
+     * three bytes once re-encoded and can push the value back over the limit.
+     *
+     * @param subject the subject to index, may be {@code null}
+     * @return {@code null} for a {@code null} subject, the subject itself when
+     *         it already fits, otherwise its longest prefix of whole characters
+     *         whose UTF-8 encoding fits within the limit
+     */
+    public static String truncateSubjectField(String subject) {
+        if (subject == null) {
+            return null;
+        }
+
+        byte[] subjectAsBytes = subject.getBytes(StandardCharsets.UTF_8);
+        if (subjectAsBytes.length <= MAX_RAW_BYTES) {
+            return subject;
+        }
+
+        // UTF-8 continuation bytes look like 10xxxxxx. While the first dropped
+        // byte is one, the cut splits a character: back up to its lead byte.
+        int end = MAX_RAW_BYTES;
+        while (end > 0 && (subjectAsBytes[end] & 0xC0) == 0x80) {
+            end--;
+        }
+
+        return new String(Arrays.copyOf(subjectAsBytes, end), StandardCharsets.UTF_8);
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to