This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch 3.9.x in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 7daefc80284fc3d494ce7062688b1ab7e3ccc0d1 Author: Rene Cordier <[email protected]> AuthorDate: Mon Nov 3 15:30:35 2025 +0700 Limit the subject length for indexation to not go over Lucene raw indexation max size limit --- .../mailbox/opensearch/json/IndexableMessage.java | 9 +++++++- .../opensearch/OpenSearchIntegrationTest.java | 27 ++++++++++++++++++++++ .../james/mailbox/store/search/SearchUtil.java | 16 +++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java index cb2bd7cae7..fdd78aa3b5 100644 --- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java +++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java @@ -27,6 +27,7 @@ import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import org.apache.james.mailbox.ModSeq; @@ -145,7 +146,7 @@ public class IndexableMessage { ZonedDateTime internalDate = getSanitizedInternalDate(message, zoneId); List<HeaderCollection.Header> headers = headerCollection.getHeaders(); - Subjects subjects = Subjects.from(headerCollection.getSubjectSet()); + Subjects subjects = Subjects.from(limitSubjectsLength(headerCollection.getSubjectSet())); EMailers from = EMailers.from(headerCollection.getFromAddressSet()); EMailers to = EMailers.from(headerCollection.getToAddressSet()); EMailers cc = EMailers.from(headerCollection.getCcAddressSet()); @@ -226,6 +227,12 @@ public class IndexableMessage { return ImmutableList.of(); } } + + private Set<String> limitSubjectsLength(Set<String> subjects) { + return subjects.stream() + .map(SearchUtil::truncateSubjectField) + .collect(Collectors.toSet()); + } } public static Builder builder() { diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java index d44a2b8ba6..4a3182dafd 100644 --- a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java +++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java @@ -306,6 +306,33 @@ class OpenSearchIntegrationTest extends AbstractMessageSearchIndexTest { .containsExactly(composedMessageId.getUid()); } + @Test + void tooLongSubjectShouldNotMakeIndexingFail() throws Exception { + MailboxPath mailboxPath = MailboxPath.forUser(USERNAME, INBOX); + MailboxSession session = MailboxSessionUtil.create(USERNAME); + MessageManager messageManager = storeMailboxManager.getMailbox(mailboxPath, session); + + String recipient = "[email protected]"; + ComposedMessageId composedMessageId = messageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo(recipient) + .setSubject(Strings.repeat("0123456789", 5000)) + .setBody("0123456789", StandardCharsets.UTF_8)), + session).getId(); + + CALMLY_AWAIT.atMost(Durations.TEN_SECONDS) + .untilAsserted(() -> assertThat(client.search( + new SearchRequest.Builder() + .index(indexName.getValue()) + .query(QueryBuilders.matchAll().build().toQuery()) + .build()) + .block() + .hits().total().value()).isEqualTo(14)); + + assertThat(Flux.from(messageManager.search(SearchQuery.of(SearchQuery.address(SearchQuery.AddressType.To, recipient)), session)).toStream()) + .containsExactly(composedMessageId.getUid()); + } + @Test void fieldsExceedingLuceneLimitShouldNotBeIgnored() throws Exception { MailboxPath mailboxPath = MailboxPath.forUser(USERNAME, INBOX); diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java index 0a90b132b6..b45f2bf903 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java @@ -18,6 +18,8 @@ ****************************************************************/ package org.apache.james.mailbox.store.search; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.HashSet; import java.util.Locale; import java.util.Set; @@ -52,6 +54,7 @@ public class SearchUtil { private static final char OPEN_SQUARE_BRACKED = '['; private static final char CLOSE_SQUARE_BRACKED = ']'; private static final char COLON = ':'; + private static final int MAX_RAW_BYTES = 32766; /** * Return the DISPLAY ADDRESS for the given {@link Mailbox}. @@ -491,4 +494,17 @@ public class SearchUtil { return result.toString(); } + + public static String truncateSubjectField(String subject) { + if (subject == null) { + return null; + } + + byte[] subjectAsBytes = subject.getBytes(StandardCharsets.UTF_8); + if (subjectAsBytes.length < MAX_RAW_BYTES) { + return subject; + } + + return new String(Arrays.copyOf(subjectAsBytes, MAX_RAW_BYTES), StandardCharsets.UTF_8); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
