This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git


The following commit(s) were added to refs/heads/master by this push:
     new 955c610e88 JAMES-4166 Implement collapseThreads support for Lucene (#2935)
955c610e88 is described below

commit 955c610e88cdb7e7d91e0d64e69167c78bf1420d
Author: Trần Hồng Quân <[email protected]>
AuthorDate: Fri Feb 6 20:12:25 2026 +0700

    JAMES-4166 Implement collapseThreads support for Lucene (#2935)
---
 mailbox/lucene/pom.xml                             |  5 ++
 .../lucene/search/LuceneIndexableDocument.java     |  6 +-
 .../lucene/search/LuceneMessageSearchIndex.java    | 70 ++++++++++++++++++----
 .../search/LuceneMessageSearchIndexTest.java       |  5 ++
 .../memory/MemoryEmailQueryMethodNoViewTest.java   | 12 ----
 .../rfc8621/memory/MemoryEmailQueryMethodTest.java | 12 ----
 upgrade-instructions.md                            | 13 ++++
 7 files changed, 86 insertions(+), 37 deletions(-)

diff --git a/mailbox/lucene/pom.xml b/mailbox/lucene/pom.xml
index 95df49120f..9a1784159a 100644
--- a/mailbox/lucene/pom.xml
+++ b/mailbox/lucene/pom.xml
@@ -122,6 +122,11 @@
             <artifactId>lucene-core</artifactId>
             <version>${lucene.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-grouping</artifactId>
+            <version>${lucene.version}</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.lucene</groupId>
             <artifactId>lucene-highlighter</artifactId>
diff --git 
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
 
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
index 4f14d14c3f..1f45d594a1 100644
--- 
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
+++ 
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
@@ -88,6 +88,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.StringField;
@@ -143,7 +144,10 @@ public class LuceneIndexableDocument {
         
Optional.ofNullable(SearchUtil.getSerializedMessageIdIfSupportedByUnderlyingStorageOrNull(message))
             .ifPresent(serializedMessageId -> doc.add(new 
StringField(MESSAGE_ID_FIELD, serializedMessageId, Field.Store.YES)));
         
Optional.ofNullable(SearchUtil.getSerializedThreadIdIfSupportedByUnderlyingStorageOrNull(message))
-            .ifPresent(serializedThreadId -> doc.add(new 
StringField(THREAD_ID_FIELD, serializedThreadId, Field.Store.YES)));
+            .ifPresent(serializedThreadId -> {
+                doc.add(new StringField(THREAD_ID_FIELD, serializedThreadId, 
Field.Store.YES));
+                doc.add(new SortedDocValuesField(THREAD_ID_FIELD, new 
BytesRef(serializedThreadId)));
+            });
 
         HeaderCollection headerCollection = 
mimePartExtracted.getHeaderCollection();
 
diff --git 
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
 
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
index 60fc932240..b9298d7013 100644
--- 
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
+++ 
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
@@ -131,7 +131,12 @@ import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.grouping.GroupDocs;
+import org.apache.lucene.search.grouping.GroupingSearch;
+import org.apache.lucene.search.grouping.TermGroupSelector;
+import org.apache.lucene.search.grouping.TopGroups;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -308,6 +313,13 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
             return Flux.empty();
         }
 
+        if (searchQuery.shouldCollapseThreads()) {
+            return searchCollapseThreads(mailboxIds, searchQuery, 
searchOptions);
+        }
+        return searchWithoutCollapseThreads(mailboxIds, searchQuery, 
searchOptions);
+    }
+
+    private Flux<MessageId> searchWithoutCollapseThreads(Collection<MailboxId> 
mailboxIds, SearchQuery searchQuery, SearchOptions searchOptions) throws 
MailboxException {
         long requestedLimit = 
Math.addExact(searchOptions.offset().getOffset(), 
searchOptions.limit().getLimit().orElseThrow());
 
         return Flux.fromIterable(searchMultimap(mailboxIds, searchQuery)
@@ -320,6 +332,36 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
             .collect(ImmutableList.toImmutableList()));
     }
 
+    private Flux<MessageId> searchCollapseThreads(Collection<MailboxId> 
mailboxIds, SearchQuery searchQuery, SearchOptions searchOptions) throws 
MailboxException {
+        Query query = buildQuery(mailboxIds, searchQuery);
+
+        try (IndexReader reader = DirectoryReader.open(writer)) {
+            IndexSearcher searcher = new IndexSearcher(reader);
+
+            GroupingSearch groupingSearch = new GroupingSearch(new 
TermGroupSelector(THREAD_ID_FIELD));
+            Sort sort = createSort(searchQuery.getSorts());
+            groupingSearch.setGroupSort(sort);
+            groupingSearch.setSortWithinGroup(sort);
+            // get the first message of each thread group
+            groupingSearch.setGroupDocsOffset(0);
+            groupingSearch.setGroupDocsLimit(1);
+
+            int groupOffset = 
Math.toIntExact(searchOptions.offset().getOffset());
+            int topNGroups = 
Math.toIntExact(searchOptions.limit().getLimit().orElseThrow());
+
+            TopGroups<BytesRef> topGroups = groupingSearch.search(searcher, 
query, groupOffset, topNGroups);
+            List<MessageId> result = new ArrayList<>(topGroups.groups.length);
+            for (GroupDocs<BytesRef> group : topGroups.groups) {
+                ScoreDoc[] scoreDocs = group.scoreDocs();
+                Document document = 
searcher.storedFields().document(scoreDocs[0].doc);
+                
documentToSearchResult(document).getMessageId().ifPresent(result::add);
+            }
+            return Flux.fromIterable(result);
+        } catch (IOException e) {
+            throw new MailboxException("Unable to search the mailbox", e);
+        }
+    }
+
     private List<SearchResult> searchMultimap(Collection<MailboxId> 
mailboxIds, SearchQuery searchQuery) throws MailboxException {
         return searchDocument(mailboxIds, searchQuery, maxQueryResults)
             .stream()
@@ -336,22 +378,12 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
     }
 
     public List<Document> searchDocument(Collection<MailboxId> mailboxIds, 
SearchQuery searchQuery, int maxQueryResults) throws MailboxException {
-        Query inMailboxes = buildQueryFromMailboxes(mailboxIds);
-
         try (IndexReader reader = DirectoryReader.open(writer)) {
             IndexSearcher searcher = new IndexSearcher(reader);
-            BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
-            queryBuilder.add(inMailboxes, BooleanClause.Occur.MUST);
-            // Not return flags documents
-            queryBuilder.add(new PrefixQuery(new Term(FLAGS_FIELD, "")), 
BooleanClause.Occur.MUST_NOT);
-
-            List<Criterion> crits = searchQuery.getCriteria();
-            for (Criterion crit : crits) {
-                queryBuilder.add(createQuery(crit, inMailboxes, 
searchQuery.getRecentMessageUids()), BooleanClause.Occur.MUST);
-            }
+            Query query = buildQuery(mailboxIds, searchQuery);
 
             // query for all the documents sorted as specified in the 
SearchQuery
-            TopDocs docs = searcher.search(queryBuilder.build(), 
maxQueryResults, createSort(searchQuery.getSorts()));
+            TopDocs docs = searcher.search(query, maxQueryResults, 
createSort(searchQuery.getSorts()));
 
             return Stream.of(docs.scoreDocs)
                 .map(Throwing.function(sDoc -> 
searcher.storedFields().document(sDoc.doc)))
@@ -370,6 +402,20 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
         return queryBuilder.build();
     }
 
+    private Query buildQuery(Collection<MailboxId> mailboxIds, SearchQuery 
searchQuery) throws MailboxException {
+        Query inMailboxes = buildQueryFromMailboxes(mailboxIds);
+        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+        queryBuilder.add(inMailboxes, BooleanClause.Occur.MUST);
+        // Not return flags documents
+        queryBuilder.add(new PrefixQuery(new Term(FLAGS_FIELD, "")), 
BooleanClause.Occur.MUST_NOT);
+
+        List<Criterion> crits = searchQuery.getCriteria();
+        for (Criterion crit : crits) {
+            queryBuilder.add(createQuery(crit, inMailboxes, 
searchQuery.getRecentMessageUids()), BooleanClause.Occur.MUST);
+        }
+        return queryBuilder.build();
+    }
+
     private String toSentDateField(DateResolution res) {
         return switch (res) {
             case Year -> SENT_DATE_FIELD_YEAR_RESOLUTION;
diff --git 
a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
 
b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
index fcb8b2d347..1c59cb4d93 100644
--- 
a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
+++ 
b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
@@ -96,6 +96,11 @@ class LuceneMessageSearchIndexTest extends 
AbstractMessageSearchIndexTest {
         return InMemoryMessageId.of(1000);
     }
 
+    @Override
+    protected boolean supportsCollapseThreads() {
+        return true;
+    }
+
     @Disabled("JAMES-1799: ignoring failing test after generalizing OpenSearch 
test suite to other mailbox search backends")
     @Override
     public void uidShouldreturnEveryThing() {
diff --git 
a/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodNoViewTest.java
 
b/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodNoViewTest.java
index 2587a843ef..49141a8c7b 100644
--- 
a/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodNoViewTest.java
+++ 
b/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodNoViewTest.java
@@ -102,16 +102,4 @@ public class MemoryEmailQueryMethodNoViewTest implements 
EmailQueryMethodContrac
     public void 
inMailboxBeforeSortedByReceivedAtShouldCollapseThreads(GuiceJamesServer server) 
{
     }
 
-    @Test
-    @Override
-    @Disabled("JAMES-4166 collapseThreads does not support Lucene 
implementation yet")
-    public void collapseThreadsShouldApplyOnSearchIndexPath(GuiceJamesServer 
server) {
-    }
-
-    @Test
-    @Override
-    @Disabled("JAMES-4166 collapseThreads does not support Lucene 
implementation yet")
-    public void 
collapseThreadsShouldApplyPaginationOnCollapsedResults(GuiceJamesServer server) 
{
-    }
-
 }
diff --git 
a/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodTest.java
 
b/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodTest.java
index 970107e19c..f5152dd222 100644
--- 
a/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodTest.java
+++ 
b/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodTest.java
@@ -52,16 +52,4 @@ public class MemoryEmailQueryMethodTest extends MemoryBase 
implements EmailQuery
         
EmailQueryMethodContract.super.shouldListMailsReceivedAfterADate(server);
     }
 
-    @Test
-    @Override
-    @Disabled("JAMES-4166 collapseThreads does not support Lucene 
implementation yet")
-    public void collapseThreadsShouldApplyOnSearchIndexPath(GuiceJamesServer 
server) {
-    }
-
-    @Test
-    @Override
-    @Disabled("JAMES-4166 collapseThreads does not support Lucene 
implementation yet")
-    public void 
collapseThreadsShouldApplyPaginationOnCollapsedResults(GuiceJamesServer server) 
{
-    }
-
 }
diff --git a/upgrade-instructions.md b/upgrade-instructions.md
index 145b04d820..cc79b2954e 100644
--- a/upgrade-instructions.md
+++ b/upgrade-instructions.md
@@ -17,6 +17,19 @@ Changes to apply between 3.9.x and 3.10.0 will be reported 
here.
 Change list:
  - [Adding thread_id column to Cassandra email_query_view_sent_at and 
email_query_view_received_at 
tables](#adding-thread_id-column-to-cassandra-email_query_view_sent_at-and-email_query_view_received_at-tables)
  - [Adding thread_id column to Postgresql email_query_view 
table](#adding-thread_id-column-to-postgresql-email_query_view-table)
+ - [Lucene mailbox index schema update for collapseThreads support](#lucene-mailbox-index-schema-update-for-collapsethreads-support)
+
+### Lucene mailbox index schema update for collapseThreads support
+
+Date: 06/02/2026
+
+Concerned products: James apps relying on Lucene as the search index
+
+JIRA: https://issues.apache.org/jira/browse/JAMES-4166
+
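+James now requires the `threadId` field to be indexed as a `SortedDocValuesField`, in addition to the existing string field, to support `collapseThreads` on mailbox search. Documents indexed before this change do not carry that field.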
+
+After upgrading, you need to rebuild the Lucene mailbox index by [reindexing all mails](https://james.apache.org/server/manage-webadmin.html#ReIndexing_all_mails).
 
 ### Adding thread_id column to Cassandra email_query_view_sent_at and 
email_query_view_received_at tables
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to