This is an automated email from the ASF dual-hosted git repository.
rcordier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push:
new ab68ea48fd JAMES-4077 [SearchSnippet] Highlight OpenSearch implementation (#2447)
ab68ea48fd is described below
commit ab68ea48fd460fdc4491b8a3372ed00ba7e97c2f
Author: vttran <[email protected]>
AuthorDate: Mon Oct 14 11:35:07 2024 +0700
JAMES-4077 [SearchSnippet] Highlight OpenSearch implementation (#2447)
---
.../backends/opensearch/search/ScrolledSearch.java | 2 +-
.../searchhighligt/SearchHighLighterContract.java | 64 ++++--
.../lucene/search/LuceneSearchHighlighter.java | 3 +
.../OpenSearchListeningMessageSearchIndex.java | 5 +-
.../opensearch/json/JsonMessageConstants.java | 1 -
.../search/OpenSearchSearchHighlighter.java | 91 ++++++++
.../opensearch/search/OpenSearchSearcher.java | 42 +++-
.../search/OpenSearchSearchHighlighterTest.java | 244 +++++++++++++++++++++
8 files changed, 423 insertions(+), 29 deletions(-)
diff --git a/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java b/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java
index 3a5d22d1ce..8f9ccf209d 100644
--- a/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java
+++ b/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java
@@ -72,7 +72,7 @@ public class ScrolledSearch {
}
Consumer<ScrollResponse<ObjectNode>> onResponse = searchResponse -> {
- scrollId.set(Optional.of(searchResponse.scrollId()));
+ scrollId.set(Optional.ofNullable(searchResponse.scrollId()));
sink.next(searchResponse);
if (searchResponse.hits().hits().isEmpty()) {
diff --git a/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java b/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
index e95f5f8d06..e273e95e9c 100644
--- a/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
+++ b/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
@@ -33,6 +33,7 @@ import org.apache.james.mailbox.MessageManager;
import org.apache.james.mailbox.exception.MailboxException;
import org.apache.james.mailbox.model.ComposedMessageId;
import org.apache.james.mailbox.model.MailboxId;
+import org.apache.james.mailbox.model.MessageId;
import org.apache.james.mailbox.model.MultimailboxesSearchQuery;
import org.apache.james.mailbox.model.SearchQuery;
import org.apache.james.mime4j.dom.Message;
@@ -64,14 +65,14 @@ public interface SearchHighLighterContract {
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Matthieu for your help")
+ .setSubject("Hallo, Thx Matthieu for your help")
.setBody("append contentA to inbox",
StandardCharsets.UTF_8)),
session).getId();
ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Alex for your help")
+ .setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox",
StandardCharsets.UTF_8)),
session).getId();
@@ -90,7 +91,7 @@ public interface SearchHighLighterContract {
assertThat(searchSnippets).hasSize(1);
assertSoftly(softly -> {
softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId());
-
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo!
Thx <mark>Matthieu</mark> for your help");
+
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo,
Thx <mark>Matthieu</mark> for your help");
});
}
@@ -102,14 +103,14 @@ public interface SearchHighLighterContract {
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Matthieu for your help")
+ .setSubject("Hallo, Thx Matthieu for your help")
.setBody("append contentA to inbox",
StandardCharsets.UTF_8)),
session).getId();
ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Alex for your help")
+ .setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox",
StandardCharsets.UTF_8)),
session).getId();
@@ -139,14 +140,14 @@ public interface SearchHighLighterContract {
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Matthieu for your help")
+ .setSubject("Hallo, Thx Matthieu for your help")
.setBody("append contentA to inbox",
StandardCharsets.UTF_8)),
session).getId();
ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Alex for your help")
+ .setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox",
StandardCharsets.UTF_8)),
session).getId();
@@ -172,14 +173,14 @@ public interface SearchHighLighterContract {
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Naruto for your help")
+ .setSubject("Hallo, Thx Naruto for your help")
.setBody("append Naruto to inbox",
StandardCharsets.UTF_8)),
session).getId();
ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Alex for your help")
+ .setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox",
StandardCharsets.UTF_8)),
session).getId();
@@ -200,7 +201,7 @@ public interface SearchHighLighterContract {
assertSoftly(softly -> {
softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId());
softly.assertThat(searchSnippets.getFirst().highlightedBody()).contains("append
<mark>Naruto</mark> to inbox");
-
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo!
Thx <mark>Naruto</mark> for your help");
+
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo,
Thx <mark>Naruto</mark> for your help");
});
}
@@ -245,7 +246,7 @@ public interface SearchHighLighterContract {
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Naruto Itachi for your help")
+ .setSubject("Hallo, Thx Naruto Itachi for your help")
.setBody("append Naruto Itachi to inbox",
StandardCharsets.UTF_8)),
session).getId();
@@ -319,7 +320,7 @@ public interface SearchHighLighterContract {
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Matthieu for your help")
+ .setSubject("Hallo, Thx Matthieu for your help")
.setBody("append contentA to inbox",
StandardCharsets.UTF_8)),
session).getId();
@@ -344,7 +345,7 @@ public interface SearchHighLighterContract {
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Matthieu for your help")
+ .setSubject("Hallo, Thx Matthieu for your help")
.setBody("append contentA to inbox",
StandardCharsets.UTF_8)),
session).getId();
@@ -364,7 +365,7 @@ public interface SearchHighLighterContract {
// Then highlightSearch should return username1 entry
assertThat(searchSnippets).hasSize(1);
-
assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! Thx
<mark>Matthieu</mark> for your help");
+
assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo, Thx
<mark>Matthieu</mark> for your help");
}
@Test
@@ -374,14 +375,14 @@ public interface SearchHighLighterContract {
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Naruto for your help")
+ .setSubject("Hallo, Thx Naruto for your help")
.setBody("append Naruto to inbox",
StandardCharsets.UTF_8)),
session).getId();
ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Alex for your help")
+ .setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox",
StandardCharsets.UTF_8)),
session).getId();
@@ -402,8 +403,8 @@ public interface SearchHighLighterContract {
assertThat(searchSnippets.stream()
.map(SearchSnippet::highlightedSubject)
.toList())
- .containsExactlyInAnyOrder(Optional.of("Hallo! Thx
<mark>Naruto</mark> for your help"),
- Optional.of("Hallo! Thx <mark>Alex</mark> for your help"));
+ .containsExactlyInAnyOrder(Optional.of("Hallo, Thx
<mark>Naruto</mark> for your help"),
+ Optional.of("Hallo, Thx <mark>Alex</mark> for your help"));
}
@Test
@@ -413,14 +414,14 @@ public interface SearchHighLighterContract {
ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Naruto for your help - Sasuke for
your help")
+ .setSubject("Hallo, Thx Naruto for your help - Sasuke for
your help")
.setBody("append Naruto to inbox",
StandardCharsets.UTF_8)),
session).getId();
ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
Message.Builder.of()
.setTo("[email protected]")
- .setSubject("Hallo! Thx Alex for your help")
+ .setSubject("Hallo, Thx Alex for your help")
.setBody("append contentB to inbox",
StandardCharsets.UTF_8)),
session).getId();
@@ -441,6 +442,27 @@ public interface SearchHighLighterContract {
assertThat(searchSnippets.stream()
.map(SearchSnippet::highlightedSubject)
.toList())
- .containsExactlyInAnyOrder(Optional.of("Hallo! Thx
<mark>Naruto</mark> for your help - <mark>Sasuke</mark> for your help"));
+ .containsExactlyInAnyOrder(Optional.of("Hallo, Thx
<mark>Naruto</mark> for your help - <mark>Sasuke</mark> for your help"));
+ }
+
+ @Test
+ default void highLightSearchShouldReturnEmptyWhenMessageIdsIsEmpty()
throws Exception {
+ MailboxSession session = session(USERNAME1);
+ ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
+ Message.Builder.of()
+ .setTo("[email protected]")
+ .setSubject("Hallo, Thx Naruto Itachi for your help")
+ .setBody("append Naruto Itachi to inbox",
StandardCharsets.UTF_8)),
+ session).getId();
+
+ verifyMessageWasIndexed(1);
+
+ List<MessageId> messageIdsSearch = List.of();
+
+ assertThat(Flux.from(testee().highlightSearch(messageIdsSearch,
MultimailboxesSearchQuery.from(SearchQuery.of(SearchQuery.bodyContains("Naruto
Itachi")))
+ .inMailboxes(List.of(m1.getMailboxId()))
+ .build(), session))
+ .collectList()
+ .block()).hasSize(0);
}
}
diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
index 6a4facb1de..ee1fdf6edc 100644
--- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
+++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
@@ -98,6 +98,9 @@ public class LuceneSearchHighlighter implements
SearchHighlighter {
@Override
public Flux<SearchSnippet> highlightSearch(List<MessageId> messageIds,
MultimailboxesSearchQuery expression, MailboxSession session) {
+ if (messageIds.isEmpty()) {
+ return Flux.empty();
+ }
return storeMailboxManager.getInMailboxIds(expression, session)
.collectList()
.flatMapMany(inMailboxIdsAccessible ->
highlightSearch(inMailboxIdsAccessible, expression.getSearchQuery(),
messageIds));
diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java
index 547245de05..9de1fedf31 100644
--- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java
@@ -28,6 +28,7 @@ import static
org.apache.james.mailbox.opensearch.json.JsonMessageConstants.IS_U
import static
org.apache.james.mailbox.opensearch.json.JsonMessageConstants.MAILBOX_ID;
import static
org.apache.james.mailbox.opensearch.json.JsonMessageConstants.MESSAGE_ID;
import static
org.apache.james.mailbox.opensearch.json.JsonMessageConstants.UID;
+import static
org.apache.james.mailbox.opensearch.search.OpenSearchSearcher.SEARCH_HIGHLIGHT;
import java.util.Collection;
import java.util.EnumSet;
@@ -324,7 +325,7 @@ public class OpenSearchListeningMessageSearchIndex extends
ListeningMessageSearc
Preconditions.checkArgument(session != null, "'session' is mandatory");
Optional<Integer> noLimit = Optional.empty();
- return searcher.search(ImmutableList.of(mailbox.getMailboxId()),
searchQuery, noLimit, UID_FIELD)
+ return searcher.search(ImmutableList.of(mailbox.getMailboxId()),
searchQuery, noLimit, UID_FIELD, !SEARCH_HIGHLIGHT)
.handle(this::extractUidFromHit);
}
@@ -336,7 +337,7 @@ public class OpenSearchListeningMessageSearchIndex extends
ListeningMessageSearc
return Flux.empty();
}
- return searcher.search(mailboxIds, searchQuery, Optional.empty(),
MESSAGE_ID_FIELD)
+ return searcher.search(mailboxIds, searchQuery, Optional.empty(),
MESSAGE_ID_FIELD, !SEARCH_HIGHLIGHT)
.handle(this::extractMessageIdFromHit)
.distinct()
.take(limit);
diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java
index 65cef55261..bb99f9ff85 100644
--- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java
@@ -45,7 +45,6 @@ public interface JsonMessageConstants {
String SENT_DATE = "sentDate";
String SAVE_DATE = "saveDate";
String ATTACHMENTS = "attachments";
- String TEXT = "text";
String MIME_MESSAGE_ID = "mimeMessageID";
String USER = "user";
diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighter.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighter.java
new file mode 100644
index 0000000000..9f676b3a7e
--- /dev/null
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighter.java
@@ -0,0 +1,91 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.opensearch.search;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import org.apache.james.mailbox.MailboxSession;
+import org.apache.james.mailbox.model.MailboxId;
+import org.apache.james.mailbox.model.MessageId;
+import org.apache.james.mailbox.model.MultimailboxesSearchQuery;
+import org.apache.james.mailbox.model.SearchQuery;
+import org.apache.james.mailbox.opensearch.json.JsonMessageConstants;
+import org.apache.james.mailbox.searchhighligt.SearchHighlighter;
+import org.apache.james.mailbox.searchhighligt.SearchSnippet;
+import org.apache.james.mailbox.store.StoreMailboxManager;
+import org.opensearch.client.opensearch.core.search.Hit;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import reactor.core.publisher.Flux;
+
+public class OpenSearchSearchHighlighter implements SearchHighlighter {
+ public static final String ATTACHMENT_TEXT_CONTENT_FIELD =
JsonMessageConstants.ATTACHMENTS + "." +
JsonMessageConstants.Attachment.TEXT_CONTENT;
+ public static final List<String> SNIPPET_FIELDS = List.of(
+ JsonMessageConstants.MESSAGE_ID,
+ JsonMessageConstants.SUBJECT,
+ JsonMessageConstants.TEXT_BODY,
+ ATTACHMENT_TEXT_CONTENT_FIELD);
+
+ private final OpenSearchSearcher openSearchSearcher;
+ private final StoreMailboxManager storeMailboxManager;
+ private final MessageId.Factory messageIdFactory;
+
+ public OpenSearchSearchHighlighter(OpenSearchSearcher openSearchSearcher,
StoreMailboxManager storeMailboxManager, MessageId.Factory messageIdFactory) {
+ this.openSearchSearcher = openSearchSearcher;
+ this.storeMailboxManager = storeMailboxManager;
+ this.messageIdFactory = messageIdFactory;
+ }
+
+ @Override
+ public Flux<SearchSnippet> highlightSearch(List<MessageId> messageIds,
MultimailboxesSearchQuery expression, MailboxSession session) {
+ if (messageIds.isEmpty()) {
+ return Flux.empty();
+ }
+
+ return storeMailboxManager.getInMailboxIds(expression, session)
+ .collectList()
+ .flatMapMany(mailboxIds -> highlightSearch(mailboxIds,
expression.getSearchQuery(), messageIds.size()));
+ }
+
+ private Flux<SearchSnippet> highlightSearch(List<MailboxId> mailboxIds,
SearchQuery query, int limit) {
+ return openSearchSearcher.search(mailboxIds, query,
Optional.of(limit), SNIPPET_FIELDS, OpenSearchSearcher.SEARCH_HIGHLIGHT)
+ .map(this::buildSearchSnippet);
+ }
+
+ private SearchSnippet buildSearchSnippet(Hit<ObjectNode> searchResult) {
+ MessageId messageId =
Optional.ofNullable(searchResult.fields().get(JsonMessageConstants.MESSAGE_ID))
+ .map(jsonData -> jsonData.toJson().asJsonArray().getString(0))
+ .map(messageIdFactory::fromString)
+ .orElseThrow(() -> new IllegalStateException("Can not extract
MessageID for search result: " + searchResult.id()));
+
+ Map<String, List<String>> highlightHit = searchResult.highlight();
+
+ Optional<String> highlightedSubject =
Optional.ofNullable(highlightHit.get(JsonMessageConstants.SUBJECT))
+ .map(List::getFirst);
+ Optional<String> highlightedTextBody =
Optional.ofNullable(highlightHit.get(JsonMessageConstants.TEXT_BODY))
+ .or(() ->
Optional.ofNullable(highlightHit.get(ATTACHMENT_TEXT_CONTENT_FIELD)))
+ .map(List::getFirst);
+
+ return new SearchSnippet(messageId, highlightedSubject,
highlightedTextBody);
+ }
+}
diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java
index 81b16e3ab3..09f1524de4 100644
--- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java
@@ -19,6 +19,8 @@
package org.apache.james.mailbox.opensearch.search;
+import static
org.apache.james.mailbox.opensearch.search.OpenSearchSearchHighlighter.ATTACHMENT_TEXT_CONTENT_FIELD;
+
import java.util.Collection;
import java.util.List;
import java.util.Optional;
@@ -31,11 +33,15 @@ import org.apache.james.backends.opensearch.RoutingKey;
import org.apache.james.backends.opensearch.search.ScrolledSearch;
import org.apache.james.mailbox.model.MailboxId;
import org.apache.james.mailbox.model.SearchQuery;
+import org.apache.james.mailbox.opensearch.json.JsonMessageConstants;
import org.apache.james.mailbox.opensearch.query.QueryConverter;
import org.apache.james.mailbox.opensearch.query.SortConverter;
+import org.apache.james.mailbox.searchhighligt.SearchHighlighterConfiguration;
import org.opensearch.client.opensearch._types.SortOptions;
import org.opensearch.client.opensearch._types.Time;
import org.opensearch.client.opensearch.core.SearchRequest;
+import org.opensearch.client.opensearch.core.search.Highlight;
+import org.opensearch.client.opensearch.core.search.HighlightField;
import org.opensearch.client.opensearch.core.search.Hit;
import com.fasterxml.jackson.databind.node.ObjectNode;
@@ -44,6 +50,7 @@ import reactor.core.publisher.Flux;
public class OpenSearchSearcher {
public static final int DEFAULT_SEARCH_SIZE = 100;
+ public static final boolean SEARCH_HIGHLIGHT = true;
private static final Time TIMEOUT = new Time.Builder().time("1m").build();
private static final int MAX_ROUTING_KEY = 5;
@@ -52,24 +59,47 @@ public class OpenSearchSearcher {
private final int size;
private final AliasName aliasName;
private final RoutingKey.Factory<MailboxId> routingKeyFactory;
+ private final Highlight highlightQuery;
public OpenSearchSearcher(ReactorOpenSearchClient client, QueryConverter
queryConverter, int size,
ReadAliasName aliasName,
RoutingKey.Factory<MailboxId> routingKeyFactory) {
+ this(client, queryConverter, size, aliasName, routingKeyFactory,
SearchHighlighterConfiguration.DEFAULT);
+ }
+
+ public OpenSearchSearcher(ReactorOpenSearchClient client, QueryConverter
queryConverter, int size,
+ ReadAliasName aliasName,
RoutingKey.Factory<MailboxId> routingKeyFactory,
+ SearchHighlighterConfiguration
searchHighlighterConfiguration) {
this.client = client;
this.queryConverter = queryConverter;
this.size = size;
this.aliasName = aliasName;
this.routingKeyFactory = routingKeyFactory;
+
+ HighlightField highlightField = new HighlightField.Builder()
+ .forceSource(true)
+ .preTags(searchHighlighterConfiguration.preTagFormatter())
+ .postTags(searchHighlighterConfiguration.postTagFormatter())
+ .fragmentSize(searchHighlighterConfiguration.fragmentSize())
+ .numberOfFragments(1)
+ .build();
+
+ this.highlightQuery = new Highlight.Builder()
+ .fields(JsonMessageConstants.SUBJECT, highlightField)
+ .fields(JsonMessageConstants.TEXT_BODY, highlightField)
+ .fields(ATTACHMENT_TEXT_CONTENT_FIELD, highlightField)
+ .build();
}
public Flux<Hit<ObjectNode>> search(Collection<MailboxId> mailboxIds,
SearchQuery query,
- Optional<Integer> limit, List<String>
fields) {
- SearchRequest searchRequest = prepareSearch(mailboxIds, query, limit,
fields);
+ Optional<Integer> limit, List<String>
fields,
+ boolean searchHighlight) {
+ SearchRequest searchRequest = prepareSearch(mailboxIds, query, limit,
fields, searchHighlight);
return new ScrolledSearch(client, searchRequest)
.searchHits();
}
- private SearchRequest prepareSearch(Collection<MailboxId> mailboxIds,
SearchQuery query, Optional<Integer> limit, List<String> fields) {
+ private SearchRequest prepareSearch(Collection<MailboxId> mailboxIds,
SearchQuery query,
+ Optional<Integer> limit, List<String>
fields, boolean highlight) {
List<SortOptions> sorts = query.getSorts()
.stream()
.flatMap(SortConverter::convertSort)
@@ -84,6 +114,10 @@ public class OpenSearchSearcher {
.storedFields(fields)
.sort(sorts);
+ if (highlight) {
+ request.highlight(highlightQuery);
+ }
+
return toRoutingKey(mailboxIds)
.map(request::routing)
.orElse(request)
@@ -101,7 +135,7 @@ public class OpenSearchSearcher {
}
private int computeRequiredSize(Optional<Integer> limit) {
- return limit.map(value -> Math.min(value.intValue(), size))
+ return limit.map(value -> Math.min(value, size))
.orElse(size);
}
diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighterTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighterTest.java
new file mode 100644
index 0000000000..a0d73e19ba
--- /dev/null
+++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighterTest.java
@@ -0,0 +1,244 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.opensearch.search;
+
+import static
org.apache.james.mailbox.opensearch.search.OpenSearchSearcherTest.SEARCH_SIZE;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.awaitility.Durations.ONE_HUNDRED_MILLISECONDS;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.time.ZoneId;
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.james.backends.opensearch.DockerOpenSearchExtension;
+import org.apache.james.backends.opensearch.IndexName;
+import org.apache.james.backends.opensearch.OpenSearchIndexer;
+import org.apache.james.backends.opensearch.ReactorOpenSearchClient;
+import org.apache.james.backends.opensearch.ReadAliasName;
+import org.apache.james.backends.opensearch.WriteAliasName;
+import org.apache.james.core.Username;
+import org.apache.james.mailbox.MailboxManager;
+import org.apache.james.mailbox.MailboxSession;
+import org.apache.james.mailbox.MessageManager;
+import org.apache.james.mailbox.inmemory.InMemoryMessageId;
+import org.apache.james.mailbox.inmemory.manager.InMemoryIntegrationResources;
+import org.apache.james.mailbox.model.ComposedMessageId;
+import org.apache.james.mailbox.model.Mailbox;
+import org.apache.james.mailbox.model.MailboxACL;
+import org.apache.james.mailbox.model.MailboxId;
+import org.apache.james.mailbox.model.MailboxPath;
+import org.apache.james.mailbox.model.MessageId;
+import org.apache.james.mailbox.model.MultimailboxesSearchQuery;
+import org.apache.james.mailbox.model.SearchQuery;
+import org.apache.james.mailbox.opensearch.IndexAttachments;
+import org.apache.james.mailbox.opensearch.IndexHeaders;
+import org.apache.james.mailbox.opensearch.MailboxIdRoutingKeyFactory;
+import org.apache.james.mailbox.opensearch.MailboxIndexCreationUtil;
+import org.apache.james.mailbox.opensearch.OpenSearchMailboxConfiguration;
+import
org.apache.james.mailbox.opensearch.events.OpenSearchListeningMessageSearchIndex;
+import org.apache.james.mailbox.opensearch.json.MessageToOpenSearchJson;
+import org.apache.james.mailbox.opensearch.query.CriterionConverter;
+import org.apache.james.mailbox.opensearch.query.QueryConverter;
+import org.apache.james.mailbox.searchhighligt.SearchHighLighterContract;
+import org.apache.james.mailbox.searchhighligt.SearchHighlighter;
+import org.apache.james.mailbox.searchhighligt.SearchSnippet;
+import org.apache.james.mailbox.store.StoreMailboxManager;
+import org.apache.james.mailbox.store.StoreMessageManager;
+import org.apache.james.mailbox.store.search.MessageSearchIndex;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.apache.james.mailbox.tika.TikaExtension;
+import org.apache.james.mailbox.tika.TikaHttpClientImpl;
+import org.apache.james.mailbox.tika.TikaTextExtractor;
+import org.apache.james.metrics.tests.RecordingMetricFactory;
+import org.apache.james.mime4j.dom.Message;
+import org.apache.james.util.ClassLoaderUtils;
+import org.awaitility.Awaitility;
+import org.awaitility.Durations;
+import org.awaitility.core.ConditionFactory;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.RegisterExtension;
+
+import com.github.fge.lambdas.Throwing;
+import com.google.common.collect.ImmutableSet;
+
+import reactor.core.publisher.Flux;
+
+public class OpenSearchSearchHighlighterTest implements
SearchHighLighterContract {
+ private MessageSearchIndex messageSearchIndex;
+ private StoreMailboxManager storeMailboxManager;
+ private StoreMessageManager inboxMessageManager;
+ private OpenSearchSearchHighlighter testee;
+
+ private static final ConditionFactory CALMLY_AWAIT = Awaitility
+ .with().pollInterval(ONE_HUNDRED_MILLISECONDS)
+ .and().pollDelay(ONE_HUNDRED_MILLISECONDS)
+ .await();
+
+ @RegisterExtension
+ static TikaExtension tika = new TikaExtension();
+
+ @RegisterExtension
+ static DockerOpenSearchExtension openSearch = new
DockerOpenSearchExtension(DockerOpenSearchExtension.CleanupStrategy.NONE);
+ static ReactorOpenSearchClient client;
+ static TikaTextExtractor textExtractor;
+
+ @BeforeAll
+ static void setUpAll() throws Exception {
+ client = openSearch.getDockerOpenSearch().clientProvider().get();
+ textExtractor = new TikaTextExtractor(new RecordingMetricFactory(),
+ new TikaHttpClientImpl(TikaConfiguration.builder()
+ .host(tika.getIp())
+ .port(tika.getPort())
+ .timeoutInMillis(tika.getTimeoutInMillis())
+ .build()));
+ }
+
+ @AfterAll
+ static void tearDown() throws IOException {
+ client.close();
+ }
+
+ @BeforeEach
+ public void setUp() throws Exception {
+ WriteAliasName writeAliasName = new
WriteAliasName(UUID.randomUUID().toString());
+ ReadAliasName readAliasName = new
ReadAliasName(UUID.randomUUID().toString());
+ IndexName indexName = new IndexName(UUID.randomUUID().toString());
+ MailboxIndexCreationUtil.prepareClient(
+ client, readAliasName, writeAliasName, indexName,
+ openSearch.getDockerOpenSearch().configuration());
+
+ MailboxIdRoutingKeyFactory routingKeyFactory = new
MailboxIdRoutingKeyFactory();
+ OpenSearchMailboxConfiguration openSearchMailboxConfiguration =
OpenSearchMailboxConfiguration.builder()
+ .optimiseMoves(false)
+ .textFuzzinessSearch(false)
+ .build();
+ final MessageId.Factory messageIdFactory = new
InMemoryMessageId.Factory();
+
+ OpenSearchSearcher openSearchSearcher = new OpenSearchSearcher(client,
new QueryConverter(new CriterionConverter(openSearchMailboxConfiguration)),
SEARCH_SIZE,
+ readAliasName, routingKeyFactory);
+
+ InMemoryIntegrationResources resources =
InMemoryIntegrationResources.builder()
+ .preProvisionnedFakeAuthenticator()
+ .fakeAuthorizator()
+ .inVmEventBus()
+ .defaultAnnotationLimits()
+ .defaultMessageParser()
+ .listeningSearchIndex(preInstanciationStage -> new
OpenSearchListeningMessageSearchIndex(
+ preInstanciationStage.getMapperFactory(),
+ ImmutableSet.of(),
+ new OpenSearchIndexer(client,
+ writeAliasName),
+ openSearchSearcher,
+ new MessageToOpenSearchJson(textExtractor,
ZoneId.of("Europe/Paris"), IndexAttachments.YES, IndexHeaders.YES),
+ preInstanciationStage.getSessionProvider(), routingKeyFactory,
messageIdFactory,
+ openSearchMailboxConfiguration, new RecordingMetricFactory()))
+ .noPreDeletionHooks()
+ .storeQuotaManager()
+ .build();
+
+ storeMailboxManager = resources.getMailboxManager();
+ messageSearchIndex = resources.getSearchIndex();
+ MailboxSession session =
storeMailboxManager.createSystemSession(USERNAME1);
+ MailboxPath inboxPath = MailboxPath.inbox(USERNAME1);
+ storeMailboxManager.createMailbox(inboxPath, session);
+ inboxMessageManager = (StoreMessageManager)
storeMailboxManager.getMailbox(inboxPath, session);
+
+ testee = new OpenSearchSearchHighlighter(openSearchSearcher,
storeMailboxManager, messageIdFactory);
+ }
+
+ @Override
+ public SearchHighlighter testee() {
+ return testee;
+ }
+
+ @Override
+ public MailboxSession session(Username username) {
+ return storeMailboxManager.createSystemSession(username);
+ }
+
+ @Override
+ public MessageManager.AppendResult
appendMessage(MessageManager.AppendCommand appendCommand, MailboxSession
session) {
+ return Throwing.supplier(() ->
inboxMessageManager.appendMessage(appendCommand, session)).get();
+ }
+
+ @Override
+ public MailboxId randomMailboxId(Username username) {
+ String random = new String(new byte[8]);
+ return Throwing.supplier(() ->
storeMailboxManager.createMailbox(MailboxPath.forUser(USERNAME1, random),
session(username)).get()).get();
+ }
+
+ @Override
+ public void applyRightsCommand(MailboxId mailboxId, Username owner,
Username delegated) {
+ Mailbox mailbox = inboxMessageManager.getMailboxEntity();
+ Throwing.runnable(() ->
storeMailboxManager.applyRightsCommand(mailbox.generateAssociatedPath(),
+
MailboxACL.command().forUser(delegated).rights(MailboxACL.FULL_RIGHTS).asAddition(),
+ session(owner))).run();
+ }
+
+ @Override
+ public void verifyMessageWasIndexed(int indexedMessageCount) {
+ CALMLY_AWAIT.atMost(Durations.TEN_SECONDS)
+ .untilAsserted(() ->
assertThat(messageSearchIndex.search(session(USERNAME1),
inboxMessageManager.getMailboxEntity(), SearchQuery.of()).toStream().count())
+ .isEqualTo(indexedMessageCount));
+ }
+
+ @Test
+ void shouldHighlightAttachmentTextContentWhenTextBodyDoesNotMatch() throws
Exception {
+
assumeTrue(storeMailboxManager.getSupportedSearchCapabilities().contains(MailboxManager.SearchCapabilities.Attachment));
+ MailboxSession session = session(USERNAME1);
+
+ ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
+ Message.Builder.of()
+ .setTo("[email protected]")
+ .setSubject("Hallo, Thx Matthieu for your help")
+ .setBody("append contentA to inbox",
StandardCharsets.UTF_8)),
+ session).getId();
+
+ // m2 has an attachment with text content: "This is a beautiful banana"
+ ComposedMessageId m2 = inboxMessageManager.appendMessage(
+ MessageManager.AppendCommand.builder()
+
.build(ClassLoaderUtils.getSystemResourceAsSharedStream("eml/emailWithTextAttachment.eml")),
+ session).getId();
+
+ verifyMessageWasIndexed(2);
+
+ String keywordSearch = "beautiful";
+ MultimailboxesSearchQuery multiMailboxSearch =
MultimailboxesSearchQuery.from(SearchQuery.of(
+ new
SearchQuery.ConjunctionCriterion(SearchQuery.Conjunction.OR,
+ List.of(SearchQuery.bodyContains(keywordSearch),
+ SearchQuery.attachmentContains(keywordSearch)))))
+ .inMailboxes(List.of(m1.getMailboxId(), m2.getMailboxId()))
+ .build();
+
+ List<SearchSnippet> searchSnippets =
Flux.from(testee().highlightSearch(List.of(m1.getMessageId(),
m2.getMessageId()), multiMailboxSearch, session))
+ .collectList()
+ .block();
+
+ assertThat(searchSnippets).hasSize(1);
+ assertThat(searchSnippets.getFirst().highlightedBody()).contains("This
is a <mark>beautiful</mark> banana.");
+ }
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]