This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push:
new a6f9f32d8c JAMES-4100 Improve Search Snippet display (#2583)
a6f9f32d8c is described below
commit a6f9f32d8c0a63f262b62391d3ec1d15510ad29b
Author: hungphan227 <[email protected]>
AuthorDate: Wed Jan 22 15:18:30 2025 +0700
JAMES-4100 Improve Search Snippet display (#2583)
Co-authored-by: hung phan <[email protected]>
---
.../searchhighligt/SearchHighLighterContract.java | 39 ++++++++++++++++++++++
.../lucene/search/LuceneIndexableDocument.java | 2 +-
.../lucene/search/LuceneSearchHighlighter.java | 2 ++
.../mailbox/opensearch/json/IndexableMessage.java | 2 +-
.../james/mailbox/store/search/SearchUtil.java | 17 ++++++++++
.../contract/SearchSnippetGetMethodContract.scala | 6 ++--
6 files changed, 63 insertions(+), 5 deletions(-)
diff --git
a/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
b/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
index 2181140be9..998b8f2ac2 100644
---
a/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
+++
b/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
@@ -532,4 +532,43 @@ public interface SearchHighLighterContract {
softly.assertThat(searchSnippets.getFirst().highlightedBody().get()).contains("<mark>barcamp</mark>");
});
}
+
+ @Test
+ default void highlightSearchShouldShortenGreaterThanCharacters() throws
Exception { // checks the highlighted body against a text body whose lines start with runs of '>'
+ MailboxSession session = session(USERNAME1);
+
+ // Given m1 and m2, where only m1's body contains the searched word (contentA) and has lines starting with runs of '>'
+ ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
+ Message.Builder.of()
+ .setTo("[email protected]")
+ .setSubject("Hallo, Thx Matthieu for your help")
+ .setBody("Start \n>>>>>>>>>> append contentA to > inbox
\n>>>>>> End",
+ StandardCharsets.UTF_8)),
+ session).getId();
+
+ ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
+ Message.Builder.of()
+ .setTo("[email protected]")
+ .setSubject("Hallo, Thx Alex for your help")
+ .setBody("append contentB to inbox",
StandardCharsets.UTF_8)),
+ session).getId();
+
+ verifyMessageWasIndexed(2); // presumably blocks until both appended messages are searchable; verify against the implementing test class
+
+ // When searching for the word (contentA) in the body
+ MultimailboxesSearchQuery multiMailboxSearch =
MultimailboxesSearchQuery.from(SearchQuery.of(
+ SearchQuery.bodyContains("contentA")))
+ .inMailboxes(List.of(m1.getMailboxId(), m2.getMailboxId()))
+ .build();
+
+ // Then highlightSearch should return the SearchSnippet with the
highlightedBody containing the word (contentA)
+ List<SearchSnippet> searchSnippets =
Flux.from(testee().highlightSearch(List.of(m1.getMessageId(),
m2.getMessageId()), multiMailboxSearch, session))
+ .collectList()
+ .block();
+ assertThat(searchSnippets).hasSize(1); // m2's body holds contentB, not contentA, so a single snippet (for m1) is expected
+ assertSoftly(softly -> { // the expected body below has no '>' run directly after either line break; NOTE(review): the mid-line '>' may have been an HTML entity before this mail was rendered - confirm against the repository
+
softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId());
+
softly.assertThat(searchSnippets.getFirst().highlightedBody().get()).isEqualTo("Start
\n append <mark>contentA</mark> to > inbox \n End");
+ });
+ }
}
diff --git
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
index bee20308d2..4f14d14c3f 100644
---
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
+++
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
@@ -195,7 +195,7 @@ public class LuceneIndexableDocument {
doc.add(new TextField(BCC_FIELD,
uppercase(EMailers.from(headerCollection.getBccAddressSet()).serialize()),
Field.Store.YES));
// index body
- Optional<String> bodyText = mimePartExtracted.locateFirstTextBody();
+ Optional<String> bodyText =
mimePartExtracted.locateFirstTextBody().map(SearchUtil::removeGreaterThanCharactersAtBeginningOfLine);
Optional<String> bodyHtml = mimePartExtracted.locateFirstHtmlBody();
bodyText.or(() -> bodyHtml)
diff --git
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
index c398865df8..8a25d922c4 100644
---
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
+++
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
@@ -57,6 +57,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
@@ -126,6 +127,7 @@ public class LuceneSearchHighlighter implements
SearchHighlighter {
Query query = buildQueryFromSearchQuery(searchQuery);
QueryScorer scorer = new QueryScorer(query);
Highlighter highlighter = new Highlighter(formatter, scorer);
+ highlighter.setEncoder(new SimpleHTMLEncoder());
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer,
configuration.fragmentSize()));
return highlighter;
}
diff --git
a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
index 722d8b8988..8bfaf646f8 100644
---
a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
+++
b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
@@ -135,7 +135,7 @@ public class IndexableMessage {
.asMimePart(textExtractor)
.map(parsingResult -> {
- Optional<String> bodyText =
parsingResult.locateFirstTextBody();
+ Optional<String> bodyText =
parsingResult.locateFirstTextBody().map(SearchUtil::removeGreaterThanCharactersAtBeginningOfLine);
Optional<String> bodyHtml =
parsingResult.locateFirstHtmlBody();
boolean hasAttachment =
MessageAttachmentMetadata.hasNonInlinedAttachment(message.getAttachments());
diff --git
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
index 8c7686f60c..0a90b132b6 100644
---
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
+++
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
@@ -473,5 +473,22 @@ public class SearchUtil {
};
}
+ public static String removeGreaterThanCharactersAtBeginningOfLine(String
text) { // returns a copy of text without the run of '>' characters that directly follows each '\n'
+ StringBuilder result = new StringBuilder();
+ boolean isNewLine = false; // true from a '\n' until the next kept character; NOTE(review): starts false, so a '>' run at the very start of text is left in place - confirm this is intended
+ for (int i = 0; i < text.length(); i++) {
+ char current = text.charAt(i);
+
+ if (current == '\n') {
+ isNewLine = true;
+ result.append(current); // line breaks themselves are always kept
+ } else if (!isNewLine || current != '>') { // a '>' seen while isNewLine is true matches neither branch and is dropped
+ result.append(current);
+ isNewLine = false; // the first kept character ends the stripping, so any later '>' on the same line is kept
+ }
+ }
+
+ return result.toString();
+ }
}
diff --git
a/server/protocols/jmap-rfc-8621-integration-tests/jmap-rfc-8621-integration-tests-common/src/main/scala/org/apache/james/jmap/rfc8621/contract/SearchSnippetGetMethodContract.scala
b/server/protocols/jmap-rfc-8621-integration-tests/jmap-rfc-8621-integration-tests-common/src/main/scala/org/apache/james/jmap/rfc8621/contract/SearchSnippetGetMethodContract.scala
index 7246e6a80e..17cd4d3cf9 100644
---
a/server/protocols/jmap-rfc-8621-integration-tests/jmap-rfc-8621-integration-tests-common/src/main/scala/org/apache/james/jmap/rfc8621/contract/SearchSnippetGetMethodContract.scala
+++
b/server/protocols/jmap-rfc-8621-integration-tests/jmap-rfc-8621-integration-tests-common/src/main/scala/org/apache/james/jmap/rfc8621/contract/SearchSnippetGetMethodContract.scala
@@ -529,12 +529,12 @@ trait SearchSnippetGetMethodContract {
| "list": [
| {
| "emailId": "${messageId1.serialize}",
- | "subject": "Weekly report - <mark>vttran</mark>
27/02-03/03/2023",
+ | "subject": "Weekly report - <mark>vttran</mark>
27/02-03/03/2023",
| "preview": null
| },
| {
| "emailId": "${messageId2.serialize}",
- | "subject": "Weekly report - <mark>vttran</mark>
19/08-23/08/2024",
+ | "subject": "Weekly report - <mark>vttran</mark>
19/08-23/08/2024",
| "preview": null
| }
| ],
@@ -610,7 +610,7 @@ trait SearchSnippetGetMethodContract {
| "list": [
| {
| "emailId": "${messageId1.serialize}",
- | "subject": "Weekly report - <mark>vttran</mark>
27/02-03/03/2023",
+ | "subject": "Weekly report - <mark>vttran</mark>
27/02-03/03/2023",
| "preview": null
| },
| {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]