This is an automated email from the ASF dual-hosted git repository. rcordier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push: new cd8887f540 JAMES-4057 Match attachment against file extensions (#2731) cd8887f540 is described below commit cd8887f54087f445ebe911f70c7f3d72472a6d9d Author: Benoit TELLIER <btell...@linagora.com> AuthorDate: Wed May 28 04:18:11 2025 +0200 JAMES-4057 Match attachment against file extensions (#2731) --- .../opensearch/query/CriterionConverter.java | 47 ++++++++++++++++++---- .../opensearch/OpenSearchIntegrationTest.java | 28 +++++++++++++ ...hOptimizeMoveAndFuzzySearchIntegrationTest.java | 7 ++++ 3 files changed, 74 insertions(+), 8 deletions(-) diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/query/CriterionConverter.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/query/CriterionConverter.java index 2c12932330..3753da0e32 100644 --- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/query/CriterionConverter.java +++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/query/CriterionConverter.java @@ -252,10 +252,18 @@ public class CriterionConverter { } case FULL: if (useQueryStringQuery && QUERY_STRING_CONTROL_CHAR.matchesAnyOf(textCriterion.getOperator().getValue())) { - return new SimpleQueryStringQuery.Builder() - .fields(ImmutableList.of(JsonMessageConstants.TEXT_BODY, JsonMessageConstants.HTML_BODY, JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.TEXT_CONTENT)) - .query(textCriterion.getOperator().getValue()) - .build().toQuery(); + return new BoolQuery.Builder() + .should(new SimpleQueryStringQuery.Builder() + .fields(ImmutableList.of(JsonMessageConstants.TEXT_BODY, JsonMessageConstants.HTML_BODY, JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.TEXT_CONTENT)) + .query(textCriterion.getOperator().getValue()) + .build().toQuery()) + .should(new TermQuery.Builder() + .field(JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.FILE_EXTENSION) + .value(new FieldValue.Builder().stringValue(textCriterion.getOperator().getValue()).build()) + .build() + .toQuery()) + .build() + .toQuery(); } else { return new BoolQuery.Builder() .should(new MatchQuery.Builder() @@ -279,15 +287,28 @@ public class CriterionConverter { .operator(Operator.And) .build() .toQuery()) + .should(new TermQuery.Builder() + .field(JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.FILE_EXTENSION) + .value(new FieldValue.Builder().stringValue(textCriterion.getOperator().getValue()).build()) + .build() + .toQuery()) .build() .toQuery(); } case ATTACHMENTS: if (useQueryStringQuery) { - return new SimpleQueryStringQuery.Builder() - .fields(ImmutableList.of(JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.TEXT_CONTENT)) - .query(textCriterion.getOperator().getValue()) - .build().toQuery(); + return new BoolQuery.Builder() + .should(new SimpleQueryStringQuery.Builder() + .fields(ImmutableList.of(JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.TEXT_CONTENT)) + .query(textCriterion.getOperator().getValue()) + .build().toQuery()) + .should(new TermQuery.Builder() + .field(JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.FILE_EXTENSION) + .value(new FieldValue.Builder().stringValue(textCriterion.getOperator().getValue()).build()) + .build() + .toQuery()) + .build() + .toQuery(); } else { return new BoolQuery.Builder() .should(new MatchQuery.Builder() @@ -297,6 +318,11 @@ public class CriterionConverter { .operator(Operator.And) .build() .toQuery()) + .should(new TermQuery.Builder() + .field(JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.FILE_EXTENSION) + .value(new FieldValue.Builder().stringValue(textCriterion.getOperator().getValue()).build()) + .build() + .toQuery()) .build() .toQuery(); } @@ -309,6 +335,11 @@ public class CriterionConverter { .operator(Operator.And) .build() .toQuery()) + .should(new TermQuery.Builder() + .field(JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.FILE_EXTENSION) + .value(new FieldValue.Builder().stringValue(textCriterion.getOperator().getValue()).build()) + .build() + .toQuery()) .build() .toQuery(); default: diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java index e1b3299606..a2d2441303 100644 --- a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java +++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java @@ -507,6 +507,34 @@ class OpenSearchIntegrationTest extends AbstractMessageSearchIndexTest { .containsOnly(messageId2.getMessageId()); } + @Test + void shouldMatchFileExtension() throws Exception { + MailboxPath mailboxPath = MailboxPath.forUser(USERNAME, INBOX); + MailboxSession session = MailboxSessionUtil.create(USERNAME); + MessageManager messageManager = storeMailboxManager.getMailbox(mailboxPath, session); + + messageManager.appendMessage( + MessageManager.AppendCommand.builder().build( + Message.Builder + .of() + .setSubject("test") + .setBody("testmail", StandardCharsets.UTF_8) + .addField(new RawField("To", "al...@domain.tld")) + .build()), + session).getId(); + + ComposedMessageId messageId2 = messageManager.appendMessage( + MessageManager.AppendCommand.builder() + .build(ClassLoaderUtils.getSystemResourceAsSharedStream("eml/attachments-filename-in-content-type.eml")), + session).getId(); + + awaitForOpenSearch(QueryBuilders.matchAll().build().toQuery(), 15); + Thread.sleep(500); + + assertThat(Flux.from(messageManager.search(SearchQuery.of(SearchQuery.mailContains("txt")), session)).toStream()) + .containsOnly(messageId2.getUid()); + } + @Disabled("MAILBOX-403 Relaxed the matching constraints for email addresses in text bodies to reduce OpenSearch disk space usage") @Test public void textShouldNotMatchOtherAddressesOfTheSameDomain() { diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchOptimizeMoveAndFuzzySearchIntegrationTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchOptimizeMoveAndFuzzySearchIntegrationTest.java index 6376c7deaf..249c50fcfd 100644 --- a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchOptimizeMoveAndFuzzySearchIntegrationTest.java +++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchOptimizeMoveAndFuzzySearchIntegrationTest.java @@ -30,6 +30,7 @@ import org.apache.james.mailbox.model.ComposedMessageId; import org.apache.james.mailbox.model.MailboxPath; import org.apache.james.mailbox.model.SearchQuery; import org.apache.james.mime4j.dom.Message; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.opensearch.client.opensearch._types.query_dsl.QueryBuilders; @@ -88,4 +89,10 @@ class OpenSearchOptimizeMoveAndFuzzySearchIntegrationTest extends OpenSearchInte assertThat(Flux.from(messageManager.search(SearchQuery.of(SearchQuery.bodyContains("boyd")), session)).toStream()) .containsExactly(composedMessageId.getUid()); } + + @Disabled("Fuzzyness makes the results wider") + @Override + void shouldMatchFileExtension() { + + } } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org For additional commands, e-mail: notifications-h...@james.apache.org