(jackrabbit-oak) 01/01: OAK-11638 Elastic: ignore standard token filter configurations

thomasm Fri, 04 Apr 2025 01:09:45 -0700

This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch OAK-11638
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


commit d6f55ec116f78d0c9ef0b3464ecf39cee881ff36
Author: Thomas Mueller <[email protected]>
AuthorDate: Fri Apr 4 09:59:25 2025 +0200

    OAK-11638 Elastic: ignore standard token filter configurations
---
 .../index/elastic/index/ElasticCustomAnalyzer.java |  9 ++++---
 .../plugins/index/FullTextAnalyzerCommonTest.java  | 30 ++++++++++++++++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java
index 02690691b7..f337935a9f 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java
@@ -257,14 +257,17 @@ public class ElasticCustomAnalyzer {
 
             Map<String, Object> args = convertNodeState(child, transformers, content);
 
-            if (name.equals("word_delimiter")) {
+            if (name.equals("standard")) {
+                // OAK-11638 ignore standard token filter
+                LOG.info("Ignore standard token filter");
+                skipEntry = true;
+            } else if (name.equals("word_delimiter")) {
                 // https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-word-delimiter-tokenfilter.html
                 // We recommend using the word_delimiter_graph instead of the word_delimiter filter.
                 // The word_delimiter filter can produce invalid token graphs.
                 LOG.info("Replacing the word delimiter filter with the word delimiter graph");
                 name = "word_delimiter_graph";
-            }
-            if (name.equals("hyphenation_compound_word")) {
+            } else if (name.equals("hyphenation_compound_word")) {
                 name = "hyphenation_decompounder";
                 String hypenator = args.getOrDefault("hyphenator", "").toString();
                 LOG.info("Using the hyphenation_decompounder: " + hypenator);
diff --git a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java
index 77f0893be9..3821a084b0 100644
--- a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java
+++ b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java
@@ -1189,6 +1189,36 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest {
         });
     }
 
+    // OAK-11638
+    @Test
+    public void analyzerWithStandardTokenFilter() throws Exception {
+        setup(List.of("foo"), idx -> {
+            Tree analyzers = idx.addChild(FulltextIndexConstants.ANALYZERS);
+            Tree defaultAnalyzers = analyzers.addChild(FulltextIndexConstants.ANL_DEFAULT);
+            Tree tokenizer = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_TOKENIZER);
+            tokenizer.setProperty(FulltextIndexConstants.ANL_NAME, "Standard");
+            Tree filters = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_FILTERS);
+            filters.setOrderableChildren(true);
+            filters.addChild("Standard");
+            filters.addChild("LowerCase");
+            Tree synFilter = addFilter(filters, "Synonym");
+            synFilter.setProperty("synonyms", "syn.txt");
+            synFilter.addChild("syn.txt").addChild(JCR_CONTENT)
+                    .setProperty(JCR_DATA, "plane, airplane, aircraft\n" +
+                            "find=>replace");
+            filters.addChild("GermanLightStem");
+            filters.addChild("FrenchLightStem");
+            filters.addChild("ItalianLightStem");
+            filters.addChild("PorterStem");
+        });
+        Tree content = root.getTree("/").addChild("content");
+        content.addChild("bar").setProperty("foo", "replace");
+        root.commit();
+        assertEventually(() -> {
+            assertQuery("select * from [nt:base] where contains(*, 'find')", List.of("/content/bar"));
+        });
+    }
+
     // OAK-11568
     @Test
     @Ignore

(jackrabbit-oak) 01/01: OAK-11638 Elastic: ignore standard token filter configurations

Reply via email to