This is an automated email from the ASF dual-hosted git repository. thomasm pushed a commit to branch OAK-11638 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit d6f55ec116f78d0c9ef0b3464ecf39cee881ff36 Author: Thomas Mueller <[email protected]> AuthorDate: Fri Apr 4 09:59:25 2025 +0200 OAK-11638 Elastic: ignore standard token filter configurations --- .../index/elastic/index/ElasticCustomAnalyzer.java | 9 ++++--- .../plugins/index/FullTextAnalyzerCommonTest.java | 30 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java index 02690691b7..f337935a9f 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java @@ -257,14 +257,17 @@ public class ElasticCustomAnalyzer { Map<String, Object> args = convertNodeState(child, transformers, content); - if (name.equals("word_delimiter")) { + if (name.equals("standard")) { + // OAK-11638 ignore standard token filter + LOG.info("Ignore standard token filter"); + skipEntry = true; + } else if (name.equals("word_delimiter")) { // https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-word-delimiter-tokenfilter.html // We recommend using the word_delimiter_graph instead of the word_delimiter filter. // The word_delimiter filter can produce invalid token graphs. LOG.info("Replacing the word delimiter filter with the word delimiter graph"); name = "word_delimiter_graph"; - } - if (name.equals("hyphenation_compound_word")) { + } else if (name.equals("hyphenation_compound_word")) { name = "hyphenation_decompounder"; String hypenator = args.getOrDefault("hyphenator", "").toString(); LOG.info("Using the hyphenation_decompounder: " + hypenator); diff --git a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java index 77f0893be9..3821a084b0 100644 --- a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java +++ b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java @@ -1189,6 +1189,36 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { }); } + // OAK-11638 + @Test + public void analyzerWithStandardTokenFilter() throws Exception { + setup(List.of("foo"), idx -> { + Tree analyzers = idx.addChild(FulltextIndexConstants.ANALYZERS); + Tree defaultAnalyzers = analyzers.addChild(FulltextIndexConstants.ANL_DEFAULT); + Tree tokenizer = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_TOKENIZER); + tokenizer.setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); + Tree filters = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_FILTERS); + filters.setOrderableChildren(true); + filters.addChild("Standard"); + filters.addChild("LowerCase"); + Tree synFilter = addFilter(filters, "Synonym"); + synFilter.setProperty("synonyms", "syn.txt"); + synFilter.addChild("syn.txt").addChild(JCR_CONTENT) + .setProperty(JCR_DATA, "plane, airplane, aircraft\n" + + "find=>replace"); + filters.addChild("GermanLightStem"); + filters.addChild("FrenchLightStem"); + filters.addChild("ItalianLightStem"); + filters.addChild("PorterStem"); + }); + Tree content = root.getTree("/").addChild("content"); + content.addChild("bar").setProperty("foo", "replace"); + root.commit(); + assertEventually(() -> { + assertQuery("select * from [nt:base] where contains(*, 'find')", List.of("/content/bar")); + }); + } + // OAK-11568 @Test @Ignore
