This is an automated email from the ASF dual-hosted git repository. fortino pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push: new 0e9d8119cf OAK-10353: Elastic custom analyzer should ignore unsupported properties (#1029) 0e9d8119cf is described below commit 0e9d8119cfbb73a416547aaac9ece554bd866abc Author: Fabrizio Fortino <fabrizio.fort...@gmail.com> AuthorDate: Thu Jul 20 09:04:47 2023 +0200 OAK-10353: Elastic custom analyzer should ignore unsupported properties (#1029) * OAK-10353: Elastic custom analyzer should ignore unsupported properties (eg: hidden, arrays) * OAK-10353: (doc) add link to lucene logic for unsupported props * test: fix Lowecase -> LowerCase --- .../index/elastic/index/ElasticCustomAnalyzer.java | 17 +++- .../index/elastic/ElasticFullTextAnalyzerTest.java | 18 ++-- .../plugins/index/FullTextAnalyzerCommonTest.java | 111 +++++++++++---------- 3 files changed, 79 insertions(+), 67 deletions(-) diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java index 4a9cb052f7..4e2c26d40e 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java @@ -173,6 +173,7 @@ public class ElasticCustomAnalyzer { .map(Map.Entry::getValue) .findFirst().orElseGet(Collections::emptyList); Map<String, String> luceneArgs = StreamSupport.stream(child.getProperties().spliterator(), false) + .filter(ElasticCustomAnalyzer::isPropertySupported) .filter(ps -> !unsupportedParameters.contains(ps.getName())) .collect(Collectors.toMap(PropertyState::getName, ps -> ps.getValue(Type.STRING))); @@ -267,11 +268,7 @@ public class ElasticCustomAnalyzer { private static Map<String, Object> convertNodeState(NodeState state, List<ParameterTransformer> transformers, List<String> preloadedContent) { Map<String, Object> luceneParams = StreamSupport.stream(Spliterators.spliteratorUnknownSize(state.getProperties().iterator(), Spliterator.ORDERED), false) - .filter(ps -> ps.getType() != Type.BINARY && - !ps.isArray() && - !NodeStateUtils.isHidden(ps.getName()) && - !IGNORE_PROP_NAMES.contains(ps.getName()) - ) + .filter(ElasticCustomAnalyzer::isPropertySupported) .collect(Collectors.toMap(PropertyState::getName, ps -> { String value = ps.getValue(Type.STRING); List<String> values = Arrays.asList(value.split(",")); @@ -292,6 +289,16 @@ public class ElasticCustomAnalyzer { }); } + /* + * See org.apache.jackrabbit.oak.plugins.index.lucene.NodeStateAnalyzerFactory#convertNodeState + */ + private static boolean isPropertySupported(PropertyState ps) { + return ps.getType() != Type.BINARY && + !ps.isArray() && + !NodeStateUtils.isHidden(ps.getName()) && + !IGNORE_PROP_NAMES.contains(ps.getName()); + } + /** * This loader is just used to load resources in order to benefit from parser (eg: to remove comments or support multiple * formats) already implemented in lucene. diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAnalyzerTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAnalyzerTest.java index e61cc5ab72..fa72151e08 100644 --- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAnalyzerTest.java +++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAnalyzerTest.java @@ -104,8 +104,8 @@ public class ElasticFullTextAnalyzerTest extends FullTextAnalyzerCommonTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - filters.addChild("LowerCase"); - Tree stemmer = filters.addChild("stemmer"); + addFilter(filters, "LowerCase"); + Tree stemmer = addFilter(filters, "stemmer"); stemmer.setProperty("language", "dutch_kp"); }); @@ -126,7 +126,7 @@ public class ElasticFullTextAnalyzerTest extends FullTextAnalyzerCommonTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - filters.addChild("Apostrophe"); + addFilter(filters, "Apostrophe"); }); Tree test = root.getTree("/"); @@ -144,7 +144,7 @@ public class ElasticFullTextAnalyzerTest extends FullTextAnalyzerCommonTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree dd = filters.addChild("dictionary_decompounder"); + Tree dd = addFilter(filters, "dictionary_decompounder"); dd.setProperty("word_list", "words.txt"); dd.addChild("words.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, "Donau\ndampf\nmeer\nschiff"); @@ -168,7 +168,7 @@ public class ElasticFullTextAnalyzerTest extends FullTextAnalyzerCommonTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree dd = filters.addChild("fingerprint"); + Tree dd = addFilter(filters, "fingerprint"); dd.setProperty("max_output_size", "10"); }); @@ -190,7 +190,7 @@ public class ElasticFullTextAnalyzerTest extends FullTextAnalyzerCommonTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree kt = filters.addChild("keep_types"); + Tree kt = addFilter(filters, "keep_types"); kt.setProperty("types", "<NUM>"); }); @@ -212,12 +212,12 @@ public class ElasticFullTextAnalyzerTest extends FullTextAnalyzerCommonTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree mh = filters.addChild("min_hash"); + Tree mh = addFilter(filters, "min_hash"); mh.setProperty("hash_count", "1"); mh.setProperty("bucket_count", "512"); mh.setProperty("hash_set_size", "1"); mh.setProperty("with_rotation", "true"); - Tree shingle = filters.addChild("shingle"); + Tree shingle = addFilter(filters, "shingle"); shingle.setProperty("min_shingle_size", "5"); shingle.setProperty("max_shingle_size", "5"); shingle.setProperty("output_unigrams", "false"); @@ -241,7 +241,7 @@ public class ElasticFullTextAnalyzerTest extends FullTextAnalyzerCommonTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree snowball = filters.addChild("SnowballPorter"); + Tree snowball = addFilter(filters, "SnowballPorter"); snowball.setProperty("language", "Italian"); }); diff --git a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java index 19060dd7f3..5f4efc1aa3 100644 --- a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java +++ b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java @@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.plugins.index; import org.apache.jackrabbit.JcrConstants; import org.apache.jackrabbit.oak.api.QueryEngine; import org.apache.jackrabbit.oak.api.Tree; +import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.commons.junit.LogCustomizer; import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants; import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder; @@ -26,6 +27,8 @@ import org.apache.jackrabbit.oak.query.AbstractQueryTest; import org.junit.Assert; import org.junit.Test; +import java.io.ByteArrayInputStream; +import java.io.IOException; import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.List; @@ -36,6 +39,7 @@ import java.util.function.Consumer; import static org.apache.jackrabbit.JcrConstants.JCR_CONTENT; import static org.apache.jackrabbit.JcrConstants.JCR_DATA; import static org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.ANALYZERS; +import static org.apache.jackrabbit.oak.spi.nodetype.NodeTypeConstants.NT_OAK_UNSTRUCTURED; import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.MatcherAssert.assertThat; @@ -256,7 +260,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "whitespace"); - Tree stopFilter = anl.addChild(FulltextIndexConstants.ANL_FILTERS).addChild("Stop"); + Tree stopFilter = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "Stop"); stopFilter.setProperty("words", "stop1.txt, stop2.txt"); stopFilter.addChild("stop1.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, "foo"); @@ -279,21 +283,21 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree charFilters = anl.addChild(FulltextIndexConstants.ANL_CHAR_FILTERS); - charFilters.addChild("HTMLStrip"); - Tree mappingFilter = charFilters.addChild("Mapping"); + addFilter(charFilters, "HTMLStrip"); + Tree mappingFilter = addFilter(charFilters, "Mapping"); mappingFilter.setProperty("mapping", "mappings.txt"); mappingFilter.addChild("mappings.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, getHinduArabicMapping()); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - filters.addChild("LowerCase"); - Tree stopFilter = filters.addChild("Stop"); + addFilter(filters, "LowerCase"); + Tree stopFilter = addFilter(filters, "Stop"); stopFilter.setProperty("words", "stop1.txt, stop2.txt"); stopFilter.addChild("stop1.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, "my"); stopFilter.addChild("stop2.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, "is"); - filters.addChild("PorterStem"); + addFilter(filters, "PorterStem"); }); Tree test = root.getTree("/"); @@ -322,11 +326,13 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree charFilters = anl.addChild(FulltextIndexConstants.ANL_CHAR_FILTERS); - Tree mappingFilter = charFilters.addChild("Mapping"); + Tree mappingFilter = addFilter(charFilters, "Mapping"); mappingFilter.setProperty("mapping", "mapping-ISOLatin1Accent.txt"); mappingFilter.addChild("mapping-ISOLatin1Accent.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, mappings); - Tree synFilter = anl.addChild(FulltextIndexConstants.ANL_FILTERS).addChild("Synonym"); + + Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); + Tree synFilter = addFilter(filters, "Synonym"); synFilter.setProperty("synonyms", "syn.txt"); synFilter.setProperty("format", "solr"); synFilter.setProperty("expand", "true"); @@ -335,9 +341,8 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { .setProperty(JcrConstants.JCR_DATA, "# Synonym mappings can be used for spelling correction too\n" + "tool => instrument"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - filters.addChild("LowerCase"); - Tree stopFilter = filters.addChild("Stop"); + addFilter(filters, "LowerCase"); + Tree stopFilter = addFilter(filters, "Stop"); stopFilter.setProperty("format", "snowball"); stopFilter.setProperty("enablePositionIncrements", "true"); stopFilter.setProperty("ignoreCase", "true"); @@ -367,8 +372,8 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - filters.addChild("LowerCase"); - filters.addChild("SpanishLightStem"); + addFilter(filters, "LowerCase"); + addFilter(filters, "SpanishLightStem"); }); Tree test = root.getTree("/"); @@ -385,8 +390,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - filters.addChild("KStem"); + addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "KStem"); }); Tree test = root.getTree("/"); @@ -404,12 +408,12 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - filters.addChild("LowerCase"); - Tree marker = filters.addChild("KeywordMarker"); + addFilter(filters, "LowerCase"); + Tree marker = addFilter(filters, "KeywordMarker"); marker.setProperty("protected", "protected.txt"); marker.addChild("protected.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, "# some comment here\nrunning"); - filters.addChild("PorterStem"); + addFilter(filters, "PorterStem"); }); Tree test = root.getTree("/"); @@ -426,7 +430,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree charFilters = anl.addChild(FulltextIndexConstants.ANL_CHAR_FILTERS); - Tree patternReplace = charFilters.addChild("PatternReplace"); + Tree patternReplace = addFilter(charFilters, "PatternReplace"); patternReplace.setProperty("pattern", "(\\d+)-(?=\\d)"); patternReplace.setProperty("replacement", "$1"); }); @@ -447,8 +451,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Classic"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - filters.addChild("Classic"); + addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "Classic"); }); Tree test = root.getTree("/"); @@ -464,9 +467,9 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree asciiFilter = filters.addChild("AsciiFolding"); + Tree asciiFilter = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "AsciiFolding"); asciiFilter.setProperty("preserveOriginal", "true"); + asciiFilter.setProperty(JcrConstants.JCR_PRIMARYTYPE, NT_OAK_UNSTRUCTURED, Type.NAME); }); Tree test = root.getTree("/"); @@ -483,12 +486,12 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree cjk = filters.addChild("CJKBigram"); + Tree cjk = addFilter(filters, "CJKBigram"); cjk.setProperty("hangul", "false"); cjk.setProperty("hiragana", "false"); cjk.setProperty("katakana", "false"); cjk.setProperty("outputUnigrams", "false"); - filters.addChild("CJKWidth"); + addFilter(filters, "CJKWidth"); }); Tree test = root.getTree("/"); @@ -508,8 +511,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree commonGrams = filters.addChild("CommonGrams"); + Tree commonGrams = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "CommonGrams"); commonGrams.setProperty("words", "words.txt"); commonGrams.addChild("words.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, "is\nthe"); @@ -529,8 +531,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Whitespace"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree delimited = filters.addChild("DelimitedPayload"); + Tree delimited = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "DelimitedPayload"); delimited.setProperty("encoder", "float"); }); @@ -548,9 +549,9 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - filters.addChild("LowerCase"); - filters.addChild("ASCIIFolding"); - Tree wordDelimiter = filters.addChild("WordDelimiter"); + addFilter(filters, "LowerCase"); + addFilter(filters, "ASCIIFolding"); + Tree wordDelimiter = addFilter(filters, "WordDelimiter"); wordDelimiter.setProperty("generateWordParts", "1"); wordDelimiter.setProperty("stemEnglishPossessive", "1"); wordDelimiter.setProperty("generateNumberParts", "1"); @@ -560,7 +561,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { wordDelimiter.setProperty("catenateWords", "0"); wordDelimiter.setProperty("catenateNumbers", "0"); wordDelimiter.setProperty("catenateAll", "0"); - filters.addChild("PorterStem"); + addFilter(filters, "PorterStem"); }); Tree test = root.getTree("/"); @@ -584,8 +585,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Whitespace"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree edgeNGram = filters.addChild("NGram"); + Tree edgeNGram = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "NGram"); edgeNGram.setProperty("minGramSize", "2"); edgeNGram.setProperty("maxGramSize", "3"); }); @@ -606,8 +606,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Whitespace"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree edgeNGram = filters.addChild("EdgeNGram"); + Tree edgeNGram = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "EdgeNGram"); edgeNGram.setProperty("minGramSize", "1"); edgeNGram.setProperty("maxGramSize", "3"); }); @@ -626,8 +625,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Whitespace"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree elision = filters.addChild("Elision"); + Tree elision = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "Elision"); elision.setProperty("articles", "articles.txt"); elision.addChild("articles.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, "j\ns\nc\nt"); @@ -647,8 +645,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree kw = filters.addChild("KeepWord"); + Tree kw = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "KeepWord"); kw.setProperty("words", "words.txt"); kw.addChild("words.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, "dog\nelephant\nfox"); @@ -672,8 +669,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Whitespace"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree length = filters.addChild("Length"); + Tree length = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "Length"); length.setProperty("min", "0"); length.setProperty("max", "4"); }); @@ -695,8 +691,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Whitespace"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree length = filters.addChild("LimitTokenCount"); + Tree length = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "LimitTokenCount"); length.setProperty("maxTokenCount", "2"); }); @@ -717,8 +712,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - filters.addChild("GermanNormalization"); + addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "GermanNormalization"); }); Tree test = root.getTree("/"); @@ -735,8 +729,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree pcg = filters.addChild("PatternCaptureGroup"); + Tree pcg = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "PatternCaptureGroup"); pcg.setProperty("pattern", "(([a-z]+)(\\d*))"); }); @@ -754,8 +747,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); - Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS); - Tree shingle = filters.addChild("Shingle"); + Tree shingle = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "Shingle"); shingle.setProperty("minShingleSize", "2"); shingle.setProperty("maxShingleSize", "3"); shingle.setProperty("outputUnigrams", "false"); @@ -777,7 +769,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { idx.setProperty(IndexConstants.ASYNC_PROPERTY_NAME, "async"); Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); - Tree synFilter = anl.addChild(FulltextIndexConstants.ANL_FILTERS).addChild("Synonym"); + Tree synFilter = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "Synonym"); synFilter.setProperty("synonyms", "syn.txt"); // Don't add syn.txt to make analyzer (and hence index def) invalid // synFilter.addChild("syn.txt").addChild(JCR_CONTENT).setProperty(JCR_DATA, "blah, foo, bar"); @@ -798,7 +790,7 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { setup(List.of("foo"), idx -> { Tree anl = idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT); anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME, "Standard"); - Tree synFilter = anl.addChild(FulltextIndexConstants.ANL_FILTERS).addChild("Synonym"); + Tree synFilter = addFilter(anl.addChild(FulltextIndexConstants.ANL_FILTERS), "Synonym"); synFilter.setProperty("synonyms", "syn.txt"); synFilter.addChild("syn.txt").addChild(JcrConstants.JCR_CONTENT) .setProperty(JcrConstants.JCR_DATA, "plane, airplane, aircraft\nflies=>scars"); @@ -867,6 +859,19 @@ public abstract class FullTextAnalyzerCommonTest extends AbstractQueryTest { }); } + protected Tree addFilter(Tree analyzer, String filterName) { + Tree filter = analyzer.addChild(filterName); + // mimics nodes api + filter.setProperty(JcrConstants.JCR_PRIMARYTYPE, NT_OAK_UNSTRUCTURED, Type.NAME); + try { + filter.setProperty("binary", root.createBlob(new ByteArrayInputStream(new byte[0])), Type.BINARY); + } catch (IOException e) { + throw new RuntimeException(e); + } + filter.setProperty("array", List.of("a", "b"), Type.STRINGS); + return filter; + } + private static final BiConsumer<IndexDefinitionBuilder, List<String>> DEFAULT_BUILDER_HOOK = ((builder, analyzedFields) -> analyzedFields.forEach(f -> builder.indexRule("nt:base").property(f).analyzed().nodeScopeIndex()));