This is an automated email from the ASF dual-hosted git repository. thomasm pushed a commit to branch OAK-10532 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 1f89af2964e1fd7fb0140a03195af1a7403e5786 Author: Thomas Mueller <[email protected]> AuthorDate: Wed Aug 28 17:52:34 2024 +0200 OAK-10532 Cost estimation for not(@x) calculates cost for @x='value' instead --- oak-lucene/hs_err_pid67158.log | 9 +++ .../index/lucene/LuceneIndexStatistics.java | 7 +- .../index/lucene/LuceneIndexPlannerCommonTest.java | 79 +++++++++++++++++++++- .../oak/plugins/index/search/FieldNames.java | 4 ++ .../search/spi/query/FulltextIndexPlanner.java | 17 +++-- 5 files changed, 107 insertions(+), 9 deletions(-) diff --git a/oak-lucene/hs_err_pid67158.log b/oak-lucene/hs_err_pid67158.log new file mode 100644 index 0000000000..0dee34afa4 --- /dev/null +++ b/oak-lucene/hs_err_pid67158.log @@ -0,0 +1,9 @@ +# +# A fatal error has been detected by the Java Runtime Environment: +# +# SIGSEGV (0xb) at pc=0x0000000100e5e670, pid=67158, tid=32515 +# +# JRE version: OpenJDK Runtime Environment Homebrew (11.0.12) (build 11.0.12+0) +# Java VM: OpenJDK 64-Bit Server VM Homebrew (11.0.12+0, mixed mode, tiered, compressed oops, g1 gc, bsd-aarch64) +# Problematic frame: +# C [libjimage.dylib+0x2670] ImageStrings::find(Endian*, char const*, int*, unsigned int)+0x6c \ No newline at end of file diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java index 02cde0eeee..62ed9b8079 100644 --- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java +++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java @@ -21,6 +21,7 @@ import java.util.Collections; import java.util.Map; import org.apache.jackrabbit.guava.common.collect.Maps; +import org.apache.jackrabbit.oak.plugins.index.search.FieldNames; import org.apache.jackrabbit.oak.plugins.index.search.IndexStatistics; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; @@ -28,8 +29,6 @@ import org.apache.lucene.index.MultiFields; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.jackrabbit.oak.plugins.index.search.FieldNames.isPropertyField; - /** * This class would populate some statistics from a reader. We want to be careful here such that * we only collect statistics which don't incur reads from the index i.e. we would only collect @@ -70,7 +69,7 @@ public class LuceneIndexStatistics implements IndexStatistics { if (fields != null) { for(String f : fields) { - if (isPropertyField(f)) { + if (FieldNames.isPropertyField(f) || FieldNames.isNullPropsField(f)) { int docCntForField = -1; try { if (failReadingSyntheticallyFalliableField && SYNTHETICALLY_FALLIABLE_FIELD.equals(f)) { @@ -115,7 +114,7 @@ public class LuceneIndexStatistics implements IndexStatistics { return -1; } - int docCntForField = isPropertyField(field) ? 0 : -1; + int docCntForField = FieldNames.isPropertyField(field) ? 0 : -1; if (numDocsForField.containsKey(field)) { docCntForField = numDocsForField.get(field); } diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java index 5f743765c4..f9e7e29011 100644 --- a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java +++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java @@ -25,6 +25,7 @@ import org.apache.jackrabbit.oak.plugins.index.lucene.reader.DefaultIndexReader; import org.apache.jackrabbit.oak.plugins.index.lucene.reader.LuceneIndexReader; import org.apache.jackrabbit.oak.plugins.index.lucene.reader.LuceneIndexReaderFactory; import org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexDefinitionBuilder; +import org.apache.jackrabbit.oak.plugins.index.search.FieldNames; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition; import org.apache.jackrabbit.oak.plugins.index.search.IndexNode; import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner; @@ -69,7 +70,6 @@ import static org.junit.Assert.assertEquals; public class LuceneIndexPlannerCommonTest extends IndexPlannerCommonTest { - @Test public void useNumDocsOnFieldForCost() throws Exception { NodeBuilder defn = newLucenePropertyIndexDefinition(builder, "test", of("foo", "foo1", "foo2"), "async"); @@ -331,6 +331,83 @@ public class LuceneIndexPlannerCommonTest extends IndexPlannerCommonTest { assertEquals(1, plan.getEstimatedEntryCount()); } + @Test + public void isNotNullAndIsNull() throws Exception{ + String indexPath = "/test"; + LuceneIndexDefinitionBuilder idxBuilder = new LuceneIndexDefinitionBuilder(child(builder, indexPath)); + idxBuilder.indexRule("nt:unstructured").property("foo") + .enclosingRule().property("foo").propertyIndex().nullCheckEnabled() + .enclosingRule().property("foo2").propertyIndex().nullCheckEnabled(); + NodeState defn = idxBuilder.build(); + + LuceneIndexDefinition idxDefn = new LuceneIndexDefinition(root, defn, indexPath); + List<Document> list = new ArrayList<>(); + // 10 documents have "foo" (foo = 1) + for (int i = 0; i < 10; i++) { + Document doc = new Document(); + doc.add(new StringField("foo", "1", Field.Store.NO)); + list.add(doc); + } + // 20 documents have "foo2 is null" (:nullProps = foo2) + for (int i = 0; i < 20; i++) { + Document doc = new Document(); + doc.add(new StringField(FieldNames.NULL_PROPS, "foo2", Field.Store.NO)); + list.add(doc); + } + Directory sampleDirectory = createSampleDirectory(0, list); + LuceneIndexNode node = createIndexNode(idxDefn, sampleDirectory); + + // foo is null + // that can be at most 20, because there are only + // that many documents with ":nullProps" + FilterImpl filter = createFilter("nt:unstructured"); + filter.restrictProperty("foo", Operator.EQUAL, null); + + FulltextIndexPlanner planner = new FulltextIndexPlanner(node, indexPath, filter, Collections.emptyList()); + QueryIndex.IndexPlan plan = planner.getPlan(); + + assertEquals(20, plan.getEstimatedEntryCount()); + assertEquals(1.0, plan.getCostPerExecution(), 0); + assertEquals(1.0, plan.getCostPerEntry(), 0); + + // foo2 is null: + // that can be at most 20, because there are only 10 documents with this property + filter = createFilter("nt:unstructured"); + filter.restrictProperty("foo2", Operator.EQUAL, null); + + planner = new FulltextIndexPlanner(node, indexPath, filter, Collections.emptyList()); + plan = planner.getPlan(); + + assertEquals(20, plan.getEstimatedEntryCount()); + assertEquals(1.0, plan.getCostPerExecution(), 0); + assertEquals(1.0, plan.getCostPerEntry(), 0); + + // foo is not null: + // that can be at most 10, because there are only 10 documents with this property + filter = createFilter("nt:unstructured"); + filter.restrictProperty("foo", Operator.NOT_EQUAL, null); + + planner = new FulltextIndexPlanner(node, indexPath, filter, Collections.emptyList()); + plan = planner.getPlan(); + + assertEquals(10, plan.getEstimatedEntryCount()); + assertEquals(1.0, plan.getCostPerExecution(), 0); + assertEquals(1.0, plan.getCostPerEntry(), 0); + + // foo2 is not null: + // that can be at most 0, because there are no documents wit this property + filter = createFilter("nt:unstructured"); + filter.restrictProperty("foo2", Operator.NOT_EQUAL, null); + + planner = new FulltextIndexPlanner(node, indexPath, filter, Collections.emptyList()); + plan = planner.getPlan(); + + assertEquals(0, plan.getEstimatedEntryCount()); + assertEquals(1.0, plan.getCostPerExecution(), 0); + assertEquals(1.0, plan.getCostPerEntry(), 0); + + } + @Test public void fullTextWithPropRestriction() throws Exception{ String indexPath = "/test"; diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java index 96e9e21521..a8d2237702 100644 --- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java +++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java @@ -154,6 +154,10 @@ public final class FieldNames { && !field.endsWith("_facet"); } + public static boolean isNullPropsField(String field) { + return field.equals(NULL_PROPS); + } + public static String createSimilarityFieldName(String name) { return SIMILARITY_PREFIX + name; } diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java index f89487a487..48c3c94519 100644 --- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java +++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java @@ -40,6 +40,7 @@ import org.apache.jackrabbit.oak.commons.PathUtils; import org.apache.jackrabbit.oak.plugins.index.IndexConstants; import org.apache.jackrabbit.oak.plugins.index.IndexSelectionPolicy; import org.apache.jackrabbit.oak.plugins.index.property.ValuePatternUtil; +import org.apache.jackrabbit.oak.plugins.index.search.FieldNames; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule; import org.apache.jackrabbit.oak.plugins.index.search.IndexFormatVersion; @@ -246,7 +247,8 @@ public class FulltextIndexPlanner { } if (pd != null && pd.propertyIndexEnabled()) { - if (pr.isNullRestriction() && !pd.nullCheckEnabled){ + // for "is not null" we can use an asterisk query + if (pr.isNullRestriction() && !pd.nullCheckEnabled) { continue; } @@ -838,6 +840,11 @@ public class FulltextIndexPlanner { if (result.relPropMapping.containsKey(key)) { key = getName(key); } + PropertyRestriction pr = filter.getPropertyRestriction(key); + // for "is not null" we can use an asterisk query + if (pr.isNullRestriction()) { + key = FieldNames.NULL_PROPS; + } int docCntForField = indexStatistics.getDocCountFor(key); if (docCntForField == -1) { continue; @@ -845,10 +852,12 @@ public class FulltextIndexPlanner { int weight = propDef.getValue().weight; - PropertyRestriction pr = filter.getPropertyRestriction(key); if (pr != null) { - if (pr.isNotNullRestriction()) { - // don't use weight for "is not null" restrictions + if (pr.isNotNullRestriction() || pr.isNullRestriction()) { + // don't use weight for "is not null" restrictions, + // as all documents with this field can match; + // and don't use the weight for "is not null" restrictions, + // as all documents with ":nullProps" can match weight = 1; } else { if (weight > 1) {
