This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch OAK-10532
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 1f89af2964e1fd7fb0140a03195af1a7403e5786
Author: Thomas Mueller <[email protected]>
AuthorDate: Wed Aug 28 17:52:34 2024 +0200

    OAK-10532 Cost estimation for not(@x) calculates cost for @x='value' instead
---
 oak-lucene/hs_err_pid67158.log                     |  9 +++
 .../index/lucene/LuceneIndexStatistics.java        |  7 +-
 .../index/lucene/LuceneIndexPlannerCommonTest.java | 79 +++++++++++++++++++++-
 .../oak/plugins/index/search/FieldNames.java       |  4 ++
 .../search/spi/query/FulltextIndexPlanner.java     | 17 +++--
 5 files changed, 107 insertions(+), 9 deletions(-)

diff --git a/oak-lucene/hs_err_pid67158.log b/oak-lucene/hs_err_pid67158.log
new file mode 100644
index 0000000000..0dee34afa4
--- /dev/null
+++ b/oak-lucene/hs_err_pid67158.log
@@ -0,0 +1,9 @@
+#
+# A fatal error has been detected by the Java Runtime Environment:
+#
+#  SIGSEGV (0xb) at pc=0x0000000100e5e670, pid=67158, tid=32515
+#
+# JRE version: OpenJDK Runtime Environment Homebrew (11.0.12) (build 11.0.12+0)
+# Java VM: OpenJDK 64-Bit Server VM Homebrew (11.0.12+0, mixed mode, tiered, 
compressed oops, g1 gc, bsd-aarch64)
+# Problematic frame:
+# C  [libjimage.dylib+0x2670]  ImageStrings::find(Endian*, char const*, int*, 
unsigned int)+0x6c
\ No newline at end of file
diff --git 
a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java
 
b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java
index 02cde0eeee..62ed9b8079 100644
--- 
a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java
+++ 
b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java
@@ -21,6 +21,7 @@ import java.util.Collections;
 import java.util.Map;
 
 import org.apache.jackrabbit.guava.common.collect.Maps;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexStatistics;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
@@ -28,8 +29,6 @@ import org.apache.lucene.index.MultiFields;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static 
org.apache.jackrabbit.oak.plugins.index.search.FieldNames.isPropertyField;
-
 /**
  * This class would populate some statistics from a reader. We want to be 
careful here such that
  * we only collect statistics which don't incur reads from the index i.e. we 
would only collect
@@ -70,7 +69,7 @@ public class LuceneIndexStatistics implements IndexStatistics 
{
 
         if (fields != null) {
             for(String f : fields) {
-                if (isPropertyField(f)) {
+                if (FieldNames.isPropertyField(f) || 
FieldNames.isNullPropsField(f)) {
                     int docCntForField = -1;
                     try {
                         if (failReadingSyntheticallyFalliableField && 
SYNTHETICALLY_FALLIABLE_FIELD.equals(f)) {
@@ -115,7 +114,7 @@ public class LuceneIndexStatistics implements 
IndexStatistics {
             return -1;
         }
 
-        int docCntForField = isPropertyField(field) ? 0 : -1;
+        int docCntForField = FieldNames.isPropertyField(field) ? 0 : -1;
         if (numDocsForField.containsKey(field)) {
             docCntForField = numDocsForField.get(field);
         }
diff --git 
a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java
 
b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java
index 5f743765c4..f9e7e29011 100644
--- 
a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java
+++ 
b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java
@@ -25,6 +25,7 @@ import 
org.apache.jackrabbit.oak.plugins.index.lucene.reader.DefaultIndexReader;
 import org.apache.jackrabbit.oak.plugins.index.lucene.reader.LuceneIndexReader;
 import 
org.apache.jackrabbit.oak.plugins.index.lucene.reader.LuceneIndexReaderFactory;
 import 
org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexDefinitionBuilder;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexNode;
 import 
org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner;
@@ -69,7 +70,6 @@ import static org.junit.Assert.assertEquals;
 
 public class LuceneIndexPlannerCommonTest extends IndexPlannerCommonTest {
 
-
     @Test
     public void useNumDocsOnFieldForCost() throws Exception {
         NodeBuilder defn = newLucenePropertyIndexDefinition(builder, "test", 
of("foo", "foo1", "foo2"), "async");
@@ -331,6 +331,83 @@ public class LuceneIndexPlannerCommonTest extends 
IndexPlannerCommonTest {
         assertEquals(1, plan.getEstimatedEntryCount());
     }
 
+    @Test
+    public void isNotNullAndIsNull() throws Exception{
+        String indexPath = "/test";
+        LuceneIndexDefinitionBuilder idxBuilder = new 
LuceneIndexDefinitionBuilder(child(builder, indexPath));
+        idxBuilder.indexRule("nt:unstructured").property("foo")
+                
.enclosingRule().property("foo").propertyIndex().nullCheckEnabled()
+                
.enclosingRule().property("foo2").propertyIndex().nullCheckEnabled();
+        NodeState defn = idxBuilder.build();
+
+        LuceneIndexDefinition idxDefn = new LuceneIndexDefinition(root, defn, 
indexPath);
+        List<Document> list = new ArrayList<>();
+        // 10 documents have "foo" (foo = 1)
+        for (int i = 0; i < 10; i++) {
+            Document doc = new Document();
+            doc.add(new StringField("foo", "1", Field.Store.NO));
+            list.add(doc);
+        }
+        // 20 documents have "foo2 is null" (:nullProps = foo2)
+        for (int i = 0; i < 20; i++) {
+            Document doc = new Document();
+            doc.add(new StringField(FieldNames.NULL_PROPS, "foo2", 
Field.Store.NO));
+            list.add(doc);
+        }
+        Directory sampleDirectory = createSampleDirectory(0, list);
+        LuceneIndexNode node = createIndexNode(idxDefn, sampleDirectory);
+
+        // foo is null
+        // that can be at most 20, because there are only
+        // that many documents with ":nullProps"
+        FilterImpl filter = createFilter("nt:unstructured");
+        filter.restrictProperty("foo", Operator.EQUAL, null);
+
+        FulltextIndexPlanner planner = new FulltextIndexPlanner(node, 
indexPath, filter, Collections.emptyList());
+        QueryIndex.IndexPlan plan = planner.getPlan();
+
+        assertEquals(20, plan.getEstimatedEntryCount());
+        assertEquals(1.0, plan.getCostPerExecution(), 0);
+        assertEquals(1.0, plan.getCostPerEntry(), 0);
+
+        // foo2 is null:
+        // that can be at most 20, because there are only 10 documents with 
this property
+        filter = createFilter("nt:unstructured");
+        filter.restrictProperty("foo2", Operator.EQUAL, null);
+
+        planner = new FulltextIndexPlanner(node, indexPath, filter, 
Collections.emptyList());
+        plan = planner.getPlan();
+
+        assertEquals(20, plan.getEstimatedEntryCount());
+        assertEquals(1.0, plan.getCostPerExecution(), 0);
+        assertEquals(1.0, plan.getCostPerEntry(), 0);
+
+        // foo is not null:
+        // that can be at most 10, because there are only 10 documents with 
this property
+        filter = createFilter("nt:unstructured");
+        filter.restrictProperty("foo", Operator.NOT_EQUAL, null);
+
+        planner = new FulltextIndexPlanner(node, indexPath, filter, 
Collections.emptyList());
+        plan = planner.getPlan();
+
+        assertEquals(10, plan.getEstimatedEntryCount());
+        assertEquals(1.0, plan.getCostPerExecution(), 0);
+        assertEquals(1.0, plan.getCostPerEntry(), 0);
+
+        // foo2 is not null:
+        // that can be at most 0, because there are no documents wit this 
property
+        filter = createFilter("nt:unstructured");
+        filter.restrictProperty("foo2", Operator.NOT_EQUAL, null);
+
+        planner = new FulltextIndexPlanner(node, indexPath, filter, 
Collections.emptyList());
+        plan = planner.getPlan();
+
+        assertEquals(0, plan.getEstimatedEntryCount());
+        assertEquals(1.0, plan.getCostPerExecution(), 0);
+        assertEquals(1.0, plan.getCostPerEntry(), 0);
+
+    }
+
     @Test
     public void fullTextWithPropRestriction() throws Exception{
         String indexPath = "/test";
diff --git 
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
 
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
index 96e9e21521..a8d2237702 100644
--- 
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
+++ 
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
@@ -154,6 +154,10 @@ public final class FieldNames {
                 && !field.endsWith("_facet");
     }
 
+    public static boolean isNullPropsField(String field) {
+        return field.equals(NULL_PROPS);
+    }
+
     public static String createSimilarityFieldName(String name) {
         return SIMILARITY_PREFIX + name;
     }
diff --git 
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
 
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
index f89487a487..48c3c94519 100644
--- 
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
+++ 
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
@@ -40,6 +40,7 @@ import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
 import org.apache.jackrabbit.oak.plugins.index.IndexSelectionPolicy;
 import org.apache.jackrabbit.oak.plugins.index.property.ValuePatternUtil;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
 import 
org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexFormatVersion;
@@ -246,7 +247,8 @@ public class FulltextIndexPlanner {
                 }
 
                 if (pd != null && pd.propertyIndexEnabled()) {
-                    if (pr.isNullRestriction() && !pd.nullCheckEnabled){
+                    // for "is not null" we can use an asterisk query
+                    if (pr.isNullRestriction() && !pd.nullCheckEnabled) {
                         continue;
                     }
 
@@ -838,6 +840,11 @@ public class FulltextIndexPlanner {
             if (result.relPropMapping.containsKey(key)) {
                 key = getName(key);
             }
+            PropertyRestriction pr = filter.getPropertyRestriction(key);
+            // for "is not null" we can use an asterisk query
+            if (pr.isNullRestriction()) {
+                key = FieldNames.NULL_PROPS;
+            }
             int docCntForField = indexStatistics.getDocCountFor(key);
             if (docCntForField == -1) {
                 continue;
@@ -845,10 +852,12 @@ public class FulltextIndexPlanner {
 
             int weight = propDef.getValue().weight;
 
-            PropertyRestriction pr = filter.getPropertyRestriction(key);
             if (pr != null) {
-                if (pr.isNotNullRestriction()) {
-                    // don't use weight for "is not null" restrictions
+                if (pr.isNotNullRestriction() || pr.isNullRestriction()) {
+                    // don't use weight for "is not null" restrictions,
+                    // as all documents with this field can match;
+                    // and don't use the weight for "is not null" restrictions,
+                    // as all documents with ":nullProps" can match
                     weight = 1;
                 } else {
                     if (weight > 1) {

Reply via email to