This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 0b18923f5d OAK-10532 Cost estimation for not(@x) calculates cost for
@x='value' (#1673)
0b18923f5d is described below
commit 0b18923f5db1f299620b7db8fd5f0c28c9857ff3
Author: Thomas Mueller <[email protected]>
AuthorDate: Tue Sep 3 14:09:10 2024 +0200
OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
(#1673)
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
* OAK-10532 Cost estimation for not(@x) calculates cost for @x='value'
instead
---
.../main/java/org/apache/jackrabbit/oak/Oak.java | 8 +++
.../jackrabbit/oak/query/QueryEngineSettings.java | 15 +++-
.../jackrabbit/oak/query/index/FilterImpl.java | 5 ++
.../index/lucene/LuceneIndexStatistics.java | 3 +-
.../index/lucene/LuceneIndexPlannerCommonTest.java | 79 +++++++++++++++++++++-
.../apache/jackrabbit/oak/spi/query/Filter.java | 5 ++
.../jackrabbit/oak/spi/query/QueryLimits.java | 11 +++
.../oak/plugins/index/search/FieldNames.java | 4 ++
.../search/spi/query/FulltextIndexPlanner.java | 21 +++++-
9 files changed, 145 insertions(+), 6 deletions(-)
diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/Oak.java
b/oak-core/src/main/java/org/apache/jackrabbit/oak/Oak.java
index cff90c48fa..6ceebaf14e 100644
--- a/oak-core/src/main/java/org/apache/jackrabbit/oak/Oak.java
+++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/Oak.java
@@ -584,6 +584,10 @@ public class Oak {
LOG.info("Registered Prefetch feature: " +
QueryEngineSettings.FT_NAME_PREFETCH_FOR_QUERIES);
closer.register(prefetchFeature);
queryEngineSettings.setPrefetchFeature(prefetchFeature);
+ Feature improvedIsNullCostFeature =
newFeature(QueryEngineSettings.FT_NAME_IMPROVED_IS_NULL_COST, whiteboard);
+ LOG.info("Registered improved cost feature: " +
QueryEngineSettings.FT_NAME_IMPROVED_IS_NULL_COST);
+ closer.register(improvedIsNullCostFeature);
+
queryEngineSettings.setImprovedIsNullCostFeature(improvedIsNullCostFeature);
}
return this;
@@ -979,6 +983,10 @@ public class Oak {
settings.setPrefetchFeature(prefetch);
}
+ public void setImprovedIsNullCostFeature(@Nullable Feature feature) {
+ settings.setImprovedIsNullCostFeature(feature);
+ }
+
@Override
public void setQueryValidatorPattern(String key, String pattern,
String comment, boolean failQuery) {
settings.getQueryValidator().setPattern(key, pattern, comment,
failQuery);
diff --git
a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/QueryEngineSettings.java
b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/QueryEngineSettings.java
index 0bf4a9a48d..a6f68ea9ca 100644
---
a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/QueryEngineSettings.java
+++
b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/QueryEngineSettings.java
@@ -63,6 +63,8 @@ public class QueryEngineSettings implements
QueryEngineSettingsMBean, QueryLimit
public static final String FT_NAME_PREFETCH_FOR_QUERIES = "FT_OAK-10490";
+ public static final String FT_NAME_IMPROVED_IS_NULL_COST = "FT_OAK-10532";
+
public static final int DEFAULT_PREFETCH_COUNT =
Integer.getInteger(OAK_QUERY_PREFETCH_COUNT, -1);
public static final String OAK_QUERY_FAIL_TRAVERSAL =
"oak.queryFailTraversal";
@@ -111,6 +113,7 @@ public class QueryEngineSettings implements
QueryEngineSettingsMBean, QueryLimit
private final long queryLengthErrorLimit =
Long.getLong(OAK_QUERY_LENGTH_ERROR_LIMIT, 100 * 1024 * 1024); //100MB
private Feature prefetchFeature;
+ private Feature improvedIsNullCostFeature;
private String autoOptionsMappingJson = "{}";
private QueryOptions.AutomaticQueryOptionsMapping autoOptionsMapping = new
QueryOptions.AutomaticQueryOptionsMapping(autoOptionsMappingJson);
@@ -205,6 +208,16 @@ public class QueryEngineSettings implements
QueryEngineSettingsMBean, QueryLimit
System.setProperty(OAK_FAST_QUERY_SIZE, String.valueOf(fastQuerySize));
}
+ public void setImprovedIsNullCostFeature(@Nullable Feature feature) {
+ this.improvedIsNullCostFeature = feature;
+ }
+
+ @Override
+ public boolean getImprovedIsNullCost() {
+ // enabled if the feature toggle is not used
+ return improvedIsNullCostFeature == null ||
improvedIsNullCostFeature.isEnabled();
+ }
+
public String getStrictPathRestriction() {
return strictPathRestriction.name();
}
@@ -272,5 +285,5 @@ public class QueryEngineSettings implements
QueryEngineSettingsMBean, QueryLimit
", classNamesIgnoredInCallTrace=" +
Arrays.toString(classNamesIgnoredInCallTrace) +
'}';
}
-
+
}
diff --git
a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java
b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java
index faa4306572..e9dfda69ea 100644
---
a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java
+++
b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java
@@ -146,6 +146,11 @@ public class FilterImpl implements Filter {
public boolean getFailTraversal() {
return false;
}
+
+ @Override
+ public boolean getImprovedIsNullCost() {
+ return true;
+ }
});
}
diff --git
a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java
b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java
index 02cde0eeee..4b1a99851b 100644
---
a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java
+++
b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexStatistics.java
@@ -29,6 +29,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static
org.apache.jackrabbit.oak.plugins.index.search.FieldNames.isPropertyField;
+import static
org.apache.jackrabbit.oak.plugins.index.search.FieldNames.isNullPropsField;
/**
* This class would populate some statistics from a reader. We want to be
careful here such that
@@ -70,7 +71,7 @@ public class LuceneIndexStatistics implements IndexStatistics
{
if (fields != null) {
for(String f : fields) {
- if (isPropertyField(f)) {
+ if (isPropertyField(f) || isNullPropsField(f)) {
int docCntForField = -1;
try {
if (failReadingSyntheticallyFalliableField &&
SYNTHETICALLY_FALLIABLE_FIELD.equals(f)) {
diff --git
a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java
b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java
index 5f743765c4..f9e7e29011 100644
---
a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java
+++
b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPlannerCommonTest.java
@@ -25,6 +25,7 @@ import
org.apache.jackrabbit.oak.plugins.index.lucene.reader.DefaultIndexReader;
import org.apache.jackrabbit.oak.plugins.index.lucene.reader.LuceneIndexReader;
import
org.apache.jackrabbit.oak.plugins.index.lucene.reader.LuceneIndexReaderFactory;
import
org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexDefinitionBuilder;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.IndexNode;
import
org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner;
@@ -69,7 +70,6 @@ import static org.junit.Assert.assertEquals;
public class LuceneIndexPlannerCommonTest extends IndexPlannerCommonTest {
-
@Test
public void useNumDocsOnFieldForCost() throws Exception {
NodeBuilder defn = newLucenePropertyIndexDefinition(builder, "test",
of("foo", "foo1", "foo2"), "async");
@@ -331,6 +331,83 @@ public class LuceneIndexPlannerCommonTest extends
IndexPlannerCommonTest {
assertEquals(1, plan.getEstimatedEntryCount());
}
+ @Test
+ public void isNotNullAndIsNull() throws Exception{
+ String indexPath = "/test";
+ LuceneIndexDefinitionBuilder idxBuilder = new
LuceneIndexDefinitionBuilder(child(builder, indexPath));
+ idxBuilder.indexRule("nt:unstructured").property("foo")
+
.enclosingRule().property("foo").propertyIndex().nullCheckEnabled()
+
.enclosingRule().property("foo2").propertyIndex().nullCheckEnabled();
+ NodeState defn = idxBuilder.build();
+
+ LuceneIndexDefinition idxDefn = new LuceneIndexDefinition(root, defn,
indexPath);
+ List<Document> list = new ArrayList<>();
+ // 10 documents have "foo" (foo = 1)
+ for (int i = 0; i < 10; i++) {
+ Document doc = new Document();
+ doc.add(new StringField("foo", "1", Field.Store.NO));
+ list.add(doc);
+ }
+ // 20 documents have "foo2 is null" (:nullProps = foo2)
+ for (int i = 0; i < 20; i++) {
+ Document doc = new Document();
+ doc.add(new StringField(FieldNames.NULL_PROPS, "foo2",
Field.Store.NO));
+ list.add(doc);
+ }
+ Directory sampleDirectory = createSampleDirectory(0, list);
+ LuceneIndexNode node = createIndexNode(idxDefn, sampleDirectory);
+
+ // foo is null
+ // that can be at most 20, because there are only
+ // that many documents with ":nullProps"
+ FilterImpl filter = createFilter("nt:unstructured");
+ filter.restrictProperty("foo", Operator.EQUAL, null);
+
+ FulltextIndexPlanner planner = new FulltextIndexPlanner(node,
indexPath, filter, Collections.emptyList());
+ QueryIndex.IndexPlan plan = planner.getPlan();
+
+ assertEquals(20, plan.getEstimatedEntryCount());
+ assertEquals(1.0, plan.getCostPerExecution(), 0);
+ assertEquals(1.0, plan.getCostPerEntry(), 0);
+
+ // foo2 is null:
+ // that can be at most 20, because there are only 10 documents with
this property
+ filter = createFilter("nt:unstructured");
+ filter.restrictProperty("foo2", Operator.EQUAL, null);
+
+ planner = new FulltextIndexPlanner(node, indexPath, filter,
Collections.emptyList());
+ plan = planner.getPlan();
+
+ assertEquals(20, plan.getEstimatedEntryCount());
+ assertEquals(1.0, plan.getCostPerExecution(), 0);
+ assertEquals(1.0, plan.getCostPerEntry(), 0);
+
+ // foo is not null:
+ // that can be at most 10, because there are only 10 documents with
this property
+ filter = createFilter("nt:unstructured");
+ filter.restrictProperty("foo", Operator.NOT_EQUAL, null);
+
+ planner = new FulltextIndexPlanner(node, indexPath, filter,
Collections.emptyList());
+ plan = planner.getPlan();
+
+ assertEquals(10, plan.getEstimatedEntryCount());
+ assertEquals(1.0, plan.getCostPerExecution(), 0);
+ assertEquals(1.0, plan.getCostPerEntry(), 0);
+
+ // foo2 is not null:
+ // that can be at most 0, because there are no documents wit this
property
+ filter = createFilter("nt:unstructured");
+ filter.restrictProperty("foo2", Operator.NOT_EQUAL, null);
+
+ planner = new FulltextIndexPlanner(node, indexPath, filter,
Collections.emptyList());
+ plan = planner.getPlan();
+
+ assertEquals(0, plan.getEstimatedEntryCount());
+ assertEquals(1.0, plan.getCostPerExecution(), 0);
+ assertEquals(1.0, plan.getCostPerEntry(), 0);
+
+ }
+
@Test
public void fullTextWithPropRestriction() throws Exception{
String indexPath = "/test";
diff --git
a/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/Filter.java
b/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/Filter.java
index 277dd392e6..83b4ff52c8 100644
---
a/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/Filter.java
+++
b/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/Filter.java
@@ -454,6 +454,11 @@ public interface Filter {
return false;
}
+ @Override
+ public boolean getImprovedIsNullCost() {
+ return true;
+ }
+
};
@Override
diff --git
a/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/QueryLimits.java
b/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/QueryLimits.java
index a98248b4c3..366e2a0872 100644
---
a/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/QueryLimits.java
+++
b/oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/QueryLimits.java
@@ -29,6 +29,16 @@ public interface QueryLimits {
boolean getFullTextComparisonWithoutIndex();
+ /**
+ * See OAK-10532. This method is used for backward compatibility
+ * (bug compatibility) only.
+ *
+ * @return true, except when backward compatibility for OAK-10532 is
enabled
+ */
+ default boolean getImprovedIsNullCost() {
+ return true;
+ }
+
boolean getFailTraversal();
default String getStrictPathRestriction() {
@@ -43,4 +53,5 @@ public interface QueryLimits {
default @NotNull String[] getIgnoredClassNamesInCallTrace() {
return new String[] {};
}
+
}
diff --git
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
index 96e9e21521..a8d2237702 100644
---
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
+++
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
@@ -154,6 +154,10 @@ public final class FieldNames {
&& !field.endsWith("_facet");
}
+ public static boolean isNullPropsField(String field) {
+ return field.equals(NULL_PROPS);
+ }
+
public static String createSimilarityFieldName(String name) {
return SIMILARITY_PREFIX + name;
}
diff --git
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
index f89487a487..0ec36f6a75 100644
---
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
+++
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
@@ -40,6 +40,7 @@ import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
import org.apache.jackrabbit.oak.plugins.index.IndexSelectionPolicy;
import org.apache.jackrabbit.oak.plugins.index.property.ValuePatternUtil;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import
org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule;
import org.apache.jackrabbit.oak.plugins.index.search.IndexFormatVersion;
@@ -89,6 +90,7 @@ public class FulltextIndexPlanner {
private final IndexNode indexNode;
protected PlanResult result;
protected static boolean useActualEntryCount;
+ private final boolean improvedIsNullCost;
static {
useActualEntryCount =
Boolean.parseBoolean(System.getProperty(FLAG_ENTRY_COUNT, "true"));
@@ -106,6 +108,7 @@ public class FulltextIndexPlanner {
this.definition = indexNode.getDefinition();
this.filter = filter;
this.sortOrder = sortOrder;
+ this.improvedIsNullCost =
filter.getQueryLimits().getImprovedIsNullCost();
}
public IndexPlan getPlan() {
@@ -838,17 +841,29 @@ public class FulltextIndexPlanner {
if (result.relPropMapping.containsKey(key)) {
key = getName(key);
}
- int docCntForField = indexStatistics.getDocCountFor(key);
+ PropertyRestriction pr = filter.getPropertyRestriction(key);
+ String fieldName = key;
+ // for "is not null" we can use an asterisk query
+ if (improvedIsNullCost) {
+ if (pr != null && pr.isNullRestriction()) {
+ fieldName = FieldNames.NULL_PROPS;
+ }
+ }
+ int docCntForField = indexStatistics.getDocCountFor(fieldName);
if (docCntForField == -1) {
continue;
}
int weight = propDef.getValue().weight;
- PropertyRestriction pr = filter.getPropertyRestriction(key);
if (pr != null) {
if (pr.isNotNullRestriction()) {
// don't use weight for "is not null" restrictions
+ // as all documents with this field can match;
+ weight = 1;
+ } else if (improvedIsNullCost && pr.isNullRestriction()) {
+ // don't use the weight for "is null" restrictions
+ // as all documents with ":nullProps" can match
weight = 1;
} else {
if (weight > 1) {
@@ -1164,7 +1179,7 @@ public class FulltextIndexPlanner {
nodeNameRestriction = true;
}
- public void disableUniquePaths(){
+ public void disableUniquePaths() {
uniquePathsRequired = false;
}
}