This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 490ad14720 [stats](nereids)in predicate range adjust (#22156)
490ad14720 is described below
commit 490ad1472029ac4853286a1f22fd7ccafce34b72
Author: minghong <[email protected]>
AuthorDate: Wed Jul 26 19:10:04 2023 +0800
[stats](nereids)in predicate range adjust (#22156)
1. refactor in-predicate filter estimation
example: A in (1, 2, 3, 4)
after in-preidcate filter, A.stats.max<=4 and A.stats.min>=1
2. maintain minExpr and maxExpr in in-predicate stats derive
---
.../doris/nereids/stats/FilterEstimation.java | 76 ++++++++++++++++------
.../doris/nereids/stats/FilterEstimationTest.java | 5 +-
2 files changed, 60 insertions(+), 21 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 6cb397437d..44e9a97a8b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -17,6 +17,7 @@
package org.apache.doris.nereids.stats;
+import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.nereids.stats.FilterEstimation.EstimationContext;
import org.apache.doris.nereids.trees.TreeNode;
import org.apache.doris.nereids.trees.expressions.And;
@@ -36,6 +37,7 @@ import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.functions.Function;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.statistics.Bucket;
import org.apache.doris.statistics.ColumnStatistic;
@@ -266,8 +268,12 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
return context.statistics.withSel(DEFAULT_IN_COEFFICIENT);
}
List<Expression> options = inPredicate.getOptions();
- double maxOption = 0;
- double minOption = Double.MAX_VALUE;
+ // init minOption and maxOption by compareExpr.max and compareExpr.min
respectively,
+ // and then adjust min/max by options
+ double minOptionValue = compareExprStats.maxValue;
+ double maxOptionValue = compareExprStats.minValue;
+ LiteralExpr minOptionLiteral = compareExprStats.maxExpr;
+ LiteralExpr maxOptionLiteral = compareExprStats.minExpr;
/* suppose A.(min, max) = (0, 10), A.ndv=10
A in ( 1, 2, 5, 100):
validInOptCount = 3, that is (1, 2, 5)
@@ -283,32 +289,62 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
A.(min, max) not changed
A.selectivity = 7/10
*/
- double validInOptCount = 0;
+ int validInOptCount = 0;
double selectivity = 1.0;
ColumnStatisticBuilder compareExprStatsBuilder = new
ColumnStatisticBuilder(compareExprStats);
-
+ int nonLiteralOptionCount = 0;
for (Expression option : options) {
ColumnStatistic optionStats =
ExpressionEstimation.estimate(option, context.statistics);
- double validOptionNdv =
compareExprStats.ndvIntersection(optionStats);
- if (validOptionNdv > 0.0) {
- validInOptCount += validOptionNdv;
- maxOption = Math.max(optionStats.maxValue, maxOption);
- minOption = Math.min(optionStats.minValue, minOption);
+ if (option instanceof Literal) {
+ // remove the options which is out of compareExpr.range
+ if (compareExprStats.minValue <= optionStats.maxValue
+ && optionStats.maxValue <= compareExprStats.maxValue) {
+ validInOptCount++;
+ LiteralExpr optionLiteralExpr = ((Literal)
option).toLegacyLiteral();
+ if (maxOptionLiteral == null ||
optionLiteralExpr.compareTo(maxOptionLiteral) >= 0) {
+ maxOptionLiteral = optionLiteralExpr;
+ maxOptionValue = optionStats.maxValue;
+ }
+
+ if (minOptionLiteral == null ||
optionLiteralExpr.compareTo(minOptionLiteral) <= 0) {
+ minOptionLiteral = optionLiteralExpr;
+ minOptionValue = optionStats.minValue;
+ }
+ }
+ } else {
+ nonLiteralOptionCount++;
+ }
+ }
+ if (nonLiteralOptionCount > 0) {
+ // A in (x+1, ...)
+ // "x+1" is not literal, and if const-fold can not handle it, it
blocks estimation of min/max value.
+ // and hence, we do not adjust compareExpr.stats.range.
+ int newNdv = nonLiteralOptionCount + validInOptCount;
+ if (newNdv < compareExprStats.ndv) {
+ compareExprStatsBuilder.setNdv(newNdv);
+ selectivity = StatsMathUtil.divide(newNdv,
compareExprStats.ndv);
+ } else {
+ selectivity = 1.0;
+ }
+ } else {
+ maxOptionValue = Math.min(maxOptionValue,
compareExprStats.maxValue);
+ minOptionValue = Math.max(minOptionValue,
compareExprStats.minValue);
+ compareExprStatsBuilder.setMaxValue(maxOptionValue);
+ compareExprStatsBuilder.setMaxExpr(maxOptionLiteral);
+ compareExprStatsBuilder.setMinValue(minOptionValue);
+ compareExprStatsBuilder.setMinExpr(minOptionLiteral);
+ if (validInOptCount < compareExprStats.ndv) {
+ compareExprStatsBuilder.setNdv(validInOptCount);
+ selectivity = StatsMathUtil.divide(validInOptCount,
compareExprStats.ndv);
+ } else {
+ selectivity = 1.0;
}
}
- maxOption = Math.min(maxOption, compareExprStats.maxValue);
- minOption = Math.max(minOption, compareExprStats.minValue);
- compareExprStatsBuilder.setMaxValue(maxOption);
- compareExprStatsBuilder.setMinValue(minOption);
-
- selectivity = StatsMathUtil.minNonNaN(1.0, validInOptCount /
compareExprStats.ndv);
- compareExprStatsBuilder.setNdv(validInOptCount);
Statistics estimated = new Statistics(context.statistics);
estimated = estimated.withSel(selectivity);
- if (compareExpr instanceof SlotReference) {
- estimated.addColumnStats(compareExpr,
- compareExprStatsBuilder.build());
- }
+ estimated.addColumnStats(compareExpr,
+ compareExprStatsBuilder.build());
+
return estimated;
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 1fe5e5b0a0..394dacdee8 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -17,6 +17,7 @@
package org.apache.doris.nereids.stats;
+import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.Cast;
import org.apache.doris.nereids.trees.expressions.EqualTo;
@@ -418,7 +419,9 @@ class FilterEstimationTest {
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(1)
- .setMaxValue(10);
+ .setMinExpr(new IntLiteral(1))
+ .setMaxValue(10)
+ .setMaxExpr(new IntLiteral(10));
slotToColumnStat.put(a, builder.build());
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]