This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 490ad14720 [stats](nereids)in predicate range adjust (#22156)
490ad14720 is described below

commit 490ad1472029ac4853286a1f22fd7ccafce34b72
Author: minghong <[email protected]>
AuthorDate: Wed Jul 26 19:10:04 2023 +0800

    [stats](nereids)in predicate range adjust (#22156)
    
    1. refactor in-predicate filter estimation
    example: A in (1, 2, 3, 4)
    after in-preidcate filter, A.stats.max<=4 and A.stats.min>=1
    2. maintain minExpr and maxExpr in in-predicate stats derive
---
 .../doris/nereids/stats/FilterEstimation.java      | 76 ++++++++++++++++------
 .../doris/nereids/stats/FilterEstimationTest.java  |  5 +-
 2 files changed, 60 insertions(+), 21 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 6cb397437d..44e9a97a8b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.nereids.stats;
 
+import org.apache.doris.analysis.LiteralExpr;
 import org.apache.doris.nereids.stats.FilterEstimation.EstimationContext;
 import org.apache.doris.nereids.trees.TreeNode;
 import org.apache.doris.nereids.trees.expressions.And;
@@ -36,6 +37,7 @@ import org.apache.doris.nereids.trees.expressions.Or;
 import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.functions.Function;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.statistics.Bucket;
 import org.apache.doris.statistics.ColumnStatistic;
@@ -266,8 +268,12 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
             return context.statistics.withSel(DEFAULT_IN_COEFFICIENT);
         }
         List<Expression> options = inPredicate.getOptions();
-        double maxOption = 0;
-        double minOption = Double.MAX_VALUE;
+        // init minOption and maxOption by compareExpr.max and compareExpr.min 
respectively,
+        // and then adjust min/max by options
+        double minOptionValue = compareExprStats.maxValue;
+        double maxOptionValue = compareExprStats.minValue;
+        LiteralExpr minOptionLiteral = compareExprStats.maxExpr;
+        LiteralExpr maxOptionLiteral = compareExprStats.minExpr;
         /* suppose A.(min, max) = (0, 10), A.ndv=10
          A in ( 1, 2, 5, 100):
               validInOptCount = 3, that is (1, 2, 5)
@@ -283,32 +289,62 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
               A.(min, max) not changed
               A.selectivity = 7/10
         */
-        double validInOptCount = 0;
+        int validInOptCount = 0;
         double selectivity = 1.0;
         ColumnStatisticBuilder compareExprStatsBuilder = new 
ColumnStatisticBuilder(compareExprStats);
-
+        int nonLiteralOptionCount = 0;
         for (Expression option : options) {
             ColumnStatistic optionStats = 
ExpressionEstimation.estimate(option, context.statistics);
-            double validOptionNdv = 
compareExprStats.ndvIntersection(optionStats);
-            if (validOptionNdv > 0.0) {
-                validInOptCount += validOptionNdv;
-                maxOption = Math.max(optionStats.maxValue, maxOption);
-                minOption = Math.min(optionStats.minValue, minOption);
+            if (option instanceof Literal) {
+                // remove the options which is out of compareExpr.range
+                if (compareExprStats.minValue <= optionStats.maxValue
+                        && optionStats.maxValue <= compareExprStats.maxValue) {
+                    validInOptCount++;
+                    LiteralExpr optionLiteralExpr = ((Literal) 
option).toLegacyLiteral();
+                    if (maxOptionLiteral == null || 
optionLiteralExpr.compareTo(maxOptionLiteral) >= 0) {
+                        maxOptionLiteral = optionLiteralExpr;
+                        maxOptionValue = optionStats.maxValue;
+                    }
+
+                    if (minOptionLiteral == null || 
optionLiteralExpr.compareTo(minOptionLiteral) <= 0) {
+                        minOptionLiteral = optionLiteralExpr;
+                        minOptionValue = optionStats.minValue;
+                    }
+                }
+            } else {
+                nonLiteralOptionCount++;
+            }
+        }
+        if (nonLiteralOptionCount > 0) {
+            // A in (x+1, ...)
+            // "x+1" is not literal, and if const-fold can not handle it, it 
blocks estimation of min/max value.
+            // and hence, we do not adjust compareExpr.stats.range.
+            int newNdv = nonLiteralOptionCount + validInOptCount;
+            if (newNdv < compareExprStats.ndv) {
+                compareExprStatsBuilder.setNdv(newNdv);
+                selectivity = StatsMathUtil.divide(newNdv, 
compareExprStats.ndv);
+            } else {
+                selectivity = 1.0;
+            }
+        } else {
+            maxOptionValue = Math.min(maxOptionValue, 
compareExprStats.maxValue);
+            minOptionValue = Math.max(minOptionValue, 
compareExprStats.minValue);
+            compareExprStatsBuilder.setMaxValue(maxOptionValue);
+            compareExprStatsBuilder.setMaxExpr(maxOptionLiteral);
+            compareExprStatsBuilder.setMinValue(minOptionValue);
+            compareExprStatsBuilder.setMinExpr(minOptionLiteral);
+            if (validInOptCount < compareExprStats.ndv) {
+                compareExprStatsBuilder.setNdv(validInOptCount);
+                selectivity = StatsMathUtil.divide(validInOptCount, 
compareExprStats.ndv);
+            } else {
+                selectivity = 1.0;
             }
         }
-        maxOption = Math.min(maxOption, compareExprStats.maxValue);
-        minOption = Math.max(minOption, compareExprStats.minValue);
-        compareExprStatsBuilder.setMaxValue(maxOption);
-        compareExprStatsBuilder.setMinValue(minOption);
-
-        selectivity = StatsMathUtil.minNonNaN(1.0, validInOptCount / 
compareExprStats.ndv);
-        compareExprStatsBuilder.setNdv(validInOptCount);
         Statistics estimated = new Statistics(context.statistics);
         estimated = estimated.withSel(selectivity);
-        if (compareExpr instanceof SlotReference) {
-            estimated.addColumnStats(compareExpr,
-                    compareExprStatsBuilder.build());
-        }
+        estimated.addColumnStats(compareExpr,
+                compareExprStatsBuilder.build());
+
         return estimated;
     }
 
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 1fe5e5b0a0..394dacdee8 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.nereids.stats;
 
+import org.apache.doris.analysis.IntLiteral;
 import org.apache.doris.nereids.trees.expressions.And;
 import org.apache.doris.nereids.trees.expressions.Cast;
 import org.apache.doris.nereids.trees.expressions.EqualTo;
@@ -418,7 +419,9 @@ class FilterEstimationTest {
                 .setAvgSizeByte(4)
                 .setNumNulls(0)
                 .setMinValue(1)
-                .setMaxValue(10);
+                .setMinExpr(new IntLiteral(1))
+                .setMaxValue(10)
+                .setMaxExpr(new IntLiteral(10));
         slotToColumnStat.put(a, builder.build());
         Statistics stat = new Statistics(1000, slotToColumnStat);
         FilterEstimation filterEstimation = new FilterEstimation();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to