This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6dcc221e44d [opt](nereids) fix non-null selectivity computing (#42286)
6dcc221e44d is described below
commit 6dcc221e44d1ebedac9e598e04a326f40e39c69c
Author: xzj7019 <[email protected]>
AuthorDate: Thu Oct 24 11:47:50 2024 +0800
[opt](nereids) fix non-null selectivity computing (#42286)
The problem was introduced by PR #40762,
which doesn't update numNulls and other related column stats when they
need to be normalized.
This PR adds the missing trigger condition for that update and fixes the
related case.
---
.../org/apache/doris/statistics/Statistics.java | 3 +-
.../doris/nereids/stats/FilterEstimationTest.java | 72 +++++++++++++++++++++-
.../doris/nereids/stats/StatsCalculatorTest.java | 4 +-
3 files changed, 75 insertions(+), 4 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 72000f3ce5a..6ad4297dcb1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -119,7 +119,8 @@ public class Statistics {
// the following columnStatistic.isUnKnown() judgment is loop
inside since current doris
// supports partial stats deriving, i.e, allowing part of tables
have stats and other parts don't,
// or part of columns have stats but other parts don't, especially
join and filter estimation.
- if (!checkColumnStatsValid(columnStatistic, rowCount) &&
!columnStatistic.isUnKnown()) {
+ if (!columnStatistic.isUnKnown() &&
(!checkColumnStatsValid(columnStatistic, rowCount)
+ || isNumNullsDecreaseByProportion &&
columnStatistic.numNulls != 0)) {
ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(columnStatistic);
double ndv = Math.min(columnStatistic.ndv, rowCount);
double numNulls = Math.min(columnStatistic.numNulls * factor,
rowCount - ndv);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 28fe50d16ec..9b0fdc3880d 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -36,9 +36,11 @@ import
org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Left;
import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral;
import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
+import org.apache.doris.nereids.types.DateTimeType;
import org.apache.doris.nereids.types.DateType;
import org.apache.doris.nereids.types.DoubleType;
import org.apache.doris.nereids.types.IntegerType;
@@ -1144,7 +1146,75 @@ class FilterEstimationTest {
Statistics result = filterEstimation.estimate(and, stats);
// result 1.0->2.0 bc happens because the calculation from
normalization of
// "Math.min(columnStatistic.numNulls * factor, rowCount - ndv);"
- Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
+ Assertions.assertEquals(result.getRowCount(), 3.5, 0.01);
+ }
+
+ /**
+ * dt BETWEEN "2020-05-25 00:00:00" and "2020-05-25 23:59:59"
+ * and day BETWEEN "2020-05-24" and "2020-05-26"
+ * and game="mus" and plat = "37wan";
+ */
+ @Test
+ void testMultiAndWithNull() {
+ SlotReference dt = new SlotReference("dt", DateTimeType.INSTANCE);
+ ColumnStatisticBuilder dtBuilder = new ColumnStatisticBuilder(1000000)
+ .setNdv(783813.0)
+ .setNumNulls(50833.0)
+ .setMaxValue(new DateTimeLiteral("2020-05-31
07:59:59").getDouble())
+ .setMinValue(new DateTimeLiteral("2020-05-01
08:00:04").getDouble());
+ DateLiteral dtMin = new DateTimeLiteral("2020-05-25 00:00:00");
+ DateLiteral dtMax = new DateTimeLiteral("2020-05-25 23:59:59");
+ GreaterThanEqual dtGreater = new GreaterThanEqual(dt, dtMin);
+ LessThan dtLess = new LessThan(dt, dtMax);
+ And dtAnd = new And(dtLess, dtGreater);
+
+ SlotReference day = new SlotReference("day", DateType.INSTANCE);
+ ColumnStatisticBuilder dayBuilder = new ColumnStatisticBuilder(1000000)
+ .setNdv(31.0)
+ .setNumNulls(49699.0)
+ .setMaxValue(new DateLiteral("2020-05-31").getDouble())
+ .setMinValue(new DateLiteral("2020-05-01").getDouble());
+ DateLiteral dayMin = new DateLiteral("2020-05-24");
+ DateLiteral dayMax = new DateLiteral("2020-05-26");
+ GreaterThanEqual dayGreater = new GreaterThanEqual(day, dayMin);
+ LessThan dayLess = new LessThan(day, dayMax);
+ And dayAnd = new And(dayLess, dayGreater);
+
+ SlotReference game = new SlotReference("game", new VarcharType(500));
+ ColumnStatisticBuilder gameBuilder = new
ColumnStatisticBuilder(1000000)
+ .setNdv(1.0)
+ .setNumNulls(49813.0)
+ .setMaxExpr(new StringLiteral("mus"))
+ .setMaxValue(new VarcharLiteral("mus").getDouble())
+ .setMinExpr(new StringLiteral("mus"))
+ .setMinValue(new VarcharLiteral("mus").getDouble());
+ VarcharLiteral mus = new VarcharLiteral("mus");
+ EqualTo gameEqualTo = new EqualTo(game, mus);
+
+ SlotReference plat = new SlotReference("plat", new VarcharType(500));
+ ColumnStatisticBuilder platBuilder = new
ColumnStatisticBuilder(1000000)
+ .setNdv(1.0)
+ .setNumNulls(49691.0)
+ .setMaxExpr(new StringLiteral("37wan"))
+ .setMaxValue(new VarcharLiteral("37wan").getDouble())
+ .setMinExpr(new StringLiteral("37wan"))
+ .setMinValue(new VarcharLiteral("37wan").getDouble());
+ VarcharLiteral wan = new VarcharLiteral("37wan");
+ EqualTo wanEqualTo = new EqualTo(plat, wan);
+ And equalAnd = new And(gameEqualTo, wanEqualTo);
+
+ And partialAnd = new And(dtAnd, dayAnd);
+ And allAnd = new And(partialAnd, equalAnd);
+
+ Statistics stats = new Statistics(1000000, new HashMap<>());
+ stats.addColumnStats(dt, dtBuilder.build());
+ stats.addColumnStats(day, dayBuilder.build());
+ stats.addColumnStats(game, gameBuilder.build());
+ stats.addColumnStats(plat, platBuilder.build());
+
+ FilterEstimation filterEstimation = new FilterEstimation();
+ Statistics result = filterEstimation.estimate(allAnd, stats);
+ Assertions.assertEquals(result.getRowCount(), 2109.16, 0.01);
}
/**
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
index 49d295ea7c0..cf91eacb51c 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
@@ -145,13 +145,13 @@ public class StatsCalculatorTest {
GroupExpression groupExpression = new GroupExpression(logicalFilter,
ImmutableList.of(childGroup));
Group ownerGroup = new Group(null, groupExpression, null);
StatsCalculator.estimate(groupExpression, null);
- Assertions.assertEquals(49.45,
ownerGroup.getStatistics().getRowCount(), 0.001);
+ Assertions.assertEquals(49.945,
ownerGroup.getStatistics().getRowCount(), 0.001);
LogicalFilter<GroupPlan> logicalFilterOr = new LogicalFilter<>(or,
groupPlan);
GroupExpression groupExpressionOr = new
GroupExpression(logicalFilterOr, ImmutableList.of(childGroup));
Group ownerGroupOr = new Group(null, groupExpressionOr, null);
StatsCalculator.estimate(groupExpressionOr, null);
- Assertions.assertEquals(1449.05,
+ Assertions.assertEquals(1448.555,
ownerGroupOr.getStatistics().getRowCount(), 0.001);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]