This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8debc96d74 [enhancement](nereids) update FilterEstimation and Agg in
stats derive (#17790)
8debc96d74 is described below
commit 8debc96d74b2a5c2c4d24d47f8df47554096333d
Author: minghong <[email protected]>
AuthorDate: Fri Mar 17 18:01:50 2023 +0800
[enhancement](nereids) update FilterEstimation and Agg in stats derive
(#17790)
* 1. update the ndv computation in Statistics,
2. skip __DORIS_DELETE_SIGN__=0 in stats derive,
3. update column min/max for equalTo in stats derive,
4. update agg stats derive to support the case where all column stats are unknown
* computeSize
* fix ut
---
.../apache/doris/nereids/memo/GroupExpression.java | 16 ++---
.../doris/nereids/stats/FilterEstimation.java | 69 +++++++++++++---------
.../apache/doris/nereids/stats/JoinEstimation.java | 6 ++
.../doris/nereids/stats/StatsCalculator.java | 35 ++++++++---
.../plans/physical/PhysicalHashAggregate.java | 2 +-
.../plans/physical/PhysicalNestedLoopJoin.java | 3 +-
.../org/apache/doris/statistics/Statistics.java | 31 +++++++---
.../org/apache/doris/nereids/memo/RankTest.java | 3 +-
.../doris/nereids/stats/FilterEstimationTest.java | 37 ++++++------
.../doris/nereids/stats/StatsCalculatorTest.java | 12 ++--
.../suites/nereids_syntax_p0/join.groovy | 27 ---------
11 files changed, 132 insertions(+), 109 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java
index 5b4774284a..12e9816f8e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java
@@ -57,7 +57,7 @@ public class GroupExpression {
private final BitSet ruleMasks;
private boolean statDerived;
- private long estOutputRowCount = -1;
+ private double estOutputRowCount = -1;
//Record the rule that generate this plan. It's used for debugging
private Rule fromRule;
@@ -301,16 +301,13 @@ public class GroupExpression {
return new Statistics(child(idx).getStatistics());
}
- public void setEstOutputRowCount(long estOutputRowCount) {
+ public void setEstOutputRowCount(double estOutputRowCount) {
this.estOutputRowCount = estOutputRowCount;
}
- public long getEstOutputRowCount() {
- return estOutputRowCount;
- }
-
@Override
public String toString() {
+ DecimalFormat format = new DecimalFormat("#,###.##");
StringBuilder builder = new StringBuilder("id:");
builder.append(id.asInt());
if (ownerGroup == null) {
@@ -318,11 +315,8 @@ public class GroupExpression {
} else {
builder.append("#").append(ownerGroup.getGroupId().asInt());
}
-
- DecimalFormat decimalFormat = new DecimalFormat();
- decimalFormat.setGroupingSize(3);
- builder.append(" cost=").append(decimalFormat.format((long) cost));
- builder.append(" estRows=").append(estOutputRowCount);
+ builder.append(" cost=").append(format.format((long) cost));
+ builder.append(" estRows=").append(format.format(estOutputRowCount));
builder.append(" (plan=").append(plan.toString()).append(")
children=[");
for (Group group : children) {
builder.append(group.getGroupId()).append(" ");
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 2aa55114d8..e2159f1040 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -31,10 +31,10 @@ import
org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
import org.apache.doris.nereids.trees.expressions.Or;
+import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
-import org.apache.doris.nereids.types.coercion.NumericType;
import org.apache.doris.statistics.Bucket;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
@@ -48,6 +48,7 @@ import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import java.util.Set;
/**
* Calculate selectivity of expression that produces boolean value.
@@ -85,10 +86,10 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
Statistics rightStats = rightExpr.accept(this, context);
double rowCount = leftStats.getRowCount() +
rightStats.getRowCount() - andStats.getRowCount();
Statistics orStats = context.statistics.withRowCount(rowCount);
- for (Map.Entry<Expression, ColumnStatistic> entry :
leftStats.columnStatistics().entrySet()) {
- ColumnStatistic leftColStats = entry.getValue();
+ for (Map.Entry<Expression, ColumnStatistic> entry :
orStats.columnStatistics().entrySet()) {
+ ColumnStatistic leftColStats =
leftStats.findColumnStatistics(entry.getKey());
ColumnStatistic rightColStats =
rightStats.findColumnStatistics(entry.getKey());
- ColumnStatisticBuilder estimatedColStatsBuilder = new
ColumnStatisticBuilder(leftColStats);
+ ColumnStatisticBuilder estimatedColStatsBuilder = new
ColumnStatisticBuilder(entry.getValue());
if (leftColStats.minValue <= rightColStats.minValue) {
estimatedColStatsBuilder.setMinValue(leftColStats.minValue);
estimatedColStatsBuilder.setMinExpr(leftColStats.minExpr);
@@ -113,7 +114,17 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
@Override
public Statistics visitComparisonPredicate(ComparisonPredicate cp,
EstimationContext context) {
Expression left = cp.left();
+ if (left instanceof SlotReference && ((SlotReference)
left).getColumn().isPresent()) {
+ if ("__DORIS_DELETE_SIGN__".equals(((SlotReference)
left).getColumn().get().getName())) {
+ return context.statistics;
+ }
+ }
Expression right = cp.right();
+ if (right instanceof SlotReference && ((SlotReference)
right).getColumn().isPresent()) {
+ if ("__DORIS_DELETE_SIGN__".equals(((SlotReference)
right).getColumn().get().getName())) {
+ return context.statistics;
+ }
+ }
ColumnStatistic statsForLeft = ExpressionEstimation.estimate(left,
context.statistics);
ColumnStatistic statsForRight = ExpressionEstimation.estimate(right,
context.statistics);
if (!(left instanceof Literal) && !(right instanceof Literal)) {
@@ -152,10 +163,6 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
if (statsForLeft == ColumnStatistic.UNKNOWN) {
return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
}
- Expression rightExpr = cp.child(1);
- if (!(rightExpr.getDataType() instanceof NumericType)) {
- return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
- }
double selectivity;
double ndv = statsForLeft.ndv;
double val = statsForRight.maxValue;
@@ -175,7 +182,33 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
if (statsForLeft.histogram != null) {
return estimateEqualToWithHistogram(cp.left(), statsForLeft,
val, context);
}
- return context.statistics.withSel(selectivity);
+ // if cp.left is func(A), we assume func(A) has the same statistics as A
+ // for example: cast(N_NAME as varchar(*)) = 'GERMANY',
+ // we assume cast(N_NAME as varchar(*)) and N_NAME have the same
col stats
+ Set<Slot> leftSlots = cp.left().getInputSlots();
+ Preconditions.checkArgument(leftSlots.size() <= 1,
+ "stats derive: equal condition only support at one column,
but we meet "
+ + leftSlots.size()
+ );
+
+ Statistics equalStats = context.statistics.withSel(selectivity);
+ /*
+ leftSlots could be empty, for example:
+ select * from (select 'jj' as kk1, sum(k2) from ${tableName2}
where k10 = '2015-04-02' group by kk1)tt
+ where kk1 in ('jj')
+ kk1 in ('jj') => kk1 = 'jj' => 'jj' = 'jj'
+ TODO const fold could eliminate this equalTo.
+ */
+ if (!leftSlots.isEmpty()) {
+ Slot leftSlot = leftSlots.iterator().next();
+ //update min/max of cp.left
+ ColumnStatistic columnStats =
equalStats.findColumnStatistics(leftSlot);
+ ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder(columnStats);
+ colStatsBuilder.setMaxValue(val);
+ colStatsBuilder.setMinValue(val);
+ equalStats.addColumnStats(leftSlot, colStatsBuilder.build());
+ }
+ return equalStats;
} else {
if (cp instanceof LessThan || cp instanceof LessThanEqual) {
if (context.isNot) {
@@ -238,7 +271,6 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
A.selectivity = 7/10
*/
double validInOptCount = 0;
- double columnSelectivity = 1.0;
double selectivity = 1.0;
ColumnStatisticBuilder compareExprStatsBuilder = new
ColumnStatisticBuilder(compareExprStats);
if (isNotIn) {
@@ -250,7 +282,6 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
}
}
validInOptCount = Math.max(1, compareExprStats.ndv -
validInOptCount);
- columnSelectivity = compareExprStats.ndv == 0 ? 0 : Math.max(1,
validInOptCount) / compareExprStats.ndv;
} else {
for (Expression option : options) {
ColumnStatistic optionStats =
ExpressionEstimation.estimate(option, context.statistics);
@@ -263,29 +294,13 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
}
maxOption = Math.min(maxOption, compareExprStats.maxValue);
minOption = Math.max(minOption, compareExprStats.minValue);
- if (maxOption == minOption) {
- columnSelectivity = 1.0;
- } else {
- double outputRange = maxOption - minOption;
- double originRange = Math.max(1, compareExprStats.maxValue -
compareExprStats.minValue);
- double orginDensity = StatsMathUtil.minNonNaN(1,
- compareExprStats.ndv /
StatsMathUtil.nonZeroDivisor(originRange));
- double outputDensity = StatsMathUtil.minNonNaN(1,
- validInOptCount /
StatsMathUtil.nonZeroDivisor(outputRange));
- columnSelectivity = StatsMathUtil.minNonNaN(1, outputDensity
- / StatsMathUtil.nonZeroDivisor(orginDensity));
- }
compareExprStatsBuilder.setMaxValue(maxOption);
compareExprStatsBuilder.setMinValue(minOption);
}
selectivity = StatsMathUtil.minNonNaN(1.0, validInOptCount /
compareExprStats.ndv);
-
- compareExprStatsBuilder.setSelectivity(compareExprStats.selectivity *
columnSelectivity);
compareExprStatsBuilder.setNdv(validInOptCount);
-
Statistics estimated = new Statistics(context.statistics);
-
estimated = estimated.withSel(selectivity);
if (compareExpr instanceof SlotReference) {
estimated.addColumnStats(compareExpr,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
index c1e30f1da7..e77c060bba 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
@@ -21,6 +21,7 @@ import org.apache.doris.common.Pair;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.algebra.Join;
+import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
@@ -70,6 +71,11 @@ public class JoinEstimation {
.build();
List<Expression> joinConditions = join.getHashJoinConjuncts();
Statistics innerJoinStats = estimateInnerJoin(crossJoinStats,
joinConditions);
+ if (!join.getOtherJoinConjuncts().isEmpty()) {
+ FilterEstimation filterEstimation = new FilterEstimation();
+ innerJoinStats = filterEstimation.estimate(
+ ExpressionUtils.and(join.getOtherJoinConjuncts()),
innerJoinStats);
+ }
innerJoinStats.setWidth(leftStats.getWidth() + rightStats.getWidth());
innerJoinStats.setPenalty(0);
double rowCount;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 5e196a4441..8f10d6f4ea 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -106,6 +106,7 @@ import java.util.stream.Collectors;
* Used to calculate the stats for each plan
*/
public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
+ public static double DEFAULT_AGGREGATE_RATIO = 0.5;
private final GroupExpression groupExpression;
private StatsCalculator(GroupExpression groupExpression) {
@@ -130,7 +131,7 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
if (originStats == null || originStats.getRowCount() >
stats.getRowCount()) {
groupExpression.getOwnerGroup().setStatistics(stats);
}
- groupExpression.setEstOutputRowCount((long) stats.getRowCount());
+ groupExpression.setEstOutputRowCount(stats.getRowCount());
groupExpression.setStatDerived(true);
}
@@ -436,12 +437,32 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
// TODO: since we have no column stats here. just use a fix ratio to
compute the row count.
List<Expression> groupByExpressions =
aggregate.getGroupByExpressions();
Statistics childStats = groupExpression.childStatistics(0);
- Map<Expression, ColumnStatistic> childSlotToColumnStats =
childStats.columnStatistics();
- double resultSetCount = groupByExpressions.stream().flatMap(expr ->
expr.getInputSlots().stream())
-
.filter(childSlotToColumnStats::containsKey).map(childSlotToColumnStats::get).map(s
-> s.ndv)
- .reduce(1d, (a, b) -> a * b);
- if (resultSetCount <= 0) {
- resultSetCount = 1L;
+ double resultSetCount = 1;
+ if (!groupByExpressions.isEmpty()) {
+ Map<Expression, ColumnStatistic> childSlotToColumnStats =
childStats.columnStatistics();
+ double inputRowCount = childStats.getRowCount();
+ if (inputRowCount == 0) {
+ //on empty relation, Agg output 1 tuple
+ resultSetCount = 1;
+ } else {
+ List<ColumnStatistic> groupByKeyStats =
groupByExpressions.stream()
+ .flatMap(expr -> expr.getInputSlots().stream())
+ .map(Slot::getExprId)
+ .filter(childSlotToColumnStats::containsKey)
+ .map(childSlotToColumnStats::get)
+ .filter(s -> !s.isUnKnown)
+ .collect(Collectors.toList());
+ if (groupByKeyStats.isEmpty()) {
+ //all column stats are unknown, use default ratio
+ resultSetCount = inputRowCount * DEFAULT_AGGREGATE_RATIO;
+ } else {
+ resultSetCount = groupByKeyStats.stream()
+ .map(s -> s.ndv)
+ .reduce(1.0, (a, b) -> a * b);
+ //agg output tuples should be less than input tuples
+ resultSetCount = Math.min(resultSetCount, inputRowCount);
+ }
+ }
}
resultSetCount = Math.min(resultSetCount, childStats.getRowCount());
Map<Expression, ColumnStatistic> slotToColumnStats = Maps.newHashMap();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
index 3da6bf2d9d..1d8cdbf71f 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
@@ -257,7 +257,7 @@ public class PhysicalHashAggregate<CHILD_TYPE extends Plan>
extends PhysicalUnar
@Override
public PhysicalHashAggregate<CHILD_TYPE>
withAggOutput(List<NamedExpression> newOutput) {
return new PhysicalHashAggregate<>(groupByExpressions, newOutput,
partitionExpressions,
- aggregateParam, maybeUsingStream, groupExpression,
getLogicalProperties(),
+ aggregateParam, maybeUsingStream, Optional.empty(),
getLogicalProperties(),
requireProperties, physicalProperties, statistics, child());
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
index c6e263c99a..29e92b5038 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
@@ -119,7 +119,8 @@ public class PhysicalNestedLoopJoin<
"type", joinType,
"otherJoinCondition", otherJoinConjuncts,
"isMarkJoin", markJoinSlotReference.isPresent(),
- "markJoinSlotReference", markJoinSlotReference.isPresent() ?
markJoinSlotReference.get() : "empty"
+ "markJoinSlotReference", markJoinSlotReference.isPresent() ?
markJoinSlotReference.get() : "empty",
+ "stats", statistics
);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 048f342d89..b9cf6040e8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -20,12 +20,12 @@ package org.apache.doris.statistics;
import org.apache.doris.nereids.stats.StatsMathUtil;
import org.apache.doris.nereids.trees.expressions.Expression;
+import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
public class Statistics {
-
private final double rowCount;
private final Map<Expression, ColumnStatistic> expressionToColumnStats;
@@ -38,6 +38,19 @@ public class Statistics {
@Deprecated
private double penalty;
+ /**
+ * after filter, compute the new ndv of a column
+ * @param ndv original ndv of column
+ * @param newRowCount the row count of table after filter
+ * @param oldRowCount the row count of table before filter
+ * @return the new ndv after filter
+ */
+ public static double computeNdv(double ndv, double newRowCount, double
oldRowCount) {
+ double selectOneTuple = newRowCount /
StatsMathUtil.nonZeroDivisor(oldRowCount);
+ double allTuplesOfSameDistinctValueNotSelected = Math.pow((1 -
selectOneTuple), oldRowCount / ndv);
+ return Math.min(ndv * (1 - allTuplesOfSameDistinctValueNotSelected),
newRowCount);
+ }
+
public Statistics(Statistics another) {
this.rowCount = another.rowCount;
this.expressionToColumnStats = new
HashMap<>(another.expressionToColumnStats);
@@ -72,17 +85,18 @@ public class Statistics {
public Statistics withRowCount(double rowCount) {
Statistics statistics = new Statistics(rowCount, new
HashMap<>(expressionToColumnStats), width, penalty);
- statistics.fix(rowCount / StatsMathUtil.nonZeroDivisor(this.rowCount));
+ statistics.fix(rowCount, StatsMathUtil.nonZeroDivisor(this.rowCount));
return statistics;
}
- public void fix(double sel) {
+ public void fix(double newRowCount, double originRowCount) {
+ double sel = newRowCount / originRowCount;
for (Entry<Expression, ColumnStatistic> entry :
expressionToColumnStats.entrySet()) {
ColumnStatistic columnStatistic = entry.getValue();
ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(columnStatistic);
-
columnStatisticBuilder.setNdv(Math.min(Math.ceil(columnStatistic.ndv * sel),
rowCount));
-
columnStatisticBuilder.setNumNulls(Math.min(Math.ceil(columnStatistic.numNulls
* sel), rowCount));
-
columnStatisticBuilder.setCount(Math.min(Math.ceil(columnStatistic.count *
sel), rowCount));
+ columnStatisticBuilder.setNdv(computeNdv(columnStatistic.ndv,
newRowCount, originRowCount));
+
columnStatisticBuilder.setNumNulls(Math.min(columnStatistic.numNulls * sel,
rowCount));
+ columnStatisticBuilder.setCount(newRowCount);
expressionToColumnStats.put(entry.getKey(),
columnStatisticBuilder.build());
}
}
@@ -105,7 +119,7 @@ public class Statistics {
public double computeSize() {
if (computeSize <= 0) {
computeSize = Math.max(1, expressionToColumnStats.values().stream()
- .map(s -> s.dataSize).reduce(0D, Double::sum)
+ .map(s -> s.avgSizeByte).reduce(0D, Double::sum)
) * rowCount;
}
return computeSize;
@@ -113,7 +127,8 @@ public class Statistics {
@Override
public String toString() {
- return String.format("rows=%.4f", rowCount);
+ DecimalFormat format = new DecimalFormat("#,###.##");
+ return format.format(rowCount);
}
public void setWidth(double width) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java
index 70fe91afc9..977ba7beb2 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java
@@ -55,12 +55,13 @@ public class RankTest extends TPCHTestBase {
.optimize()
.getCascadesContext()
.getMemo();
- PhysicalPlan plan1 = memo.unrank(memo.rank(1).first);
PhysicalPlan plan2 = PlanChecker.from(connectContext)
.analyze(field.get(null).toString())
.rewrite()
.optimize()
.getBestPlanTree(PhysicalProperties.GATHER);
+ PhysicalPlan plan1 = memo.unrank(memo.rank(1).first);
+
Assertions.assertTrue(PlanChecker.isPlanEqualWithoutID(plan1,
plan2));
}
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 3992e2de9a..691cf53720 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -197,8 +197,7 @@ class FilterEstimationTest {
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(or, stat);
- Assertions.assertTrue(
- Precision.equals(50, expected.getRowCount(), 0.01));
+ Assertions.assertEquals(51, expected.getRowCount(), 0.1);
}
// a > 500 and b < 100 or a > c
@@ -450,9 +449,9 @@ class FilterEstimationTest {
Assertions.assertEquals(1000 * 7.0 / 10.0, estimated.getRowCount());
}
- //c>100
+ // c>100
// a is primary-key, a.ndv is reduced
- // b is normal, b.ndv is not changed
+ // b is normal, b.ndv is smaller: newNdv = ndv * (1 - Math.pow(1 -
selectivity, rowCount / ndv));
// c.selectivity is still 1, but its range becomes half
@Test
public void test12() {
@@ -466,8 +465,8 @@ class FilterEstimationTest {
.setNdv(1000)
.setAvgSizeByte(4)
.setNumNulls(0)
- .setMinValue(10000)
- .setMaxValue(1000)
+ .setMinValue(1000)
+ .setMaxValue(10000)
.setSelectivity(1.0);
ColumnStatisticBuilder builderB = new ColumnStatisticBuilder()
.setNdv(100)
@@ -492,7 +491,7 @@ class FilterEstimationTest {
ColumnStatistic statsA = estimated.findColumnStatistics(a);
Assertions.assertEquals(500, statsA.ndv);
ColumnStatistic statsB = estimated.findColumnStatistics(b);
- Assertions.assertEquals(50, statsB.ndv);
+ Assertions.assertEquals(100, statsB.ndv, 0.1);
ColumnStatistic statsC = estimated.findColumnStatistics(c);
Assertions.assertEquals(50, statsC.ndv);
Assertions.assertEquals(100, statsC.minValue);
@@ -502,9 +501,10 @@ class FilterEstimationTest {
/**
* test filter estimation, like 20>c>10, c in (0,40)
* filter range has intersection with (c.min, c.max)
- * a primary key, a.ndv reduced by 1/4, a.selectivity=0.25
- * b normal field, b.ndv not changed, b.selectivity=1.0
- * c.ndv = 10/40 * c.ndv, c.selectivity=1
+ * rows = 100
+ * a primary key, a.ndv reduced by 1/4
+ * b normal field, b.ndv=20 =>
+ * c.ndv = 10/40 * c.ndv
*/
@Test
public void testFilterInsideMinMax() {
@@ -547,13 +547,13 @@ class FilterEstimationTest {
Statistics estimated = filterEstimation.estimate(and, stat);
Assertions.assertEquals(25, estimated.getRowCount());
ColumnStatistic statsA = estimated.findColumnStatistics(a);
- Assertions.assertEquals(25, statsA.ndv);
+ Assertions.assertEquals(25, statsA.ndv, 0.1);
//Assertions.assertEquals(0.25, statsA.selectivity);
Assertions.assertEquals(0, statsA.minValue);
Assertions.assertEquals(100, statsA.maxValue);
ColumnStatistic statsB = estimated.findColumnStatistics(b);
- Assertions.assertEquals(5, statsB.ndv);
+ Assertions.assertEquals(15.6, statsB.ndv, 0.1);
Assertions.assertEquals(0, statsB.minValue);
Assertions.assertEquals(500, statsB.maxValue);
Assertions.assertEquals(1.0, statsB.selectivity);
@@ -686,10 +686,10 @@ class FilterEstimationTest {
ColumnStatistic statsA = estimated.findColumnStatistics(a);
ColumnStatistic statsB = estimated.findColumnStatistics(b);
ColumnStatistic statsC = estimated.findColumnStatistics(c);
- Assertions.assertEquals(5, statsA.ndv);
+ Assertions.assertEquals(5, statsA.ndv, 0.1);
Assertions.assertEquals(0, statsA.minValue);
Assertions.assertEquals(100, statsA.maxValue);
- Assertions.assertEquals(1, statsB.ndv);
+ Assertions.assertEquals(4.5, statsB.ndv, 0.1);
Assertions.assertEquals(0, statsB.minValue);
Assertions.assertEquals(500, statsB.maxValue);
Assertions.assertEquals(2, statsC.ndv);
@@ -763,10 +763,10 @@ class FilterEstimationTest {
System.out.println(statsA);
System.out.println(statsB);
System.out.println(statsC);
- Assertions.assertEquals(5, statsA.ndv);
+ Assertions.assertEquals(5, statsA.ndv, 0.1);
Assertions.assertEquals(0, statsA.minValue);
Assertions.assertEquals(100, statsA.maxValue);
- Assertions.assertEquals(1, statsB.ndv);
+ Assertions.assertEquals(4.5, statsB.ndv, 0.1);
Assertions.assertEquals(0, statsB.minValue);
Assertions.assertEquals(500, statsB.maxValue);
Assertions.assertEquals(2, statsC.ndv);
@@ -832,13 +832,10 @@ class FilterEstimationTest {
ColumnStatistic statsA = estimated.findColumnStatistics(a);
ColumnStatistic statsB = estimated.findColumnStatistics(b);
ColumnStatistic statsC = estimated.findColumnStatistics(c);
- System.out.println(statsA);
- System.out.println(statsB);
- System.out.println(statsC);
Assertions.assertEquals(75, statsA.ndv);
Assertions.assertEquals(0, statsA.minValue);
Assertions.assertEquals(100, statsA.maxValue);
- Assertions.assertEquals(15, statsB.ndv);
+ Assertions.assertEquals(19.9, statsB.ndv, 0.1);
Assertions.assertEquals(0, statsB.minValue);
Assertions.assertEquals(500, statsB.maxValue);
Assertions.assertEquals(30, statsC.ndv);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
index ddccd7eddc..14501cdd91 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
@@ -145,14 +145,14 @@ public class StatsCalculatorTest {
Group ownerGroup = newGroup();
groupExpression.setOwnerGroup(ownerGroup);
StatsCalculator.estimate(groupExpression);
- Assertions.assertEquals((long) 500,
ownerGroup.getStatistics().getRowCount(), 0.001);
+ Assertions.assertEquals((10000 * 0.1 * 0.05),
ownerGroup.getStatistics().getRowCount(), 0.001);
LogicalFilter<GroupPlan> logicalFilterOr = new LogicalFilter<>(or,
groupPlan);
GroupExpression groupExpressionOr = new
GroupExpression(logicalFilterOr, ImmutableList.of(childGroup));
Group ownerGroupOr = newGroup();
groupExpressionOr.setOwnerGroup(ownerGroupOr);
StatsCalculator.estimate(groupExpressionOr);
- Assertions.assertEquals((long) 1000,
+ Assertions.assertEquals((long) (10000 * (0.1 + 0.05 - 0.1 * 0.05)),
ownerGroupOr.getStatistics().getRowCount(), 0.001);
}
@@ -292,8 +292,8 @@ public class StatsCalculatorTest {
Statistics limitStats = ownerGroup.getStatistics();
Assertions.assertEquals(1, limitStats.getRowCount());
ColumnStatistic slot1Stats = limitStats.columnStatistics().get(slot1);
- Assertions.assertEquals(1, slot1Stats.ndv);
- Assertions.assertEquals(1, slot1Stats.numNulls);
+ Assertions.assertEquals(1, slot1Stats.ndv, 0.1);
+ Assertions.assertEquals(0.5, slot1Stats.numNulls);
}
@Test
@@ -322,7 +322,7 @@ public class StatsCalculatorTest {
Statistics topNStats = ownerGroup.getStatistics();
Assertions.assertEquals(1, topNStats.getRowCount());
ColumnStatistic slot1Stats = topNStats.columnStatistics().get(slot1);
- Assertions.assertEquals(1, slot1Stats.ndv);
- Assertions.assertEquals(1, slot1Stats.numNulls);
+ Assertions.assertEquals(1, slot1Stats.ndv, 0.1);
+ Assertions.assertEquals(0.5, slot1Stats.numNulls);
}
}
diff --git a/regression-test/suites/nereids_syntax_p0/join.groovy
b/regression-test/suites/nereids_syntax_p0/join.groovy
index 982c4d8316..9bda506a3d 100644
--- a/regression-test/suites/nereids_syntax_p0/join.groovy
+++ b/regression-test/suites/nereids_syntax_p0/join.groovy
@@ -204,33 +204,6 @@ suite("join") {
insert into outerjoin_D values( 1 );
"""
- def explainStr =
- sql(""" explain SELECT count(1)
- FROM
- (SELECT sub1.wtid,
- count(*)
- FROM
- (SELECT a.wtid ,
- a.wfid
- FROM test_table_b a ) sub1
- INNER JOIN [shuffle]
- (SELECT a.wtid,
- a.wfid
- FROM test_table_a a ) sub2
- ON sub1.wtid = sub2.wtid
- AND sub1.wfid = sub2.wfid
- GROUP BY sub1.wtid ) qqqq;""").toString()
- logger.info(explainStr)
- assertTrue(
- //if analyze finished
- explainStr.contains("VAGGREGATE (update serialize)") &&
explainStr.contains("VAGGREGATE (merge finalize)")
- && explainStr.contains("wtid[#8] = wtid[#3]") &&
explainStr.contains("projections: wtid[#5], wfid[#6]")
- ||
- //analyze not finished
- explainStr.contains("VAGGREGATE (update finalize)") &&
explainStr.contains("VAGGREGATE (update finalize)")
- && explainStr.contains("VEXCHANGE") &&
explainStr.contains("VHASH JOIN")
- )
-
test {
sql"""select * from test_table_a a cross join test_table_b b on a.wtid
> b.wtid"""
check{result, exception, startTime, endTime ->
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]