This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a9bd84c60d3 [fix](nereids)Fix CTE consumer stats derivation when
producer stats are not precomputed (#61074)
a9bd84c60d3 is described below
commit a9bd84c60d32fdd192e9cd97f5259999518a7ea5
Author: minghong <[email protected]>
AuthorDate: Wed Mar 11 16:11:07 2026 +0800
[fix](nereids)Fix CTE consumer stats derivation when producer stats are not
precomputed (#61074)
### What problem does this PR solve?
This change replaces the cached CTE producer statistics with a
`cteId -> producer` mapping and makes `StatsDerive` lazily derive the
producer's stats from that producer when a CTE consumer is visited before
its producer, preventing null-pointer failures in mid-plan derivation.
---
.../org/apache/doris/nereids/StatementContext.java | 23 +++++++++++-----------
.../doris/nereids/rules/analysis/AnalyzeCTE.java | 5 ++++-
.../rewrite/DecomposeRepeatWithPreAggregation.java | 2 +-
.../doris/nereids/rules/rewrite/OrExpansion.java | 2 ++
.../nereids/rules/rewrite/RewriteCteChildren.java | 5 ++++-
.../rules/rewrite/SplitMultiDistinctStrategy.java | 4 +---
.../doris/nereids/rules/rewrite/StatsDerive.java | 15 +++++++++++---
7 files changed, 36 insertions(+), 20 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
index d938657d611..af24f9a83c7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
@@ -55,6 +55,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.RelationId;
import org.apache.doris.nereids.trees.plans.TableId;
import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer;
+import org.apache.doris.nereids.trees.plans.logical.LogicalCTEProducer;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.util.RelationUtil;
import org.apache.doris.qe.ConnectContext;
@@ -158,7 +159,7 @@ public class StatementContext implements Closeable {
private final Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers = new
HashMap<>();
private final Map<CTEId, Set<Slot>> cteIdToOutputIds = new HashMap<>();
- private final Map<CTEId, Statistics> cteIdToProducerStats = new
HashMap<>();
+ private final Map<CTEId, LogicalCTEProducer<? extends Plan>>
cteIdToProducer = new HashMap<>();
private final Map<RelationId, Set<Expression>> consumerIdToFilters = new
HashMap<>();
// Used to update consumer's stats
@@ -646,7 +647,7 @@ public class StatementContext implements Closeable {
return new CteEnvironmentSnapshot(
copyMapOfSets(cteIdToConsumers),
copyMapOfSets(cteIdToOutputIds),
- new HashMap<>(cteIdToProducerStats),
+ new HashMap<>(cteIdToProducer),
copyMapOfSets(consumerIdToFilters),
copyMapOfLists(cteIdToConsumerGroup),
new HashMap<>(rewrittenCteProducer),
@@ -661,8 +662,8 @@ public class StatementContext implements Closeable {
cteIdToOutputIds.clear();
cteIdToOutputIds.putAll(snapshot.cteIdToOutputIds);
- cteIdToProducerStats.clear();
- cteIdToProducerStats.putAll(snapshot.cteIdToProducerStats);
+ cteIdToProducer.clear();
+ cteIdToProducer.putAll(snapshot.cteIdToProducer);
consumerIdToFilters.clear();
consumerIdToFilters.putAll(snapshot.consumerIdToFilters);
@@ -697,7 +698,7 @@ public class StatementContext implements Closeable {
public static class CteEnvironmentSnapshot {
private final Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers;
private final Map<CTEId, Set<Slot>> cteIdToOutputIds;
- private final Map<CTEId, Statistics> cteIdToProducerStats;
+ private final Map<CTEId, LogicalCTEProducer<? extends Plan>>
cteIdToProducer;
private final Map<RelationId, Set<Expression>> consumerIdToFilters;
private final Map<CTEId, List<Pair<Multimap<Slot, Slot>, Group>>>
cteIdToConsumerGroup;
private final Map<CTEId, LogicalPlan> rewrittenCteProducer;
@@ -709,14 +710,14 @@ public class StatementContext implements Closeable {
public CteEnvironmentSnapshot(
Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers,
Map<CTEId, Set<Slot>> cteIdToOutputIds,
- Map<CTEId, Statistics> cteIdToProducerStats,
+ Map<CTEId, LogicalCTEProducer<? extends Plan>> cteIdToProducer,
Map<RelationId, Set<Expression>> consumerIdToFilters,
Map<CTEId, List<Pair<Multimap<Slot, Slot>, Group>>>
cteIdToConsumerGroup,
Map<CTEId, LogicalPlan> rewrittenCteProducer,
Map<CTEId, LogicalPlan> rewrittenCteConsumer) {
this.cteIdToConsumers = cteIdToConsumers;
this.cteIdToOutputIds = cteIdToOutputIds;
- this.cteIdToProducerStats = cteIdToProducerStats;
+ this.cteIdToProducer = cteIdToProducer;
this.consumerIdToFilters = consumerIdToFilters;
this.cteIdToConsumerGroup = cteIdToConsumerGroup;
this.rewrittenCteProducer = rewrittenCteProducer;
@@ -1119,12 +1120,12 @@ public class StatementContext implements Closeable {
return prepareStage;
}
- public Statistics getProducerStatsByCteId(CTEId id) {
- return cteIdToProducerStats.get(id);
+ public LogicalCTEProducer<? extends Plan> getCteProducerByCteId(CTEId id) {
+ return cteIdToProducer.get(id);
}
- public void setProducerStats(CTEId id, Statistics stats) {
- cteIdToProducerStats.put(id, stats);
+ public void setCteProducer(CTEId id, LogicalCTEProducer<? extends Plan>
producer) {
+ cteIdToProducer.put(id, producer);
}
public void setIsInsert(boolean isInsert) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
index f59d88a8b84..f29644ff65f 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
@@ -133,7 +133,9 @@ public class AnalyzeCTE extends OneAnalysisRuleFactory {
.withChildren(ImmutableList.of(analyzedCtePlan));
outerCteCtx = new CTEContext(cteId, logicalSubQueryAlias,
outerCteCtx);
outerCteCtx.setAnalyzedPlan(logicalSubQueryAlias);
- cteProducerPlans.add(new LogicalCTEProducer<>(cteId,
logicalSubQueryAlias));
+ LogicalCTEProducer<Plan> cteProducer = new
LogicalCTEProducer<>(cteId, logicalSubQueryAlias);
+ cascadesContext.getStatementContext().setCteProducer(cteId,
cteProducer);
+ cteProducerPlans.add(cteProducer);
}
}
return Pair.of(outerCteCtx, cteProducerPlans);
@@ -237,6 +239,7 @@ public class AnalyzeCTE extends OneAnalysisRuleFactory {
outerCteCtx = new CTEContext(cteId, logicalSubQueryAlias, outerCteCtx);
outerCteCtx.setAnalyzedPlan(logicalSubQueryAlias);
LogicalCTEProducer<Plan> cteProducer = new LogicalCTEProducer<>(cteId,
logicalSubQueryAlias);
+ cascadesContext.getStatementContext().setCteProducer(cteId,
cteProducer);
return Pair.of(outerCteCtx, cteProducer);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
index a3101afb58b..aa0e75bc043 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
@@ -492,8 +492,8 @@ public class DecomposeRepeatWithPreAggregation extends
DefaultPlanRewriter<Disti
}
LogicalCTEProducer<LogicalAggregate<Plan>> producer =
new LogicalCTEProducer<>(ctx.statementContext.getNextCTEId(),
preAggClone);
+ ctx.statementContext.setCteProducer(producer.getCteId(), producer);
ctx.cteProducerList.add(producer);
- producer.accept(new StatsDerive(false), new DeriveContext());
return producer;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
index cb3067c1859..aed58ae002f 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
@@ -139,10 +139,12 @@ public class OrExpansion extends
DefaultPlanRewriter<OrExpandsionContext> implem
.deepCopy((LogicalPlan) join.left(), new DeepCopierContext());
LogicalCTEProducer<? extends Plan> leftProducer = new
LogicalCTEProducer<>(
ctx.statementContext.getNextCTEId(), leftClone);
+ ctx.statementContext.setCteProducer(leftProducer.getCteId(),
leftProducer);
LogicalPlan rightClone = LogicalPlanDeepCopier.INSTANCE
.deepCopy((LogicalPlan) join.right(), new DeepCopierContext());
LogicalCTEProducer<? extends Plan> rightProducer = new
LogicalCTEProducer<>(
ctx.statementContext.getNextCTEId(), rightClone);
+ ctx.statementContext.setCteProducer(rightProducer.getCteId(),
rightProducer);
Map<Slot, Slot> leftCloneToLeft = new HashMap<>();
for (int i = 0; i < leftClone.getOutput().size(); i++) {
leftCloneToLeft.put(leftClone.getOutput().get(i),
(join.left()).getOutput().get(i));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
index 31aeb0be1ba..0360c4aa0e7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
@@ -147,7 +147,10 @@ public class RewriteCteChildren extends
DefaultPlanRewriter<CascadesContext> imp
cascadesContext.addPlanProcesses(rewrittenCtx.getPlanProcesses());
cascadesContext.getStatementContext().getRewrittenCteProducer().put(cteProducer.getCteId(),
child);
}
- return cteProducer.withChildren(child);
+ LogicalCTEProducer<? extends Plan> rewrittenProducer =
(LogicalCTEProducer<? extends Plan>) cteProducer
+ .withChildren(child);
+
cascadesContext.getStatementContext().setCteProducer(rewrittenProducer.getCteId(),
rewrittenProducer);
+ return rewrittenProducer;
}
private LogicalPlan pushPlanUnderAnchor(LogicalPlan plan) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
index 8fc150c7055..c9585d269aa 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
@@ -18,7 +18,6 @@
package org.apache.doris.nereids.rules.rewrite;
import
org.apache.doris.nereids.rules.rewrite.DistinctAggStrategySelector.DistinctSelectorContext;
-import org.apache.doris.nereids.rules.rewrite.StatsDerive.DeriveContext;
import org.apache.doris.nereids.trees.copier.DeepCopierContext;
import org.apache.doris.nereids.trees.copier.LogicalPlanDeepCopier;
import org.apache.doris.nereids.trees.expressions.Alias;
@@ -60,9 +59,8 @@ public class SplitMultiDistinctStrategy {
.deepCopy(agg, new DeepCopierContext());
LogicalCTEProducer<Plan> producer = new
LogicalCTEProducer<>(ctx.statementContext.getNextCTEId(),
cloneAgg.child());
+ ctx.statementContext.setCteProducer(producer.getCteId(), producer);
ctx.cteProducerList.add(producer);
- StatsDerive derive = new StatsDerive(false);
- producer.accept(derive, new DeriveContext());
Map<Slot, Slot> originToProducerSlot = new HashMap<>();
for (int i = 0; i < agg.child().getOutput().size(); ++i) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
index e3e06c23e6b..7ad4e30bb22 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
@@ -351,20 +351,29 @@ public class StatsDerive extends PlanVisitor<Statistics,
StatsDerive.DeriveConte
@Override
public Statistics visitLogicalCTEProducer(LogicalCTEProducer<? extends
Plan> cteProducer, DeriveContext context) {
+ // Fallback registration: ensure cteId -> producer mapping is always
available
+ // even if some upstream rewrite path misses explicit registration.
+
ConnectContext.get().getStatementContext().setCteProducer(cteProducer.getCteId(),
cteProducer);
Statistics prodStats = cteProducer.child().accept(this, context);
StatisticsBuilder builder = new StatisticsBuilder(prodStats);
builder.setWidthInJoinCluster(1);
Statistics stats = builder.build();
cteProducer.setStatistics(stats);
-
ConnectContext.get().getStatementContext().setProducerStats(cteProducer.getCteId(),
stats);
return stats;
}
@Override
public Statistics visitLogicalCTEConsumer(LogicalCTEConsumer cteConsumer,
DeriveContext context) {
CTEId cteId = cteConsumer.getCteId();
- Statistics prodStats =
ConnectContext.get().getStatementContext().getProducerStatsByCteId(cteId);
- Preconditions.checkArgument(prodStats != null, String.format("Stats
for CTE: %s not found", cteId));
+ LogicalCTEProducer<? extends Plan> cteProducer =
ConnectContext.get().getStatementContext()
+ .getCteProducerByCteId(cteId);
+ Preconditions.checkState(cteProducer != null,
+ String.format("CTE producer for CTE: %s not found", cteId));
+ Statistics prodStats = cteProducer.getStats();
+ if (prodStats == null || deepDerive) {
+ prodStats = cteProducer.accept(this, context);
+ }
+ Preconditions.checkState(prodStats != null, String.format("Stats for
CTE: %s not found", cteId));
Statistics consumerStats = new Statistics(prodStats.getRowCount(), 1,
new HashMap<>());
for (Slot slot : cteConsumer.getOutput()) {
Slot prodSlot = cteConsumer.getProducerSlot(slot);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]