This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a9bd84c60d3 [fix](nereids)Fix CTE consumer stats derivation when producer stats are not precomputed (#61074)
a9bd84c60d3 is described below

commit a9bd84c60d32fdd192e9cd97f5259999518a7ea5
Author: minghong <[email protected]>
AuthorDate: Wed Mar 11 16:11:07 2026 +0800

    [fix](nereids)Fix CTE consumer stats derivation when producer stats are not precomputed (#61074)
    
    ### What problem does this PR solve?
    This change replaces the cached CTE producer statistics with a
    `cteId -> producer` mapping and makes `StatsDerive` lazily derive the
    producer's stats from that producer when a CTE consumer is visited
    before its producer, preventing failures caused by missing (null)
    producer stats in mid-plan derivation.
---
 .../org/apache/doris/nereids/StatementContext.java | 23 +++++++++++-----------
 .../doris/nereids/rules/analysis/AnalyzeCTE.java   |  5 ++++-
 .../rewrite/DecomposeRepeatWithPreAggregation.java |  2 +-
 .../doris/nereids/rules/rewrite/OrExpansion.java   |  2 ++
 .../nereids/rules/rewrite/RewriteCteChildren.java  |  5 ++++-
 .../rules/rewrite/SplitMultiDistinctStrategy.java  |  4 +---
 .../doris/nereids/rules/rewrite/StatsDerive.java   | 15 +++++++++++---
 7 files changed, 36 insertions(+), 20 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
index d938657d611..af24f9a83c7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
@@ -55,6 +55,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.RelationId;
 import org.apache.doris.nereids.trees.plans.TableId;
 import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer;
+import org.apache.doris.nereids.trees.plans.logical.LogicalCTEProducer;
 import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
 import org.apache.doris.nereids.util.RelationUtil;
 import org.apache.doris.qe.ConnectContext;
@@ -158,7 +159,7 @@ public class StatementContext implements Closeable {
     private final Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers = new 
HashMap<>();
     private final Map<CTEId, Set<Slot>> cteIdToOutputIds = new HashMap<>();
 
-    private final Map<CTEId, Statistics> cteIdToProducerStats = new 
HashMap<>();
+    private final Map<CTEId, LogicalCTEProducer<? extends Plan>> 
cteIdToProducer = new HashMap<>();
 
     private final Map<RelationId, Set<Expression>> consumerIdToFilters = new 
HashMap<>();
     // Used to update consumer's stats
@@ -646,7 +647,7 @@ public class StatementContext implements Closeable {
         return new CteEnvironmentSnapshot(
                 copyMapOfSets(cteIdToConsumers),
                 copyMapOfSets(cteIdToOutputIds),
-                new HashMap<>(cteIdToProducerStats),
+                new HashMap<>(cteIdToProducer),
                 copyMapOfSets(consumerIdToFilters),
                 copyMapOfLists(cteIdToConsumerGroup),
                 new HashMap<>(rewrittenCteProducer),
@@ -661,8 +662,8 @@ public class StatementContext implements Closeable {
         cteIdToOutputIds.clear();
         cteIdToOutputIds.putAll(snapshot.cteIdToOutputIds);
 
-        cteIdToProducerStats.clear();
-        cteIdToProducerStats.putAll(snapshot.cteIdToProducerStats);
+        cteIdToProducer.clear();
+        cteIdToProducer.putAll(snapshot.cteIdToProducer);
 
         consumerIdToFilters.clear();
         consumerIdToFilters.putAll(snapshot.consumerIdToFilters);
@@ -697,7 +698,7 @@ public class StatementContext implements Closeable {
     public static class CteEnvironmentSnapshot {
         private final Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers;
         private final Map<CTEId, Set<Slot>> cteIdToOutputIds;
-        private final Map<CTEId, Statistics> cteIdToProducerStats;
+        private final Map<CTEId, LogicalCTEProducer<? extends Plan>> 
cteIdToProducer;
         private final Map<RelationId, Set<Expression>> consumerIdToFilters;
         private final Map<CTEId, List<Pair<Multimap<Slot, Slot>, Group>>> 
cteIdToConsumerGroup;
         private final Map<CTEId, LogicalPlan> rewrittenCteProducer;
@@ -709,14 +710,14 @@ public class StatementContext implements Closeable {
         public CteEnvironmentSnapshot(
                 Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers,
                 Map<CTEId, Set<Slot>> cteIdToOutputIds,
-                Map<CTEId, Statistics> cteIdToProducerStats,
+                Map<CTEId, LogicalCTEProducer<? extends Plan>> cteIdToProducer,
                 Map<RelationId, Set<Expression>> consumerIdToFilters,
                 Map<CTEId, List<Pair<Multimap<Slot, Slot>, Group>>> 
cteIdToConsumerGroup,
                 Map<CTEId, LogicalPlan> rewrittenCteProducer,
                 Map<CTEId, LogicalPlan> rewrittenCteConsumer) {
             this.cteIdToConsumers = cteIdToConsumers;
             this.cteIdToOutputIds = cteIdToOutputIds;
-            this.cteIdToProducerStats = cteIdToProducerStats;
+            this.cteIdToProducer = cteIdToProducer;
             this.consumerIdToFilters = consumerIdToFilters;
             this.cteIdToConsumerGroup = cteIdToConsumerGroup;
             this.rewrittenCteProducer = rewrittenCteProducer;
@@ -1119,12 +1120,12 @@ public class StatementContext implements Closeable {
         return prepareStage;
     }
 
-    public Statistics getProducerStatsByCteId(CTEId id) {
-        return cteIdToProducerStats.get(id);
+    public LogicalCTEProducer<? extends Plan> getCteProducerByCteId(CTEId id) {
+        return cteIdToProducer.get(id);
     }
 
-    public void setProducerStats(CTEId id, Statistics stats) {
-        cteIdToProducerStats.put(id, stats);
+    public void setCteProducer(CTEId id, LogicalCTEProducer<? extends Plan> 
producer) {
+        cteIdToProducer.put(id, producer);
     }
 
     public void setIsInsert(boolean isInsert) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
index f59d88a8b84..f29644ff65f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
@@ -133,7 +133,9 @@ public class AnalyzeCTE extends OneAnalysisRuleFactory {
                         .withChildren(ImmutableList.of(analyzedCtePlan));
                 outerCteCtx = new CTEContext(cteId, logicalSubQueryAlias, 
outerCteCtx);
                 outerCteCtx.setAnalyzedPlan(logicalSubQueryAlias);
-                cteProducerPlans.add(new LogicalCTEProducer<>(cteId, 
logicalSubQueryAlias));
+                LogicalCTEProducer<Plan> cteProducer = new 
LogicalCTEProducer<>(cteId, logicalSubQueryAlias);
+                cascadesContext.getStatementContext().setCteProducer(cteId, 
cteProducer);
+                cteProducerPlans.add(cteProducer);
             }
         }
         return Pair.of(outerCteCtx, cteProducerPlans);
@@ -237,6 +239,7 @@ public class AnalyzeCTE extends OneAnalysisRuleFactory {
         outerCteCtx = new CTEContext(cteId, logicalSubQueryAlias, outerCteCtx);
         outerCteCtx.setAnalyzedPlan(logicalSubQueryAlias);
         LogicalCTEProducer<Plan> cteProducer = new LogicalCTEProducer<>(cteId, 
logicalSubQueryAlias);
+        cascadesContext.getStatementContext().setCteProducer(cteId, 
cteProducer);
         return Pair.of(outerCteCtx, cteProducer);
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
index a3101afb58b..aa0e75bc043 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
@@ -492,8 +492,8 @@ public class DecomposeRepeatWithPreAggregation extends 
DefaultPlanRewriter<Disti
         }
         LogicalCTEProducer<LogicalAggregate<Plan>> producer =
                 new LogicalCTEProducer<>(ctx.statementContext.getNextCTEId(), 
preAggClone);
+        ctx.statementContext.setCteProducer(producer.getCteId(), producer);
         ctx.cteProducerList.add(producer);
-        producer.accept(new StatsDerive(false), new DeriveContext());
         return producer;
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
index cb3067c1859..aed58ae002f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
@@ -139,10 +139,12 @@ public class OrExpansion extends 
DefaultPlanRewriter<OrExpandsionContext> implem
                 .deepCopy((LogicalPlan) join.left(), new DeepCopierContext());
         LogicalCTEProducer<? extends Plan> leftProducer = new 
LogicalCTEProducer<>(
                 ctx.statementContext.getNextCTEId(), leftClone);
+        ctx.statementContext.setCteProducer(leftProducer.getCteId(), 
leftProducer);
         LogicalPlan rightClone = LogicalPlanDeepCopier.INSTANCE
                 .deepCopy((LogicalPlan) join.right(), new DeepCopierContext());
         LogicalCTEProducer<? extends Plan> rightProducer = new 
LogicalCTEProducer<>(
                 ctx.statementContext.getNextCTEId(), rightClone);
+        ctx.statementContext.setCteProducer(rightProducer.getCteId(), 
rightProducer);
         Map<Slot, Slot> leftCloneToLeft = new HashMap<>();
         for (int i = 0; i < leftClone.getOutput().size(); i++) {
             leftCloneToLeft.put(leftClone.getOutput().get(i), 
(join.left()).getOutput().get(i));
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
index 31aeb0be1ba..0360c4aa0e7 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
@@ -147,7 +147,10 @@ public class RewriteCteChildren extends 
DefaultPlanRewriter<CascadesContext> imp
             cascadesContext.addPlanProcesses(rewrittenCtx.getPlanProcesses());
             
cascadesContext.getStatementContext().getRewrittenCteProducer().put(cteProducer.getCteId(),
 child);
         }
-        return cteProducer.withChildren(child);
+        LogicalCTEProducer<? extends Plan> rewrittenProducer = 
(LogicalCTEProducer<? extends Plan>) cteProducer
+                .withChildren(child);
+        
cascadesContext.getStatementContext().setCteProducer(rewrittenProducer.getCteId(),
 rewrittenProducer);
+        return rewrittenProducer;
     }
 
     private LogicalPlan pushPlanUnderAnchor(LogicalPlan plan) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
index 8fc150c7055..c9585d269aa 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
@@ -18,7 +18,6 @@
 package org.apache.doris.nereids.rules.rewrite;
 
 import 
org.apache.doris.nereids.rules.rewrite.DistinctAggStrategySelector.DistinctSelectorContext;
-import org.apache.doris.nereids.rules.rewrite.StatsDerive.DeriveContext;
 import org.apache.doris.nereids.trees.copier.DeepCopierContext;
 import org.apache.doris.nereids.trees.copier.LogicalPlanDeepCopier;
 import org.apache.doris.nereids.trees.expressions.Alias;
@@ -60,9 +59,8 @@ public class SplitMultiDistinctStrategy {
                 .deepCopy(agg, new DeepCopierContext());
         LogicalCTEProducer<Plan> producer = new 
LogicalCTEProducer<>(ctx.statementContext.getNextCTEId(),
                 cloneAgg.child());
+        ctx.statementContext.setCteProducer(producer.getCteId(), producer);
         ctx.cteProducerList.add(producer);
-        StatsDerive derive = new StatsDerive(false);
-        producer.accept(derive, new DeriveContext());
 
         Map<Slot, Slot> originToProducerSlot = new HashMap<>();
         for (int i = 0; i < agg.child().getOutput().size(); ++i) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
index e3e06c23e6b..7ad4e30bb22 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
@@ -351,20 +351,29 @@ public class StatsDerive extends PlanVisitor<Statistics, 
StatsDerive.DeriveConte
 
     @Override
     public Statistics visitLogicalCTEProducer(LogicalCTEProducer<? extends 
Plan> cteProducer, DeriveContext context) {
+        // Fallback registration: ensure cteId -> producer mapping is always 
available
+        // even if some upstream rewrite path misses explicit registration.
+        
ConnectContext.get().getStatementContext().setCteProducer(cteProducer.getCteId(),
 cteProducer);
         Statistics prodStats = cteProducer.child().accept(this, context);
         StatisticsBuilder builder = new StatisticsBuilder(prodStats);
         builder.setWidthInJoinCluster(1);
         Statistics stats = builder.build();
         cteProducer.setStatistics(stats);
-        
ConnectContext.get().getStatementContext().setProducerStats(cteProducer.getCteId(),
 stats);
         return stats;
     }
 
     @Override
     public Statistics visitLogicalCTEConsumer(LogicalCTEConsumer cteConsumer, 
DeriveContext context) {
         CTEId cteId = cteConsumer.getCteId();
-        Statistics prodStats = 
ConnectContext.get().getStatementContext().getProducerStatsByCteId(cteId);
-        Preconditions.checkArgument(prodStats != null, String.format("Stats 
for CTE: %s not found", cteId));
+        LogicalCTEProducer<? extends Plan> cteProducer = 
ConnectContext.get().getStatementContext()
+                .getCteProducerByCteId(cteId);
+        Preconditions.checkState(cteProducer != null,
+                String.format("CTE producer for CTE: %s not found", cteId));
+        Statistics prodStats = cteProducer.getStats();
+        if (prodStats == null || deepDerive) {
+            prodStats = cteProducer.accept(this, context);
+        }
+        Preconditions.checkState(prodStats != null, String.format("Stats for 
CTE: %s not found", cteId));
         Statistics consumerStats = new Statistics(prodStats.getRowCount(), 1, 
new HashMap<>());
         for (Slot slot : cteConsumer.getOutput()) {
             Slot prodSlot = cteConsumer.getProducerSlot(slot);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to