This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 37b062fd65 [refactor](nereids) forbid unknown stats for branch 2.0 #24061 (#24323)
37b062fd65 is described below

commit 37b062fd65a8ca37538b7e4e9dc95a8c7321920a
Author: minghong <[email protected]>
AuthorDate: Wed Sep 13 21:02:20 2023 +0800

    [refactor](nereids) forbid unknown stats for branch 2.0 #24061 (#24323)
---
 .../glue/translator/PhysicalPlanTranslator.java    | 35 ++++++++++++++++
 .../glue/translator/PlanTranslatorContext.java     | 31 ++++++++++++++
 .../doris/nereids/stats/StatsCalculator.java       | 49 +++++-----------------
 3 files changed, 77 insertions(+), 38 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 406dda5485..359373ddbf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -126,6 +126,11 @@ import 
org.apache.doris.nereids.trees.plans.physical.PhysicalUnion;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow;
 import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter;
 import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.types.JsonType;
+import org.apache.doris.nereids.types.MapType;
+import org.apache.doris.nereids.types.StructType;
 import org.apache.doris.nereids.util.ExpressionUtils;
 import org.apache.doris.nereids.util.JoinUtils;
 import org.apache.doris.nereids.util.Utils;
@@ -235,6 +240,14 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
         Collections.reverse(context.getPlanFragments());
         // TODO: maybe we need to trans nullable directly? and then we could 
remove call computeMemLayout
         context.getDescTable().computeMemLayout();
+        if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
+            Set<ScanNode> scans = context.getScanNodeWithUnknownColumnStats();
+            if (!scans.isEmpty()) {
+                StringBuilder builder = new StringBuilder();
+                scans.forEach(scanNode -> builder.append(scanNode));
+                throw new AnalysisException("tables with unknown column stats: 
" + builder);
+            }
+        }
         return rootFragment;
     }
 
@@ -530,6 +543,15 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
         // TODO: move all node set cardinality into one place
         if (olapScan.getStats() != null) {
             olapScanNode.setCardinality((long) 
olapScan.getStats().getRowCount());
+            if 
(ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
+                for (int i = 0; i < slots.size(); i++) {
+                    Slot slot = slots.get(i);
+                    if 
(olapScan.getStats().findColumnStatistics(slot).isUnKnown()
+                            && !isComplexDataType(slot.getDataType())) {
+                        context.addUnknownStatsColumn(olapScanNode, 
tupleDescriptor.getSlots().get(i).getId());
+                    }
+                }
+            }
         }
         // TODO: Do we really need tableName here?
         TableName tableName = new TableName(null, "", "");
@@ -1978,6 +2000,14 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
             scanNode.getTupleDesc().getSlots().add(smallest);
         }
         try {
+            if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
+                for (SlotId slotId : requiredByProjectSlotIdSet) {
+                    if (context.isColumnStatsUnknown(scanNode, slotId)) {
+                        throw new AnalysisException("meet unknown column stats 
on table " + scanNode);
+                    }
+                }
+                context.removeScanFromStatsUnknownColumnsMap(scanNode);
+            }
             scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet);
         } catch (UserException e) {
             Util.logAndThrowRuntimeException(LOG,
@@ -2240,4 +2270,9 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
         }
         return outputExprs;
     }
+
+    /**
+     * Tell whether the given Nereids data type is a nested or semi-structured type.
+     *
+     * @param dataType type to inspect
+     * @return true if {@code dataType} is an ArrayType, MapType, JsonType or StructType
+     */
+    private boolean isComplexDataType(DataType dataType) {
+        boolean arrayOrMap = dataType instanceof ArrayType || dataType instanceof MapType;
+        boolean jsonOrStruct = dataType instanceof JsonType || dataType instanceof StructType;
+        return arrayOrMap || jsonOrStruct;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
index 256b37d705..e69b5ee8ef 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
@@ -45,11 +45,13 @@ import org.apache.doris.thrift.TPushAggOp;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
 
 import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.Set;
 import java.util.stream.Collectors;
 import javax.annotation.Nullable;
 
@@ -90,6 +92,7 @@ public class PlanTranslatorContext {
     private final Map<CTEId, PhysicalCTEProducer> cteProducerMap = 
Maps.newHashMap();
 
     private final Map<RelationId, TPushAggOp> tablePushAggOp = 
Maps.newHashMap();
+    private final Map<ScanNode, Set<SlotId>> statsUnknownColumnsMap = 
Maps.newHashMap();
 
     public PlanTranslatorContext(CascadesContext ctx) {
         this.translator = new 
RuntimeFilterTranslator(ctx.getRuntimeFilterContext());
@@ -100,6 +103,34 @@ public class PlanTranslatorContext {
         translator = null;
     }
 
+    /**
+     * Remember a column whose stats are unknown together with its scan,
+     * used for the forbid_unknown_col_stats check.
+     *
+     * @param scan scan node the column is recorded for
+     * @param slotId id of the slot whose column stats are unknown
+     */
+    public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) {
+        // create the per-scan set on first use, then add to it: one map lookup instead of get + put/get
+        statsUnknownColumnsMap.computeIfAbsent(scan, key -> Sets.newHashSet()).add(slotId);
+    }
+
+    /**
+     * Check whether a slot is recorded in statsUnknownColumnsMap for the given scan.
+     *
+     * @param scan scan node to look up
+     * @param slotId slot id to test
+     * @return true if an entry exists for {@code scan} and it contains {@code slotId}, false otherwise
+     */
+    public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) {
+        Set<SlotId> recorded = statsUnknownColumnsMap.get(scan);
+        return recorded != null && recorded.contains(slotId);
+    }
+
+    /**
+     * Drop the entry of {@code scan}, with all slot ids recorded for it, from statsUnknownColumnsMap.
+     * Does nothing if the scan has no entry.
+     *
+     * @param scan scan node whose entry is removed
+     */
+    public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) {
+        statsUnknownColumnsMap.remove(scan);
+    }
+
+    /**
+     * Get the scan nodes that currently have an entry in statsUnknownColumnsMap.
+     *
+     * @return the key set of the map; this is the map's own view, not a copy
+     */
+    public Set<ScanNode> getScanNodeWithUnknownColumnStats() {
+        Set<ScanNode> scansWithEntry = statsUnknownColumnsMap.keySet();
+        return scansWithEntry;
+    }
+
     public List<PlanFragment> getPlanFragments() {
         return planFragments;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 45aeae54fd..24ec929e82 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -26,7 +26,6 @@ import org.apache.doris.common.Config;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.CascadesContext;
-import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.memo.Group;
 import org.apache.doris.nereids.memo.GroupExpression;
 import org.apache.doris.nereids.trees.expressions.Alias;
@@ -123,7 +122,6 @@ import org.apache.doris.statistics.StatisticConstants;
 import org.apache.doris.statistics.StatisticRange;
 import org.apache.doris.statistics.Statistics;
 import org.apache.doris.statistics.StatisticsBuilder;
-import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
@@ -623,46 +621,21 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                         
.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize())
                         .build();
             }
-            if (cache.isUnKnown) {
-                if (forbidUnknownColStats && !shouldIgnoreThisCol) {
-                    if (StatisticsUtil.statsTblAvailable()) {
-                        throw new AnalysisException(String.format("Found 
unknown stats for column:%s.%s.\n"
-                                + "It may caused by:\n"
-                                + "\n"
-                                + "1. This column never got analyzed\n"
-                                + "2. This table is empty\n"
-                                + "3. Stats load failed caused by unstable of 
backends,"
-                                + "and FE cached the unknown stats by default 
in this scenario\n"
-                                + "4. There is a bug, please report it to 
Doris community\n"
-                                + "\n"
-                                + "If an unknown stats for this column is 
tolerable,"
-                                + "you could set session variable 
`forbid_unknown_col_stats` to false to make planner"
-                                + " ignore this error and keep planning.", 
table.getName(), colName));
-                    } else {
-                        throw new AnalysisException("BE is not available!");
+            if (!cache.isUnKnown) {
+                rowCount = Math.max(rowCount, cache.count);
+                Histogram histogram = getColumnHistogram(table, colName);
+                if (histogram != null) {
+                    ColumnStatisticBuilder columnStatisticBuilder =
+                            new 
ColumnStatisticBuilder(cache).setHistogram(histogram);
+                    cache = columnStatisticBuilder.build();
+                    if 
(ConnectContext.get().getSessionVariable().isEnableMinidump()
+                            && 
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
+                        totalColumnStatisticMap.put(table.getName() + ":" + 
colName, cache);
+                        totalHistogramMap.put(table.getName() + colName, 
histogram);
                     }
                 }
-                columnStatisticMap.put(slotReference, cache);
-                continue;
-            }
-            rowCount = Math.max(rowCount, cache.count);
-            Histogram histogram = getColumnHistogram(table, colName);
-            if (histogram != null) {
-                ColumnStatisticBuilder columnStatisticBuilder =
-                        new 
ColumnStatisticBuilder(cache).setHistogram(histogram);
-                columnStatisticMap.put(slotReference, 
columnStatisticBuilder.build());
-                cache = columnStatisticBuilder.build();
-                if 
(ConnectContext.get().getSessionVariable().isEnableMinidump()
-                        && 
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
-                    totalHistogramMap.put(table.getName() + ":" + colName, 
histogram);
-                }
             }
             columnStatisticMap.put(slotReference, cache);
-            if (ConnectContext.get().getSessionVariable().isEnableMinidump()
-                    && 
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
-                totalColumnStatisticMap.put(table.getName() + ":" + colName, 
cache);
-                totalHistogramMap.put(table.getName() + colName, histogram);
-            }
         }
         return new Statistics(rowCount, columnStatisticMap);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to