This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5505fa3755f [Fix](statistics) Fix partition name NPE and sample for 
all table during auto analyze (#28916)
5505fa3755f is described below

commit 5505fa3755fdc7aa2e88899943ad8f614dfbdf80
Author: Jibing-Li <[email protected]>
AuthorDate: Sun Dec 24 01:49:49 2023 +0800

    [Fix](statistics) Fix partition name NPE and sample for all table during 
auto analyze (#28916)
    
    Fix partition name NPE and sample all tables during auto analyze.
    Sample all tables because the result of getDataSize may lag behind the 
actual table size, which may cause a full analyze of a huge table and use too 
many resources. Sampling all tables avoids this. The strategy will be improved later.
---
 fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java   | 2 +-
 .../src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java | 6 +++++-
 .../main/java/org/apache/doris/statistics/StatisticConstants.java   | 2 +-
 .../java/org/apache/doris/statistics/StatisticsAutoCollector.java   | 2 +-
 .../test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java  | 2 +-
 .../test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java | 2 +-
 .../org/apache/doris/statistics/StatisticsAutoCollectorTest.java    | 2 +-
 7 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 7d7fdb1404e..b9658bf34a5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -1422,7 +1422,7 @@ public class SessionVariable implements Serializable, 
Writable {
                             + "When enable_auto_sample is enabled, tables"
                             + "larger than this value will automatically 
collect "
                             + "statistics through sampling"})
-    public long hugeTableLowerBoundSizeInBytes = 5L * 1024 * 1024 * 1024;
+    public long hugeTableLowerBoundSizeInBytes = 0;
 
     @VariableMgr.VarAttr(name = HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, 
flag = VariableMgr.GLOBAL,
             description = {"控制对大表的自动ANALYZE的最小时间间隔,"
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 50042e4610f..e062e4eef85 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -61,7 +61,11 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
 
     public void doExecute() throws Exception {
         Set<String> partitionNames = info.colToPartitions.get(info.colName);
-        if (partitionNames.isEmpty()) {
+        if (partitionNames == null || partitionNames.isEmpty()) {
+            if (partitionNames == null) {
+                LOG.warn("Table {}.{}.{}, partitionNames for column {} is 
null. ColToPartitions:[{}]",
+                        info.catalogId, info.dbId, info.tblId, info.colName, 
info.colToPartitions);
+            }
             StatsId statsId = new StatsId(concatColumnStatsId(), 
info.catalogId, info.dbId,
                     info.tblId, info.indexId, info.colName, null);
             job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index 111305b03ed..3d6d2fe52aa 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -86,7 +86,7 @@ public class StatisticConstants {
     public static final int INSERT_MERGE_ITEM_COUNT = 200;
 
     public static final long HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 4194304;
-    public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 5L * 1024 
* 1024 * 1024;
+    public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 0;
 
     public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = 
TimeUnit.HOURS.toMillis(12);
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
index 244b1059d7e..ee50471175d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java
@@ -148,7 +148,7 @@ public class StatisticsAutoCollector extends 
StatisticsCollector {
 
     protected void createAnalyzeJobForTbl(DatabaseIf<? extends TableIf> db,
             List<AnalysisInfo> analysisInfos, TableIf table) {
-        AnalysisMethod analysisMethod = table.getDataSize(true) > 
StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
+        AnalysisMethod analysisMethod = table.getDataSize(true) >= 
StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
                 ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
         AnalysisInfo jobInfo = new AnalysisInfoBuilder()
                 .setJobId(Env.getCurrentEnv().getNextId())
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
index a569a5cb06d..12a1a9c046b 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
@@ -86,7 +86,7 @@ public class HMSAnalysisTaskTest {
         new MockUp<HMSExternalTable>() {
             @Mock
             public long getDataSize(boolean singleReplica) {
-                return 1000;
+                return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
             }
         };
         HMSAnalysisTask task = new HMSAnalysisTask();
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index 5b27c79c863..e0b5a4b0478 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -101,7 +101,7 @@ public class OlapAnalysisTaskTest {
 
             @Mock
             public long getDataSize(boolean singleReplica) {
-                return 1000;
+                return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
             }
         };
 
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
index 9f6d3db74b5..0b4b2203d0d 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
@@ -346,7 +346,7 @@ public class StatisticsAutoCollectorTest {
 
             @Mock
             public long getDataSize(boolean singleReplica) {
-                return 1000;
+                return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
             }
 
             @Mock


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to