This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 23d7b4d4ec3 [branch2.0](backport)(statistics) Merge external stats
related pr to branch2.0 (#25462)
23d7b4d4ec3 is described below
commit 23d7b4d4ec3940b98b6fc8bf7d9d0bd549d8172d
Author: Jibing-Li <[email protected]>
AuthorDate: Mon Oct 16 16:01:11 2023 +0800
[branch2.0](backport)(statistics) Merge external stats related pr to
branch2.0 (#25462)
https://github.com/apache/doris/pull/23170
https://github.com/apache/doris/pull/23574
https://github.com/apache/doris/pull/23751
https://github.com/apache/doris/pull/24414
https://github.com/apache/doris/pull/24154
https://github.com/apache/doris/pull/24376
https://github.com/apache/doris/pull/24696
https://github.com/apache/doris/pull/24540
https://github.com/apache/doris/pull/24891
https://github.com/apache/doris/pull/25175
---
fe/fe-core/src/main/cup/sql_parser.cup | 9 +
.../org/apache/doris/analysis/AnalyzeTblStmt.java | 6 -
.../doris/analysis/ShowAnalyzeTaskStatus.java | 5 +
.../java/org/apache/doris/analysis/TableRef.java | 6 +-
.../main/java/org/apache/doris/catalog/Env.java | 8 +-
.../java/org/apache/doris/catalog/OlapTable.java | 40 ++--
.../main/java/org/apache/doris/catalog/Table.java | 10 +-
.../java/org/apache/doris/catalog/TableIf.java | 13 +-
.../doris/catalog/external/ExternalTable.java | 7 +
.../doris/catalog/external/HMSExternalTable.java | 31 +++
.../org/apache/doris/datasource/CatalogIf.java | 1 -
.../apache/doris/datasource/ExternalCatalog.java | 1 -
.../doris/datasource/hive/HiveMetaStoreCache.java | 3 +
.../glue/translator/PhysicalPlanTranslator.java | 11 +-
.../doris/nereids/rules/analysis/BindRelation.java | 9 +-
.../LogicalFileScanToPhysicalFileScan.java | 3 +-
.../trees/copier/LogicalPlanDeepCopier.java | 2 +-
.../trees/plans/logical/LogicalFileScan.java | 19 +-
.../trees/plans/physical/PhysicalFileScan.java | 16 +-
.../java/org/apache/doris/persist/EditLog.java | 13 +-
.../apache/doris/planner/SingleNodePlanner.java | 1 +
.../doris/planner/external/FileQueryScanNode.java | 9 +
.../doris/planner/external/HiveScanNode.java | 52 ++++
.../java/org/apache/doris/qe/ConnectContext.java | 2 +
.../java/org/apache/doris/qe/SessionVariable.java | 17 ++
.../java/org/apache/doris/qe/StmtExecutor.java | 7 +
.../apache/doris/statistics/AnalysisManager.java | 1 -
.../doris/statistics/ColumnStatisticBuilder.java | 2 +-
.../apache/doris/statistics/HMSAnalysisTask.java | 81 ++++---
.../doris/statistics/StatisticsRepository.java | 9 +-
.../doris/statistics/util/StatisticsUtil.java | 91 ++++---
fe/fe-core/src/main/jflex/sql_scanner.flex | 1 +
.../doris/statistics/AnalysisManagerTest.java | 265 ++++++++++++++++++++-
.../doris/statistics/OlapAnalysisTaskTest.java | 70 ++++++
gensrc/thrift/PaloInternalService.thrift | 1 +
.../hive/test_hive_partition_statistic.out | 87 +++++++
.../hive/test_hive_statistic_timeout.out | 7 +
.../jdbc/test_mysql_jdbc_statistics.groovy | 4 +-
.../hive/test_hive_partition_statistic.groovy | 53 +++++
.../hive/test_hive_sample_statistic.groovy | 99 ++++++++
.../hive/test_hive_statistic.groovy | 8 +-
.../hive/test_hive_statistic_auto.groovy | 87 +++++++
.../hive/test_hive_statistic_cache.groovy | 36 ++-
.../hive/test_hive_statistic_sample.groovy | 150 ++++++++++++
.../hive/test_hive_statistic_timeout.groovy | 54 +++++
.../suites/statistics/test_basic_statistics.groovy | 75 ++++++
46 files changed, 1351 insertions(+), 131 deletions(-)
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup
b/fe/fe-core/src/main/cup/sql_parser.cup
index 6cc9a54a51f..4b0a8f3b737 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -524,6 +524,7 @@ terminal String
KW_QUOTA,
KW_RANDOM,
KW_RANGE,
+ KW_RECENT,
KW_READ,
KW_REBALANCE,
KW_RECOVER,
@@ -5789,6 +5790,14 @@ partition_names ::=
{:
RESULT = new PartitionNames(true, Lists.newArrayList(partName));
:}
+ | KW_PARTITIONS LPAREN STAR RPAREN
+ {:
+ RESULT = new PartitionNames(true);
+ :}
+ | KW_PARTITIONS KW_WITH KW_RECENT INTEGER_LITERAL:count
+ {:
+ RESULT = new PartitionNames(count);
+ :}
;
opt_table_sample ::=
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
index cbc66f367f2..185bee1d132 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
@@ -21,7 +21,6 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
-import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.View;
import org.apache.doris.catalog.external.ExternalTable;
@@ -167,11 +166,6 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
}
analyzeProperties.check();
- // TODO support external table
- if (analyzeProperties.isSampleRows() && !(table instanceof OlapTable))
{
- throw new AnalysisException("Sampling statistics "
- + "collection of external tables is not supported with
rows, use percent instead.");
- }
if (analyzeProperties.isSync()
&& (analyzeProperties.isAutomatic() ||
analyzeProperties.getPeriodTimeInMs() != 0)) {
throw new AnalysisException("Automatic/Period statistics
collection "
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeTaskStatus.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeTaskStatus.java
index 927a56d19d2..7c6c5cf17fe 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeTaskStatus.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeTaskStatus.java
@@ -59,4 +59,9 @@ public class ShowAnalyzeTaskStatus extends ShowStmt {
public long getJobId() {
return jobId;
}
+
+ @Override
+ public RedirectStatus getRedirectStatus() {
+ return RedirectStatus.FORWARD_NO_SYNC;
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
index f332d269b3f..b6c47e1cbfc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
@@ -470,9 +470,11 @@ public class TableRef implements ParseNode, Writable {
}
protected void analyzeSample() throws AnalysisException {
- if ((sampleTabletIds != null || tableSample != null) &&
desc.getTable().getType() != TableIf.TableType.OLAP) {
+ if ((sampleTabletIds != null || tableSample != null)
+ && desc.getTable().getType() != TableIf.TableType.OLAP
+ && desc.getTable().getType() !=
TableIf.TableType.HMS_EXTERNAL_TABLE) {
throw new AnalysisException("Sample table " +
desc.getTable().getName()
- + " type " + desc.getTable().getType() + " is not OLAP");
+ + " type " + desc.getTable().getType() + " is not supported");
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
index 2b0f581375c..e836b69f77d 100755
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
@@ -5420,10 +5420,6 @@ public class Env {
return loadManagerAdapter;
}
- public StatisticsAutoCollector getStatisticsAutoCollector() {
- return statisticsAutoCollector;
- }
-
public QueryStats getQueryStats() {
return queryStats;
}
@@ -5436,4 +5432,8 @@ public class Env {
public ColumnIdFlushDaemon getColumnIdFlusher() {
return columnIdFlusher;
}
+
+ public StatisticsAutoCollector getStatisticsAutoCollector() {
+ return statisticsAutoCollector;
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index f59df2554d3..bc0073f7ef9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -1105,6 +1105,26 @@ public class OlapTable extends Table {
return new MVAnalysisTask(info);
}
+ public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
+ if (tblStats == null) {
+ return true;
+ }
+ long rowCount = getRowCount();
+ // TODO: Do we need to analyze an empty table?
+ if (rowCount == 0) {
+ return false;
+ }
+ if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
+ .stream()
+ .map(Column::getName)
+ .collect(Collectors.toSet()))) {
+ return true;
+ }
+ long updateRows = tblStats.updatedRows.get();
+ int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
+ return tblHealth < Config.table_stats_health_threshold;
+ }
+
@Override
public long getRowCount() {
long rowCount = 0;
@@ -2282,24 +2302,4 @@ public class OlapTable extends Table {
}
return dataSize;
}
-
- public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
- if (tblStats == null) {
- return true;
- }
- long rowCount = getRowCount();
- // TODO: Do we need to analyze an empty table?
- if (rowCount == 0) {
- return false;
- }
- if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
- .stream()
- .map(Column::getName)
- .collect(Collectors.toSet()))) {
- return true;
- }
- long updateRows = tblStats.updatedRows.get();
- int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
- return tblHealth < Config.table_stats_health_threshold;
- }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
index ba7e55c7d86..fdaa41d2a8f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
@@ -556,7 +556,11 @@ public abstract class Table extends MetaObject implements
Writable, TableIf {
return Optional.empty();
}
- public void analyze(String dbName) {
+ public void analyze(String dbName) {}
+
+ @Override
+ public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
+ return true;
}
@Override
@@ -565,7 +569,7 @@ public abstract class Table extends MetaObject implements
Writable, TableIf {
}
@Override
- public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
- return true;
+ public List<Long> getChunkSizes() {
+ throw new NotImplementedException("getChunkSized not implemented");
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
index 108d227e591..46a3b4b5973 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java
@@ -138,6 +138,14 @@ public interface TableIf {
Optional<ColumnStatistic> getColumnStatistic(String colName);
+ boolean needReAnalyzeTable(TableStatsMeta tblStats);
+
+ Map<String, Set<String>> findReAnalyzeNeededPartitions();
+
+ // Get all the chunk sizes of this table. Currently, only HMS external tables
implement this method.
+ // For an HMS external table, the result is a list of the sizes of all its
files.
+ List<Long> getChunkSizes();
+
void write(DataOutput out) throws IOException;
/**
@@ -239,15 +247,10 @@ public interface TableIf {
return -1L;
}
- Map<String, Set<String>> findReAnalyzeNeededPartitions();
-
default long getDataSize(boolean singleReplica) {
// TODO: Each tableIf should impl it by itself.
return 0;
}
- boolean needReAnalyzeTable(TableStatsMeta tblStats);
-
-
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
index 01b3ce9ee2d..a915136193c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java
@@ -78,6 +78,7 @@ public class ExternalTable implements TableIf, Writable,
GsonPostProcessable {
// this field will be refreshed after reloading schema
protected volatile long schemaUpdateTime;
+ protected long dbId;
protected boolean objectCreated;
protected ExternalCatalog catalog;
protected ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(true);
@@ -119,6 +120,7 @@ public class ExternalTable implements TableIf, Writable,
GsonPostProcessable {
try {
// getDbOrAnalysisException will call makeSureInitialized in
ExternalCatalog.
ExternalDatabase db = catalog.getDbOrAnalysisException(dbName);
+ dbId = db.getId();
db.makeSureInitialized();
} catch (AnalysisException e) {
Util.logAndThrowRuntimeException(LOG, String.format("Exception to
get db %s", dbName), e);
@@ -397,4 +399,9 @@ public class ExternalTable implements TableIf, Writable,
GsonPostProcessable {
partitions.add("Dummy Partition");
return
getBaseSchema().stream().collect(Collectors.toMap(Column::getName, k ->
partitions));
}
+
+ @Override
+ public List<Long> getChunkSizes() {
+ throw new NotImplementedException("getChunkSized not implemented");
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index 728eec3e6f9..740a35f9572 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -26,6 +26,7 @@ import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.datasource.HMSExternalCatalog;
+import org.apache.doris.datasource.hive.HiveMetaStoreCache;
import org.apache.doris.datasource.hive.PooledHiveMetaStoreClient;
import org.apache.doris.statistics.AnalysisInfo;
import org.apache.doris.statistics.BaseAnalysisTask;
@@ -644,6 +645,36 @@ public class HMSExternalTable extends ExternalTable {
super.gsonPostProcess();
estimatedRowCount = -1;
}
+
+ @Override
+ public List<Long> getChunkSizes() {
+ HiveMetaStoreCache.HivePartitionValues partitionValues =
StatisticsUtil.getPartitionValuesForTable(this);
+ List<HiveMetaStoreCache.FileCacheValue> filesByPartitions
+ = StatisticsUtil.getFilesForPartitions(this, partitionValues,
0);
+ List<Long> result = Lists.newArrayList();
+ for (HiveMetaStoreCache.FileCacheValue files : filesByPartitions) {
+ for (HiveMetaStoreCache.HiveFileStatus file : files.getFiles()) {
+ result.add(file.getLength());
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public long getDataSize(boolean singleReplica) {
+ long totalSize = StatisticsUtil.getTotalSizeFromHMS(this);
+ // Usually, we can get total size from HMS parameter.
+ if (totalSize > 0) {
+ return totalSize;
+ }
+ // If the size is not found in HMS, calculate it by summing the sizes of all
files in the table.
+ List<Long> chunkSizes = getChunkSizes();
+ long total = 0;
+ for (long size : chunkSizes) {
+ total += size;
+ }
+ return total;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
index cebc526d14b..369fba3624b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
@@ -177,5 +177,4 @@ public interface CatalogIf<T extends DatabaseIf> {
public ConcurrentHashMap<Long, DatabaseIf> getIdToDb();
public boolean enableAutoAnalyze();
-
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
index 22a6816543e..ee3f75a9f14 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
@@ -612,5 +612,4 @@ public abstract class ExternalCatalog
}
return ret;
}
-
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 0a85d9ff5bd..df6f48b97dc 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -1071,6 +1071,9 @@ public class HiveMetaStoreCache {
long length;
long blockSize;
long modificationTime;
+ boolean splittable;
+ List<String> partitionValues;
+ AcidInfo acidInfo;
}
@Data
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 549a6c8c1c7..e2bdf6b92fd 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -469,7 +469,12 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
break;
case HIVE:
scanNode = new
HiveScanNode(fileScan.translatePlanNodeId(), tupleDescriptor, false);
- ((HiveScanNode)
scanNode).setSelectedPartitions(fileScan.getSelectedPartitions());
+ HiveScanNode hiveScanNode = (HiveScanNode) scanNode;
+
hiveScanNode.setSelectedPartitions(fileScan.getSelectedPartitions());
+ if (fileScan.getTableSample().isPresent()) {
+ hiveScanNode.setTableSample(new
TableSample(fileScan.getTableSample().get().isPercent,
+ fileScan.getTableSample().get().sampleValue,
fileScan.getTableSample().get().seek));
+ }
break;
default:
throw new RuntimeException("do not support DLA type " +
((HMSExternalTable) table).getDlaType());
@@ -491,7 +496,9 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
TableRef ref = new TableRef(tableName, null, null);
BaseTableRef tableRef = new BaseTableRef(ref, table, tableName);
tupleDescriptor.setRef(tableRef);
-
+ if (fileScan.getStats() != null) {
+ scanNode.setCardinality((long) fileScan.getStats().getRowCount());
+ }
Utils.execWithUncheckedException(scanNode::init);
context.addScanNode(scanNode);
ScanNode finalScanNode = scanNode;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
index cffe32df5b0..33dcbbde920 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
@@ -42,7 +42,6 @@ import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Slot;
-import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator;
import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PreAggStatus;
@@ -214,13 +213,13 @@ public class BindRelation extends OneAnalysisRuleFactory {
Plan hiveViewPlan = parseAndAnalyzeHiveView(hiveCatalog,
ddlSql, cascadesContext);
return new LogicalSubQueryAlias<>(tableQualifier,
hiveViewPlan);
}
- return new
LogicalFileScan(StatementScopeIdGenerator.newRelationId(),
- (HMSExternalTable) table, tableQualifier);
+ return new LogicalFileScan(unboundRelation.getRelationId(),
(HMSExternalTable) table, tableQualifier,
+ unboundRelation.getTableSample());
case ICEBERG_EXTERNAL_TABLE:
case PAIMON_EXTERNAL_TABLE:
case MAX_COMPUTE_EXTERNAL_TABLE:
- return new
LogicalFileScan(StatementScopeIdGenerator.newRelationId(),
- (ExternalTable) table, tableQualifier);
+ return new LogicalFileScan(unboundRelation.getRelationId(),
(ExternalTable) table, tableQualifier,
+ unboundRelation.getTableSample());
case SCHEMA:
return new LogicalSchemaScan(unboundRelation.getRelationId(),
table, tableQualifier);
case JDBC_EXTERNAL_TABLE:
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalFileScanToPhysicalFileScan.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalFileScanToPhysicalFileScan.java
index f53ce1553ae..d86e1d1667e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalFileScanToPhysicalFileScan.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalFileScanToPhysicalFileScan.java
@@ -39,7 +39,8 @@ public class LogicalFileScanToPhysicalFileScan extends
OneImplementationRuleFact
Optional.empty(),
fileScan.getLogicalProperties(),
fileScan.getConjuncts(),
- fileScan.getSelectedPartitions())
+ fileScan.getSelectedPartitions(),
+ fileScan.getTableSample())
).toRule(RuleType.LOGICAL_FILE_SCAN_TO_PHYSICAL_FILE_SCAN_RULE);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java
index 39f7fb310b4..9c05bae461d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java
@@ -203,7 +203,7 @@ public class LogicalPlanDeepCopier extends
DefaultPlanRewriter<DeepCopierContext
return
context.getRelationReplaceMap().get(fileScan.getRelationId());
}
LogicalFileScan newFileScan = new
LogicalFileScan(StatementScopeIdGenerator.newRelationId(),
- fileScan.getTable(), fileScan.getQualifier());
+ fileScan.getTable(), fileScan.getQualifier(),
fileScan.getTableSample());
updateReplaceMapWithOutput(fileScan, newFileScan,
context.exprIdReplaceMap);
context.putRelation(fileScan.getRelationId(), newFileScan);
Set<Expression> conjuncts = fileScan.getConjuncts().stream()
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java
index c25a25efcec..390fb1c97e0 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java
@@ -21,6 +21,7 @@ import org.apache.doris.catalog.PartitionItem;
import org.apache.doris.catalog.external.ExternalTable;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.LogicalProperties;
+import org.apache.doris.nereids.trees.TableSample;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
@@ -49,22 +50,26 @@ public class LogicalFileScan extends LogicalCatalogRelation
{
private final Set<Expression> conjuncts;
@Getter
private final SelectedPartitions selectedPartitions;
+ @Getter
+ private final Optional<TableSample> tableSample;
/**
* Constructor for LogicalFileScan.
*/
public LogicalFileScan(RelationId id, ExternalTable table, List<String>
qualifier,
Optional<GroupExpression> groupExpression,
Optional<LogicalProperties> logicalProperties,
- Set<Expression> conjuncts, SelectedPartitions selectedPartitions) {
+ Set<Expression> conjuncts, SelectedPartitions selectedPartitions,
Optional<TableSample> tableSample) {
super(id, PlanType.LOGICAL_FILE_SCAN, table, qualifier,
groupExpression, logicalProperties);
this.conjuncts = conjuncts;
this.selectedPartitions = selectedPartitions;
+ this.tableSample = tableSample;
}
- public LogicalFileScan(RelationId id, ExternalTable table, List<String>
qualifier) {
+ public LogicalFileScan(RelationId id, ExternalTable table, List<String>
qualifier,
+ Optional<TableSample> tableSample) {
this(id, table, qualifier, Optional.empty(), Optional.empty(),
- Sets.newHashSet(), SelectedPartitions.NOT_PRUNED);
+ Sets.newHashSet(), SelectedPartitions.NOT_PRUNED, tableSample);
}
@Override
@@ -85,24 +90,24 @@ public class LogicalFileScan extends LogicalCatalogRelation
{
@Override
public LogicalFileScan withGroupExpression(Optional<GroupExpression>
groupExpression) {
return new LogicalFileScan(relationId, (ExternalTable) table,
qualifier, groupExpression,
- Optional.of(getLogicalProperties()), conjuncts,
selectedPartitions);
+ Optional.of(getLogicalProperties()), conjuncts,
selectedPartitions, tableSample);
}
@Override
public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression>
groupExpression,
Optional<LogicalProperties> logicalProperties, List<Plan>
children) {
return new LogicalFileScan(relationId, (ExternalTable) table,
qualifier,
- groupExpression, logicalProperties, conjuncts,
selectedPartitions);
+ groupExpression, logicalProperties, conjuncts,
selectedPartitions, tableSample);
}
public LogicalFileScan withConjuncts(Set<Expression> conjuncts) {
return new LogicalFileScan(relationId, (ExternalTable) table,
qualifier, groupExpression,
- Optional.of(getLogicalProperties()), conjuncts,
selectedPartitions);
+ Optional.of(getLogicalProperties()), conjuncts,
selectedPartitions, tableSample);
}
public LogicalFileScan withSelectedPartitions(SelectedPartitions
selectedPartitions) {
return new LogicalFileScan(relationId, (ExternalTable) table,
qualifier, groupExpression,
- Optional.of(getLogicalProperties()), conjuncts,
selectedPartitions);
+ Optional.of(getLogicalProperties()), conjuncts,
selectedPartitions, tableSample);
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFileScan.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFileScan.java
index be4f5b17983..3dc83c7f43d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFileScan.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFileScan.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.DistributionSpec;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.properties.PhysicalProperties;
+import org.apache.doris.nereids.trees.TableSample;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanType;
@@ -47,6 +48,8 @@ public class PhysicalFileScan extends PhysicalCatalogRelation
{
private final Set<Expression> conjuncts;
@Getter
private final SelectedPartitions selectedPartitions;
+ @Getter
+ private final Optional<TableSample> tableSample;
/**
* Constructor for PhysicalFileScan.
@@ -54,11 +57,12 @@ public class PhysicalFileScan extends
PhysicalCatalogRelation {
public PhysicalFileScan(RelationId id, ExternalTable table, List<String>
qualifier,
DistributionSpec distributionSpec, Optional<GroupExpression>
groupExpression,
LogicalProperties logicalProperties, Set<Expression> conjuncts,
- SelectedPartitions selectedPartitions) {
+ SelectedPartitions selectedPartitions, Optional<TableSample>
tableSample) {
super(id, PlanType.PHYSICAL_FILE_SCAN, table, qualifier,
groupExpression, logicalProperties);
this.distributionSpec = distributionSpec;
this.conjuncts = conjuncts;
this.selectedPartitions = selectedPartitions;
+ this.tableSample = tableSample;
}
/**
@@ -67,12 +71,14 @@ public class PhysicalFileScan extends
PhysicalCatalogRelation {
public PhysicalFileScan(RelationId id, ExternalTable table, List<String>
qualifier,
DistributionSpec distributionSpec, Optional<GroupExpression>
groupExpression,
LogicalProperties logicalProperties, PhysicalProperties
physicalProperties,
- Statistics statistics, Set<Expression> conjuncts,
SelectedPartitions selectedPartitions) {
+ Statistics statistics, Set<Expression> conjuncts,
SelectedPartitions selectedPartitions,
+ Optional<TableSample> tableSample) {
super(id, PlanType.PHYSICAL_FILE_SCAN, table, qualifier,
groupExpression, logicalProperties,
physicalProperties, statistics);
this.distributionSpec = distributionSpec;
this.conjuncts = conjuncts;
this.selectedPartitions = selectedPartitions;
+ this.tableSample = tableSample;
}
@Override
@@ -95,14 +101,14 @@ public class PhysicalFileScan extends
PhysicalCatalogRelation {
@Override
public PhysicalFileScan withGroupExpression(Optional<GroupExpression>
groupExpression) {
return new PhysicalFileScan(relationId, getTable(), qualifier,
distributionSpec,
- groupExpression, getLogicalProperties(), conjuncts,
selectedPartitions);
+ groupExpression, getLogicalProperties(), conjuncts,
selectedPartitions, tableSample);
}
@Override
public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression>
groupExpression,
Optional<LogicalProperties> logicalProperties, List<Plan>
children) {
return new PhysicalFileScan(relationId, getTable(), qualifier,
distributionSpec,
- groupExpression, logicalProperties.get(), conjuncts,
selectedPartitions);
+ groupExpression, logicalProperties.get(), conjuncts,
selectedPartitions, tableSample);
}
@Override
@@ -115,6 +121,6 @@ public class PhysicalFileScan extends
PhysicalCatalogRelation {
Statistics statistics) {
return new PhysicalFileScan(relationId, getTable(), qualifier,
distributionSpec,
groupExpression, getLogicalProperties(), physicalProperties,
statistics, conjuncts,
- selectedPartitions);
+ selectedPartitions, tableSample);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
index efe61042478..72b14427003 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
@@ -82,6 +82,7 @@ import org.apache.doris.policy.Policy;
import org.apache.doris.policy.StoragePolicy;
import org.apache.doris.resource.workloadgroup.WorkloadGroup;
import org.apache.doris.statistics.AnalysisInfo;
+import org.apache.doris.statistics.AnalysisManager;
import org.apache.doris.statistics.TableStatsMeta;
import org.apache.doris.system.Backend;
import org.apache.doris.system.Frontend;
@@ -1037,11 +1038,19 @@ public class EditLog {
break;
}
case OperationType.OP_CREATE_ANALYSIS_JOB: {
-
env.getAnalysisManager().replayCreateAnalysisJob((AnalysisInfo)
journal.getData());
+ AnalysisInfo info = (AnalysisInfo) journal.getData();
+ if (AnalysisManager.needAbandon(info)) {
+ break;
+ }
+ env.getAnalysisManager().replayCreateAnalysisJob(info);
break;
}
case OperationType.OP_CREATE_ANALYSIS_TASK: {
-
env.getAnalysisManager().replayCreateAnalysisTask((AnalysisInfo)
journal.getData());
+ AnalysisInfo info = (AnalysisInfo) journal.getData();
+ if (AnalysisManager.needAbandon(info)) {
+ break;
+ }
+ env.getAnalysisManager().replayCreateAnalysisTask(info);
break;
}
case OperationType.OP_DELETE_ANALYSIS_JOB: {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index f33f2e483ca..e3834070d57 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -2024,6 +2024,7 @@ public class SingleNodePlanner {
break;
case HIVE:
scanNode = new HiveScanNode(ctx.getNextNodeId(),
tblRef.getDesc(), true);
+ ((HiveScanNode)
scanNode).setTableSample(tblRef.getTableSample());
break;
default:
throw new UserException("Not supported table type" +
table.getType());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
index 44e113a280b..67e09cbf205 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
@@ -20,6 +20,7 @@ package org.apache.doris.planner.external;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.SlotId;
+import org.apache.doris.analysis.TableSample;
import org.apache.doris.analysis.TupleDescriptor;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
@@ -71,6 +72,7 @@ import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import lombok.Getter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -92,6 +94,9 @@ public abstract class FileQueryScanNode extends FileScanNode {
protected Map<String, SlotDescriptor> destSlotDescByName;
protected TFileScanRangeParams params;
+ @Getter
+ protected TableSample tableSample;
+
/**
* External file scan node for Query hms table
* needCheckColumnPriv: Some of ExternalFileScanNode do not need to check
column priv
@@ -200,6 +205,10 @@ public abstract class FileQueryScanNode extends
FileScanNode {
setColumnPositionMapping();
}
+ /**
+ * Stores the table sample for this scan node.
+ *
+ * @param tSample value assigned as-is to the {@code tableSample} field; no validation is done here
+ */
+ public void setTableSample(TableSample tSample) {
+ this.tableSample = tSample;
+ }
+
@Override
public void finalize(Analyzer analyzer) throws UserException {
doFinalize();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
index 0b6e1d44466..1ba77fa5f9c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
@@ -63,8 +63,10 @@ import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.util.Collection;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
+import java.util.Random;
import java.util.stream.Collectors;
public class HiveScanNode extends FileQueryScanNode {
@@ -218,6 +220,11 @@ public class HiveScanNode extends FileQueryScanNode {
if (ConnectContext.get().getExecutor() != null) {
ConnectContext.get().getExecutor().getSummaryProfile().setGetPartitionFilesFinishTime();
}
+ if (tableSample != null) {
+ List<HiveMetaStoreCache.HiveFileStatus> hiveFileStatuses =
selectFiles(fileCaches);
+ splitAllFiles(allFiles, hiveFileStatuses);
+ return;
+ }
for (HiveMetaStoreCache.FileCacheValue fileCacheValue : fileCaches) {
// This if branch is to support old splitter, will remove later.
if (fileCacheValue.getSplits() != null) {
@@ -235,6 +242,51 @@ public class HiveScanNode extends FileQueryScanNode {
}
}
+ /**
+ * Splits every given file and appends the resulting splits to {@code allFiles}.
+ *
+ * @param allFiles output list that receives the splits of all files
+ * @param hiveFileStatuses files to split; each one supplies its own path, block, length,
+ *                         modification time, splittable flag, partition values and ACID info
+ * @throws IOException declared for the per-file splitting step
+ */
+ private void splitAllFiles(List<Split> allFiles,
+         List<HiveMetaStoreCache.HiveFileStatus> hiveFileStatuses) throws IOException {
+     for (HiveMetaStoreCache.HiveFileStatus fileStatus : hiveFileStatuses) {
+         allFiles.addAll(
+                 splitFile(
+                         fileStatus.getPath(),
+                         fileStatus.getBlockSize(),
+                         fileStatus.getBlockLocations(),
+                         fileStatus.getLength(),
+                         fileStatus.getModificationTime(),
+                         fileStatus.isSplittable(),
+                         fileStatus.getPartitionValues(),
+                         new HiveSplitCreator(fileStatus.getAcidInfo())));
+     }
+ }
+
+ /**
+ * Picks the files to scan for a TABLESAMPLE query.
+ * All files of the input partitions are flattened into one list, shuffled with the sample seed,
+ * and then taken from the front until their accumulated length reaches the sample target.
+ * At least one file is returned whenever the input contains any file.
+ *
+ * @param inputCacheValue per-partition file listings
+ * @return the leading part of the shuffled file list that makes up the sample
+ */
+ private List<HiveMetaStoreCache.HiveFileStatus> selectFiles(List<FileCacheValue> inputCacheValue) {
+     // Flatten all partitions, copying the partition level attributes onto each file.
+     List<HiveMetaStoreCache.HiveFileStatus> candidates = Lists.newArrayList();
+     long allBytes = 0;
+     for (FileCacheValue partitionFiles : inputCacheValue) {
+         for (HiveMetaStoreCache.HiveFileStatus status : partitionFiles.getFiles()) {
+             status.setSplittable(partitionFiles.isSplittable());
+             status.setPartitionValues(partitionFiles.getPartitionValues());
+             status.setAcidInfo(partitionFiles.getAcidInfo());
+             candidates.add(status);
+             allBytes += status.getLength();
+         }
+     }
+     // Target size: a row-count sample is converted to bytes through the summed slot sizes of
+     // the schema, a percent sample is taken from the total file size.
+     long wantedBytes;
+     if (!tableSample.isPercent()) {
+         long rowBytes = 0;
+         for (Column schemaColumn : hmsTable.getFullSchema()) {
+             rowBytes += schemaColumn.getDataType().getSlotSize();
+         }
+         wantedBytes = rowBytes * tableSample.getSampleValue();
+     } else {
+         wantedBytes = allBytes * tableSample.getSampleValue() / 100;
+     }
+     // Seeded shuffle keeps the selection reproducible for the same seed.
+     Collections.shuffle(candidates, new Random(tableSample.getSeek()));
+     long pickedBytes = 0;
+     int pickedCount = 0;
+     while (pickedCount < candidates.size()) {
+         pickedBytes += candidates.get(pickedCount).getLength();
+         pickedCount++;
+         if (pickedBytes >= wantedBytes) {
+             break;
+         }
+     }
+     return candidates.subList(0, pickedCount);
+ }
+
private List<FileCacheValue> getFileSplitByTransaction(HiveMetaStoreCache
cache, List<HivePartition> partitions) {
for (HivePartition partition : partitions) {
if (partition.getPartitionValues() == null ||
partition.getPartitionValues().isEmpty()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
index 62734093277..90d04c8a2dd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
@@ -717,6 +717,8 @@ public class ConnectContext {
if (executor != null && executor.isInsertStmt()) {
// particular for insert stmt, we can expand other type of timeout
in the same way
return Math.max(sessionVariable.getInsertTimeoutS(),
sessionVariable.getQueryTimeoutS());
+ } else if (executor != null && executor.isAnalyzeStmt()) {
+ return sessionVariable.getAnalyzeTimeoutS();
} else {
// normal query stmt
return sessionVariable.getQueryTimeoutS();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 34518114453..4e7a4893b67 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -65,6 +65,7 @@ public class SessionVariable implements Serializable,
Writable {
public static final String EXEC_MEM_LIMIT = "exec_mem_limit";
public static final String SCAN_QUEUE_MEM_LIMIT = "scan_queue_mem_limit";
public static final String QUERY_TIMEOUT = "query_timeout";
+ public static final String ANALYZE_TIMEOUT = "analyze_timeout";
public static final String MAX_EXECUTION_TIME = "max_execution_time";
public static final String INSERT_TIMEOUT = "insert_timeout";
@@ -450,6 +451,10 @@ public class SessionVariable implements Serializable,
Writable {
@VariableMgr.VarAttr(name = QUERY_TIMEOUT)
public int queryTimeoutS = 900;
+ // analyze timeout in second.
+ @VariableMgr.VarAttr(name = ANALYZE_TIMEOUT, needForward = true)
+ public int analyzeTimeoutS = 43200;
+
// The global max_execution_time value provides the default for the
session value for new connections.
// The session value applies to SELECT executions executed within the
session that include
// no MAX_EXECUTION_TIME(N) optimizer hint or for which N is 0.
@@ -1335,6 +1340,10 @@ public class SessionVariable implements Serializable,
Writable {
return queryTimeoutS;
}
+ /**
+ * @return the value of the {@code analyze_timeout} session variable (analyzeTimeoutS)
+ */
+ public int getAnalyzeTimeoutS() {
+ return analyzeTimeoutS;
+ }
+
public void setEnableTwoPhaseReadOpt(boolean enable) {
enableTwoPhaseReadOpt = enable;
}
@@ -1514,6 +1523,10 @@ public class SessionVariable implements Serializable,
Writable {
this.queryTimeoutS = queryTimeoutS;
}
+ /**
+ * Sets the {@code analyze_timeout} session variable.
+ *
+ * @param analyzeTimeoutS new value, stored without validation
+ */
+ public void setAnalyzeTimeoutS(int analyzeTimeoutS) {
+ this.analyzeTimeoutS = analyzeTimeoutS;
+ }
+
public void setMaxExecutionTimeMS(int maxExecutionTimeMS) {
this.maxExecutionTimeMS = maxExecutionTimeMS;
this.queryTimeoutS = this.maxExecutionTimeMS / 1000;
@@ -2433,6 +2446,9 @@ public class SessionVariable implements Serializable,
Writable {
if (queryOptions.isSetInsertTimeout()) {
setInsertTimeoutS(queryOptions.getInsertTimeout());
}
+ if (queryOptions.isSetAnalyzeTimeout()) {
+ setAnalyzeTimeoutS(queryOptions.getAnalyzeTimeout());
+ }
}
/**
@@ -2444,6 +2460,7 @@ public class SessionVariable implements Serializable,
Writable {
queryOptions.setScanQueueMemLimit(Math.min(maxScanQueueMemByte,
maxExecMemByte / 20));
queryOptions.setQueryTimeout(queryTimeoutS);
queryOptions.setInsertTimeout(insertTimeoutS);
+ queryOptions.setAnalyzeTimeout(analyzeTimeoutS);
return queryOptions;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index 5d90008ff75..5dcb392fb62 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -397,6 +397,13 @@ public class StmtExecutor {
return parsedStmt instanceof InsertStmt;
}
+ /**
+ * Tells whether the statement held by this executor is an ANALYZE statement.
+ *
+ * @return true only if parsedStmt is a non-null AnalyzeStmt
+ */
+ public boolean isAnalyzeStmt() {
+     // instanceof evaluates to false for null, so a missing statement yields false.
+     return parsedStmt instanceof AnalyzeStmt;
+ }
+
/**
* Used for audit in ConnectProcessor.
* <p>
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 3eebb1ec9aa..9fa576f85cd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -651,7 +651,6 @@ public class AnalysisManager extends Daemon implements
Writable {
tableStats.updateByJob(jobInfo);
logCreateTableStats(tableStats);
}
-
}
public List<AnalysisInfo> showAnalysisJob(ShowAnalyzeStmt stmt) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
index fa4cf7ebc99..f97459555c8 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
@@ -190,7 +190,7 @@ public class ColumnStatisticBuilder {
}
public ColumnStatistic build() {
- dataSize = Math.max((count - numNulls + 1) * avgSizeByte, 0);
+ dataSize = dataSize > 0 ? dataSize : Math.max((count - numNulls + 1) *
avgSizeByte, 0);
if (original == null && !isUnknown) {
original = new ColumnStatistic(count, ndv, null, avgSizeByte,
numNulls,
dataSize, minValue, maxValue, minExpr, maxExpr,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index 836e6e6e493..076479aae8f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -32,10 +32,12 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Random;
import java.util.Set;
import java.util.StringJoiner;
import java.util.stream.Collectors;
@@ -43,10 +45,9 @@ import java.util.stream.Collectors;
public class HMSAnalysisTask extends BaseAnalysisTask {
private static final Logger LOG =
LogManager.getLogger(HMSAnalysisTask.class);
- public static final String TOTAL_SIZE = "totalSize";
- public static final String NUM_ROWS = "numRows";
- public static final String NUM_FILES = "numFiles";
- public static final String TIMESTAMP = "transient_lastDdlTime";
+ // While doing sample analysis, the sampled ndv result will multiply a
factor (total size/sample size)
+ // if ndv(col)/count(col) is greater than this threshold.
+ private static final String NDV_MULTIPLY_THRESHOLD = "0.3";
private static final String ANALYZE_TABLE_TEMPLATE = "INSERT INTO "
+ "${internalDB}.${columnStatTbl}"
@@ -58,12 +59,15 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
+ "${idxId} AS idx_id, "
+ "'${colId}' AS col_id, "
+ "NULL AS part_id, "
- + "${countExpr} AS row_count, "
- + "NDV(`${colName}`) AS ndv, "
- + "${nullCountExpr} AS null_count, "
+ + "ROUND(COUNT(1) * ${scaleFactor}) AS row_count, "
+ + "case when NDV(`${colName}`)/count('${colName}') < "
+ + NDV_MULTIPLY_THRESHOLD
+ + " then NDV(`${colName}`) "
+ + "else NDV(`${colName}`) * ${scaleFactor} end AS ndv, "
+ + "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) *
${scaleFactor}) AS null_count, "
+ "MIN(`${colName}`) AS min, "
+ "MAX(`${colName}`) AS max, "
- + "${dataSizeFunction} AS data_size, "
+ + "${dataSizeFunction} * ${scaleFactor} AS data_size, "
+ "NOW() "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${sampleExpr}";
@@ -83,7 +87,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
+ "${dataSizeFunction} AS data_size, "
+ "NOW() FROM `${catalogName}`.`${dbName}`.`${tblName}` where ";
- private static final String ANALYZE_TABLE_COUNT_TEMPLATE = "SELECT
${countExpr} as rowCount "
+ private static final String ANALYZE_TABLE_COUNT_TEMPLATE = "SELECT
ROUND(COUNT(1) * ${scaleFactor}) as rowCount "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${sampleExpr}";
// cache stats for each partition, it would be inserted into
column_statistics in a batch.
@@ -160,7 +164,6 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
params.put("colName", col.getName());
params.put("colId", info.colName);
params.put("dataSizeFunction", getDataSizeFunction(col));
- params.put("nullCountExpr", getNullCountExpression());
StringSubstitutor stringSubstitutor = new
StringSubstitutor(params);
String sql = stringSubstitutor.replace(sb.toString());
executeInsertSql(sql);
@@ -277,7 +280,8 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
commonParams.put("catalogName", catalog.getName());
commonParams.put("dbName", db.getFullName());
commonParams.put("tblName", tbl.getName());
- commonParams.put("countExpr", getCountExpression());
+ commonParams.put("sampleExpr", getSampleExpression());
+ commonParams.put("scaleFactor", getSampleScaleFactor());
if (col != null) {
commonParams.put("type", col.getType().toString());
}
@@ -285,30 +289,51 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
return commonParams;
}
- protected String getCountExpression() {
- if (info.samplePercent > 0) {
- return String.format("ROUND(COUNT(1) * 100 / %d)",
info.samplePercent);
- } else {
- return "COUNT(1)";
+ // Build the TABLESAMPLE clause for the analyze SQL.
+ // Returns an empty string when tableSample is null, otherwise "TABLESAMPLE(<n> PERCENT)" or
+ // "TABLESAMPLE(<n> ROWS)" depending on tableSample.isPercent().
+ protected String getSampleExpression() {
+ if (tableSample == null) {
+ return "";
}
- }
-
- protected String getNullCountExpression() {
- if (info.samplePercent > 0) {
- return String.format("ROUND(SUM(CASE WHEN `${colName}` IS NULL
THEN 1 ELSE 0 END) * 100 / %d)",
- info.samplePercent);
+ if (tableSample.isPercent()) {
+ return String.format("TABLESAMPLE(%d PERCENT)",
tableSample.getSampleValue());
} else {
- return "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END)";
+ return String.format("TABLESAMPLE(%d ROWS)",
tableSample.getSampleValue());
}
}
- protected String getDataSizeFunction(Column column) {
- String originFunction = super.getDataSizeFunction(column);
- if (info.samplePercent > 0 && !isPartitionOnly) {
- return String.format("ROUND((%s) * 100 / %d)", originFunction,
info.samplePercent);
+ // Get the sample scale factor. While analyzing, the result of count, null count and data size need to
+ // multiply this factor to get more accurate result.
+ // The factor is (total size of all files) / (accumulated size of the sampled files), never less than 1.
+ // Returns "1" when no sample is requested, and also when there is nothing to scale
+ // (no files, or only zero-length files), so that "NaN" is never substituted into the SQL.
+ protected String getSampleScaleFactor() {
+ if (tableSample == null) {
+ return "1";
+ }
+ long target = 0;
+ // Get list of all files' size in this HMS table.
+ List<Long> chunkSizes = table.getChunkSizes();
+ // Shuffle with the sample seed. NOTE(review): presumably this is meant to mirror the file order
+ // picked by the scan node for the same seed - confirm against HiveScanNode.selectFiles.
+ Collections.shuffle(chunkSizes, new Random(tableSample.getSeek()));
+ long total = 0;
+ // Calculate the total size of this HMS table.
+ for (long size : chunkSizes) {
+ total += size;
+ }
+ // Calculate the sample target size for percent and rows sample.
+ if (tableSample.isPercent()) {
+ target = total * tableSample.getSampleValue() / 100;
} else {
- return originFunction;
+ int columnSize = 0;
+ for (Column column : table.getFullSchema()) {
+ columnSize += column.getDataType().getSlotSize();
+ }
+ target = columnSize * tableSample.getSampleValue();
+ }
+ // Calculate the actual sample size (cumulate).
+ long cumulate = 0;
+ for (long size : chunkSizes) {
+ cumulate += size;
+ if (cumulate >= target) {
+ break;
+ }
}
+ // With no files or only empty files cumulate stays 0; 0.0 / 0 is NaN and Math.max(NaN, 1) is NaN too.
+ if (cumulate <= 0) {
+ return "1";
+ }
+ return Double.toString(Math.max(((double) total) / cumulate, 1));
}
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index cd3cc67f3c9..63953f5bfb5 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -248,7 +248,14 @@ public class StatisticsRepository {
builder.setMaxValue(StatisticsUtil.convertToDouble(column.getType(), max));
}
if (dataSize != null) {
- builder.setDataSize(Double.parseDouble(dataSize));
+ double size = Double.parseDouble(dataSize);
+ double rows = Double.parseDouble(rowCount);
+ if (size > 0) {
+ builder.setDataSize(size);
+ if (rows > 0) {
+ builder.setAvgSizeByte(size / rows);
+ }
+ }
}
ColumnStatistic columnStatistic = builder.build();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 40ae13a0e0e..f482c812879 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -539,6 +539,19 @@ public class StatisticsUtil {
return totalSize / estimatedRowSize;
}
+ /**
+ * Reads the total size parameter of a Hive table from its HMS table parameters.
+ *
+ * @param table Hive HMSExternalTable whose remote table parameters are inspected.
+ * @return the parsed value of the TOTAL_SIZE parameter, or 0 when the table has no
+ *         parameters or the parameter is not present.
+ */
+ public static long getTotalSizeFromHMS(HMSExternalTable table) {
+     Map<String, String> hmsParams = table.getRemoteTable().getParameters();
+     if (hmsParams != null && hmsParams.containsKey(TOTAL_SIZE)) {
+         return Long.parseLong(hmsParams.get(TOTAL_SIZE));
+     }
+     return 0;
+ }
+
/**
* Estimate iceberg table row count.
* Get the row count by adding all task file recordCount.
@@ -574,13 +587,42 @@ public class StatisticsUtil {
if (table.isView()) {
return 0;
}
+ HiveMetaStoreCache.HivePartitionValues partitionValues =
getPartitionValuesForTable(table);
+ int totalPartitionSize = partitionValues == null ? 1 :
partitionValues.getIdToPartitionItem().size();
+
+ // Get files for all partitions.
+ int samplePartitionSize = Config.hive_stats_partition_sample_size;
+ List<HiveMetaStoreCache.FileCacheValue> filesByPartitions
+ = getFilesForPartitions(table, partitionValues,
samplePartitionSize);
+ long totalSize = 0;
+ // Calculate the total file size.
+ for (HiveMetaStoreCache.FileCacheValue files : filesByPartitions) {
+ for (HiveMetaStoreCache.HiveFileStatus file : files.getFiles()) {
+ totalSize += file.getLength();
+ }
+ }
+ // Estimate row count: totalSize/estimatedRowSize
+ long estimatedRowSize = 0;
+ for (Column column : table.getFullSchema()) {
+ estimatedRowSize += column.getDataType().getSlotSize();
+ }
+ if (estimatedRowSize == 0) {
+ return 1;
+ }
+ if (samplePartitionSize < totalPartitionSize) {
+ totalSize = totalSize * totalPartitionSize / samplePartitionSize;
+ }
+ return totalSize / estimatedRowSize;
+ }
+
+ public static HiveMetaStoreCache.HivePartitionValues
getPartitionValuesForTable(HMSExternalTable table) {
+ if (table.isView()) {
+ return null;
+ }
HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
.getMetaStoreCache((HMSExternalCatalog) table.getCatalog());
List<Type> partitionColumnTypes = table.getPartitionColumnTypes();
HiveMetaStoreCache.HivePartitionValues partitionValues = null;
- List<HivePartition> hivePartitions = Lists.newArrayList();
- int samplePartitionSize = Config.hive_stats_partition_sample_size;
- int totalPartitionSize = 1;
// Get table partitions from cache.
if (!partitionColumnTypes.isEmpty()) {
// It is ok to get partition values from cache,
@@ -588,17 +630,28 @@ public class StatisticsUtil {
// because it has enough space to keep partition info of all
tables in cache.
partitionValues = cache.getPartitionValues(table.getDbName(),
table.getName(), partitionColumnTypes);
}
+ return partitionValues;
+ }
+
+ public static List<HiveMetaStoreCache.FileCacheValue>
getFilesForPartitions(
+ HMSExternalTable table, HiveMetaStoreCache.HivePartitionValues
partitionValues, int sampleSize) {
+ if (table.isView()) {
+ return Lists.newArrayList();
+ }
+ HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
+ .getMetaStoreCache((HMSExternalCatalog) table.getCatalog());
+ List<HivePartition> hivePartitions = Lists.newArrayList();
if (partitionValues != null) {
Map<Long, PartitionItem> idToPartitionItem =
partitionValues.getIdToPartitionItem();
- totalPartitionSize = idToPartitionItem.size();
+ int totalPartitionSize = idToPartitionItem.size();
Collection<PartitionItem> partitionItems;
List<List<String>> partitionValuesList;
// If partition number is too large, randomly choose part of them
to estimate the whole table.
- if (samplePartitionSize < totalPartitionSize) {
+ if (sampleSize > 0 && sampleSize < totalPartitionSize) {
List<PartitionItem> items = new
ArrayList<>(idToPartitionItem.values());
Collections.shuffle(items);
- partitionItems = items.subList(0, samplePartitionSize);
- partitionValuesList =
Lists.newArrayListWithCapacity(samplePartitionSize);
+ partitionItems = items.subList(0, sampleSize);
+ partitionValuesList =
Lists.newArrayListWithCapacity(sampleSize);
} else {
partitionItems = idToPartitionItem.values();
partitionValuesList =
Lists.newArrayListWithCapacity(totalPartitionSize);
@@ -609,34 +662,14 @@ public class StatisticsUtil {
// get partitions without cache, so that it will not invalid the
cache when executing
// non query request such as `show table status`
hivePartitions =
cache.getAllPartitionsWithoutCache(table.getDbName(), table.getName(),
- partitionValuesList);
+ partitionValuesList);
} else {
hivePartitions.add(new HivePartition(table.getDbName(),
table.getName(), true,
table.getRemoteTable().getSd().getInputFormat(),
table.getRemoteTable().getSd().getLocation(), null));
}
// Get files for all partitions.
- List<HiveMetaStoreCache.FileCacheValue> filesByPartitions =
cache.getFilesByPartitionsWithoutCache(
- hivePartitions, true);
- long totalSize = 0;
- // Calculate the total file size.
- for (HiveMetaStoreCache.FileCacheValue files : filesByPartitions) {
- for (HiveMetaStoreCache.HiveFileStatus file : files.getFiles()) {
- totalSize += file.getLength();
- }
- }
- // Estimate row count: totalSize/estimatedRowSize
- long estimatedRowSize = 0;
- for (Column column : table.getFullSchema()) {
- estimatedRowSize += column.getDataType().getSlotSize();
- }
- if (estimatedRowSize == 0) {
- return 1;
- }
- if (samplePartitionSize < totalPartitionSize) {
- totalSize = totalSize * totalPartitionSize / samplePartitionSize;
- }
- return totalSize / estimatedRowSize;
+ return cache.getFilesByPartitionsWithoutCache(hivePartitions, true);
}
/**
diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex
b/fe/fe-core/src/main/jflex/sql_scanner.flex
index c4791093b48..ca3d2eea319 100644
--- a/fe/fe-core/src/main/jflex/sql_scanner.flex
+++ b/fe/fe-core/src/main/jflex/sql_scanner.flex
@@ -377,6 +377,7 @@ import org.apache.doris.qe.SqlModeHelper;
keywordMap.put("read", new Integer(SqlParserSymbols.KW_READ));
keywordMap.put("real", new Integer(SqlParserSymbols.KW_DOUBLE));
keywordMap.put("rebalance", new
Integer(SqlParserSymbols.KW_REBALANCE));
+ keywordMap.put("recent", new Integer(SqlParserSymbols.KW_RECENT));
keywordMap.put("recover", new Integer(SqlParserSymbols.KW_RECOVER));
keywordMap.put("recycle", new Integer(SqlParserSymbols.KW_RECYCLE));
keywordMap.put("refresh", new Integer(SqlParserSymbols.KW_REFRESH));
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
index 253f9c9332a..636e32ea4e1 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
@@ -17,13 +17,32 @@
package org.apache.doris.statistics;
+import org.apache.doris.analysis.AnalyzeProperties;
+import org.apache.doris.analysis.AnalyzeTblStmt;
+import org.apache.doris.analysis.PartitionNames;
+import org.apache.doris.analysis.TableName;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.common.DdlException;
+import org.apache.doris.statistics.AnalysisInfo.ScheduleType;
+import org.apache.doris.statistics.util.StatisticsUtil;
+
+import com.google.common.annotations.VisibleForTesting;
+import mockit.Expectations;
+import mockit.Injectable;
import mockit.Mock;
import mockit.MockUp;
import mockit.Mocked;
-import org.junit.Test;
+import org.apache.hadoop.util.Lists;
import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
import java.util.Map;
public class AnalysisManagerTest {
@@ -70,4 +89,248 @@ public class AnalysisManagerTest {
manager.updateTaskStatus(taskInfo2, AnalysisState.FINISHED, "", 0);
Assertions.assertEquals(job.state, AnalysisState.FINISHED);
}
+
+ // Test building a sync job (PROPERTY_SYNC = "true").
+ // AnalysisManager collaborators are replaced by no-op fakes, and buildAnalysisJobInfo always
+ // returns the shared analysisInfo instance created below.
+ @Test
+ public void testBuildAndAssignJob1() throws Exception {
+ AnalysisInfo analysisInfo = new
AnalysisInfoBuilder().setColToPartitions(new HashMap<>()).build();
+ new MockUp<StatisticsUtil>() {
+
+ @Mock
+ public boolean statsTblAvailable() {
+ return true;
+ }
+ };
+ new MockUp<AnalysisManager>() {
+
+ @Mock
+ public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt)
throws DdlException {
+ return analysisInfo;
+ }
+
+ @Mock
+ @VisibleForTesting
+ public void createTaskForExternalTable(AnalysisInfo jobInfo,
+ Map<Long, BaseAnalysisTask> analysisTasks,
+ boolean isSync) throws DdlException {
+ // DO NOTHING
+ }
+
+ @Mock
+ public void createTaskForEachColumns(AnalysisInfo jobInfo,
Map<Long, BaseAnalysisTask> analysisTasks,
+ boolean isSync) throws DdlException {
+ // DO NOTHING
+ }
+
+ @Mock
+ public void syncExecute(Collection<BaseAnalysisTask> tasks) {
+ // DO NOTHING
+ }
+
+ @Mock
+ public void updateTableStats(AnalysisInfo jobInfo) {
+ // DO NOTHING
+ }
+ };
+ AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(new
TableName("test"),
+ new PartitionNames(false, new ArrayList<String>() {
+ {
+ add("p1");
+ add("p2");
+ }
+ }), new ArrayList<String>() {
+ {
+ add("c1");
+ add("c2");
+ }
+ }, new AnalyzeProperties(new HashMap<String, String>() {
+ {
+ put(AnalyzeProperties.PROPERTY_SYNC, "true");
+ }
+ }));
+
+ AnalysisManager analysisManager = new AnalysisManager();
+ // While colToPartitions is still empty the call is expected to return null.
+
Assertions.assertNull(analysisManager.buildAndAssignJob(analyzeTblStmt));
+ // Give the job one column with two partitions and build again.
+ analysisInfo.colToPartitions.put("c1", new HashSet<String>() {
+ {
+ add("p1");
+ add("p2");
+ }
+ });
+ analysisManager.buildAndAssignJob(analyzeTblStmt);
+ // NOTE(review): this Expectations block is recorded after the call under test - confirm it really
+ // checks the invocation counts; a Verifications block may be what was intended.
+ new Expectations() {
+ {
+ analysisManager.syncExecute((Collection<BaseAnalysisTask>)
any);
+ times = 1;
+ analysisManager.updateTableStats((AnalysisInfo) any);
+ times = 1;
+ // JMockit would invoke this method with `null` while the Expectations instance is being
+ // initialized, which causes an NPE. These lines stay commented out until there is another
+ // way to test that a method is not invoked.
+ // analysisManager.persistAnalysisJob((AnalysisInfo) any);
+ // times = 0;
+ }
+ };
+ }
+
+ // Test building an async job (PROPERTY_SYNC = "false", period of 100 seconds).
+ // The faked createTaskForEachColumns registers the injected analysisTask under task id 1.
+ @Test
+ public void testBuildAndAssignJob2(@Injectable OlapAnalysisTask
analysisTask) throws Exception {
+ AnalysisInfo analysisInfo = new
AnalysisInfoBuilder().setColToPartitions(new HashMap<>())
+ .setScheduleType(ScheduleType.PERIOD)
+ .build();
+ new MockUp<StatisticsUtil>() {
+
+ @Mock
+ public boolean statsTblAvailable() {
+ return true;
+ }
+ };
+ new MockUp<AnalysisManager>() {
+
+ @Mock
+ public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt)
throws DdlException {
+ return analysisInfo;
+ }
+
+ @Mock
+ @VisibleForTesting
+ public void createTaskForExternalTable(AnalysisInfo jobInfo,
+ Map<Long, BaseAnalysisTask> analysisTasks,
+ boolean isSync) throws DdlException {
+ // DO NOTHING
+ }
+
+ @Mock
+ public void createTaskForEachColumns(AnalysisInfo jobInfo,
Map<Long, BaseAnalysisTask> analysisTasks,
+ boolean isSync) throws DdlException {
+ analysisTasks.put(1L, analysisTask);
+ }
+
+ @Mock
+ public void syncExecute(Collection<BaseAnalysisTask> tasks) {
+ // DO NOTHING
+ }
+
+ @Mock
+ public void updateTableStats(AnalysisInfo jobInfo) {
+ // DO NOTHING
+ }
+
+ @Mock
+ public void logCreateAnalysisJob(AnalysisInfo analysisJob) {
+
+ }
+ };
+ AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(new
TableName("test"),
+ new PartitionNames(false, new ArrayList<String>() {
+ {
+ add("p1");
+ add("p2");
+ }
+ }), new ArrayList<String>() {
+ {
+ add("c1");
+ add("c2");
+ }
+ }, new AnalyzeProperties(new HashMap<String, String>() {
+ {
+ put(AnalyzeProperties.PROPERTY_SYNC, "false");
+ put(AnalyzeProperties.PROPERTY_PERIOD_SECONDS, "100");
+ }
+ }));
+ AnalysisManager analysisManager = new AnalysisManager();
+ analysisInfo.colToPartitions.put("c1", new HashSet<String>() {
+ {
+ add("p1");
+ add("p2");
+ }
+ });
+ analysisManager.buildAndAssignJob(analyzeTblStmt);
+ // NOTE(review): this Expectations block is recorded after the call under test - confirm it really
+ // checks that recordAnalysisJob ran once; a Verifications block may be what was intended.
+ new Expectations() {
+ {
+ analysisManager.recordAnalysisJob(analysisInfo);
+ times = 1;
+ }
+ };
+ }
+
+ // Feeds task status updates into systemJobStatusUpdater and checks that the system job ends up
+ // in autoJobs and is removed from systemJobInfoMap.
+ @Test
+ public void testSystemJobStatusUpdater() {
+ new MockUp<BaseAnalysisTask>() {
+
+ @Mock
+ protected void init(AnalysisInfo info) {
+
+ }
+ };
+
+ new MockUp<AnalysisManager>() {
+ @Mock
+ public void updateTableStats(AnalysisInfo jobInfo) {}
+
+ @Mock
+ protected void logAutoJob(AnalysisInfo autoJob) {
+
+ }
+ };
+
+ AnalysisManager analysisManager = new AnalysisManager();
+ AnalysisInfo job = new AnalysisInfoBuilder()
+ .setJobId(0)
+ .setColName("col1, col2").build();
+ analysisManager.systemJobInfoMap.put(job.jobId, job);
+ // NOTE(review): task1 and task2 are both built with taskId 1, so the second put into taskMap
+ // below replaces the first and the map holds a single entry - confirm this is intended.
+ AnalysisInfo task1 = new AnalysisInfoBuilder()
+ .setJobId(0)
+ .setTaskId(1)
+ .setState(AnalysisState.RUNNING)
+ .setColName("col1").build();
+ AnalysisInfo task2 = new AnalysisInfoBuilder()
+ .setJobId(0)
+ .setTaskId(1)
+ .setState(AnalysisState.FINISHED)
+ .setColName("col2").build();
+ OlapAnalysisTask ot1 = new OlapAnalysisTask(task1);
+ OlapAnalysisTask ot2 = new OlapAnalysisTask(task2);
+ Map<Long, BaseAnalysisTask> taskMap = new HashMap<>();
+ taskMap.put(ot1.info.taskId, ot1);
+ taskMap.put(ot2.info.taskId, ot2);
+ analysisManager.analysisJobIdToTaskMap.put(job.jobId, taskMap);
+
+ // test invalid job
+ AnalysisInfo invalidJob = new
AnalysisInfoBuilder().setJobId(-1).build();
+ analysisManager.systemJobStatusUpdater.apply(new
TaskStatusWrapper(invalidJob,
+ AnalysisState.FAILED, "", 0));
+
+ // test finished
+ analysisManager.systemJobStatusUpdater.apply(new
TaskStatusWrapper(task1, AnalysisState.FAILED, "", 0));
+ analysisManager.systemJobStatusUpdater.apply(new
TaskStatusWrapper(task1, AnalysisState.FINISHED, "", 0));
+ Assertions.assertEquals(1, analysisManager.autoJobs.size());
+ Assertions.assertTrue(analysisManager.systemJobInfoMap.isEmpty());
+ }
+
+ // Checks OlapTable.needReAnalyzeTable against a faked row count: the fake returns 100 on its first
+ // call and 200 on its second, and the stats passed in were built with row counts 50 and 190.
+ @Test
+ public void testReAnalyze() {
+ new MockUp<OlapTable>() {
+
+ // Index of the next value to hand out; advanced on every getRowCount() call.
+ int count = 0;
+ int[] rowCount = new int[]{100, 200};
+ @Mock
+ public long getRowCount() {
+ return rowCount[count++];
+ }
+
+ @Mock
+ public List<Column> getBaseSchema() {
+ return Lists.newArrayList(new Column("col1",
PrimitiveType.INT));
+ }
+
+ };
+ OlapTable olapTable = new OlapTable();
+ TableStatsMeta stats1 = new TableStatsMeta(0, 50, new
AnalysisInfoBuilder().setColName("col1").build());
+ Assertions.assertTrue(olapTable.needReAnalyzeTable(stats1));
+ TableStatsMeta stats2 = new TableStatsMeta(0, 190, new
AnalysisInfoBuilder().setColName("col1").build());
+ Assertions.assertFalse(olapTable.needReAnalyzeTable(stats2));
+
+ }
+
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
new file mode 100644
index 00000000000..d618a5fa538
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.analysis.TableSample;
+import org.apache.doris.catalog.DatabaseIf;
+import org.apache.doris.catalog.TableIf;
+import org.apache.doris.common.Config;
+import org.apache.doris.datasource.CatalogIf;
+import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
+
+import mockit.Expectations;
+import mockit.Mocked;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests which {@code TableSample} {@code OlapAnalysisTask.getTableSample()} returns for a mocked table.
+ */
+public class OlapAnalysisTaskTest {
+
+    /**
+     * With {@code Config.enable_auto_sample} switched on, checks three cases in order:
+     * a FULL analysis on a table reporting 60_0000_0000 from {@code getDataSize(true)} yields a
+     * non-percent sample of 4194304; the same task on a table reporting 1_0000_0000 yields
+     * {@code null}; an explicit SAMPLE analysis with 10 sample rows yields a non-percent sample of 10.
+     */
+    @Test
+    public void testAutoSample(@Mocked CatalogIf catalogIf, @Mocked DatabaseIf databaseIf,
+            @Mocked TableIf tableIf) {
+        // Config.enable_auto_sample is static, process-wide state. Remember the current
+        // value so this test does not leak its setting into tests that run afterwards.
+        final boolean previousAutoSample = Config.enable_auto_sample;
+        try {
+            new Expectations() {
+                {
+                    tableIf.getDataSize(true);
+                    result = 60_0000_0000L;
+                }
+            };
+
+            AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder()
+                    .setAnalysisMethod(AnalysisMethod.FULL);
+            OlapAnalysisTask olapAnalysisTask = new OlapAnalysisTask();
+            olapAnalysisTask.info = analysisInfoBuilder.build();
+            olapAnalysisTask.tbl = tableIf;
+            Config.enable_auto_sample = true;
+            TableSample tableSample = olapAnalysisTask.getTableSample();
+            Assertions.assertEquals(4194304, tableSample.getSampleValue());
+            Assertions.assertFalse(tableSample.isPercent());
+
+            // Re-record the expectation with a smaller data size: no sample is expected.
+            new Expectations() {
+                {
+                    tableIf.getDataSize(true);
+                    result = 1_0000_0000L;
+                }
+            };
+            tableSample = olapAnalysisTask.getTableSample();
+            Assertions.assertNull(tableSample);
+
+            // An explicitly requested row sample is returned as given.
+            analysisInfoBuilder.setSampleRows(10);
+            analysisInfoBuilder.setAnalysisMethod(AnalysisMethod.SAMPLE);
+            olapAnalysisTask.info = analysisInfoBuilder.build();
+            tableSample = olapAnalysisTask.getTableSample();
+            Assertions.assertEquals(10, tableSample.getSampleValue());
+            Assertions.assertFalse(tableSample.isPercent());
+        } finally {
+            Config.enable_auto_sample = previousAutoSample;
+        }
+    }
+
+}
diff --git a/gensrc/thrift/PaloInternalService.thrift
b/gensrc/thrift/PaloInternalService.thrift
index b4c852b4ce1..818bc538b20 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -246,6 +246,7 @@ struct TQueryOptions {
// use is_report_success any more
84: optional bool enable_profile = false;
85: optional bool enable_page_cache = false;
+ 86: optional i32 analyze_timeout = 43200
}
diff --git
a/regression-test/data/external_table_p2/hive/test_hive_partition_statistic.out
b/regression-test/data/external_table_p2/hive/test_hive_partition_statistic.out
new file mode 100644
index 00000000000..0e32ebe4775
--- /dev/null
+++
b/regression-test/data/external_table_p2/hive/test_hive_partition_statistic.out
@@ -0,0 +1,87 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !01 --
+event_day=1956-09-07 39
+event_day=2008-09-25 39
+
+-- !1 --
+event_day=2008-09-25 10000 1 0 0 0
+event_day=2008-09-25 10000 1 0 2008-09-25 2008-09-25
+event_day=2008-09-25 10000 11 0 0 10
+event_day=2008-09-25 10000 13 0 MFGR#12 MFGR#52
+event_day=2008-09-25 10000 13 0 antique wheat
+event_day=2008-09-25 10000 16 0 JUMBO BAG WRAP PACK
+event_day=2008-09-25 10000 17 0 1 48
+event_day=2008-09-25 10000 17 0 64078 113087
+event_day=2008-09-25 10000 17 0 754035 763603
+event_day=2008-09-25 10000 17 0 ECONOMY ANODIZED BRASS
STANDARD POLISHED TIN
+event_day=2008-09-25 10000 17 0 MFGR#1221 MFGR#528
+event_day=2008-09-25 10000 17 0 burnished drab violet firebrick
+event_day=2008-09-25 10000 2362 0 19920101 19980802
+event_day=2008-09-25 10000 2382 0 19920203 19981027
+event_day=2008-09-25 10000 25 0 ALGERIA VIETNAM
+event_day=2008-09-25 10000 25 0 ALGERIA VIETNAM
+event_day=2008-09-25 10000 250 0 ALGERIA 0 VIETNAM 9
+event_day=2008-09-25 10000 250 0 ALGERIA 0 VIETNAM 9
+event_day=2008-09-25 10000 5 0 1-URGENT 5-LOW
+event_day=2008-09-25 10000 5 0 AFRICA MIDDLE EAST
+event_day=2008-09-25 10000 5 0 AFRICA MIDDLE EAST
+event_day=2008-09-25 10000 5 0 AUTOMOBILE MACHINERY
+event_day=2008-09-25 10000 5 0 MFGR#1 MFGR#5
+event_day=2008-09-25 10000 50 0 1 50
+event_day=2008-09-25 10000 6074 0 96748 9388900
+event_day=2008-09-25 10000 7 0 1 7
+event_day=2008-09-25 10000 7 0 AIR TRUCK
+event_day=2008-09-25 10000 845 0 106797 9423950
+event_day=2008-09-25 10000 9 0 0 8
+event_day=2008-09-25 10000 9775 0 119 2999848
+event_day=2008-09-25 10000 9794 0 107970 45833194
+event_day=2008-09-25 10000 9837 0 MGHV8XBriO zzlztYTFMFW
+event_day=2008-09-25 10000 9846 0 Customer#000000119
Customer#002999848
+event_day=2008-09-25 10000 9861 0 13091 599962401
+event_day=2008-09-25 10000 9879 0 10-100-337-6599 34-999-684-2905
+event_day=2008-09-25 10000 9883 0 Supplier#000000001
Supplier#000199983
+event_day=2008-09-25 10000 9896 0 B5YhCdkaxR232CrXx
zyxtAvAViHMabnr,1UQybiW
+event_day=2008-09-25 10000 9927 0 10-105-800-9296 34-998-982-7450
+event_day=2008-09-25 10000 9971 0 1 199983
+
+-- !2 --
+event_day=1956-09-07 10000 1 0 0 0
+event_day=1956-09-07 10000 1 0 1956-09-07 1956-09-07
+event_day=1956-09-07 10000 11 0 0 10
+event_day=1956-09-07 10000 13 0 MFGR#12 MFGR#52
+event_day=1956-09-07 10000 13 0 antique wheat
+event_day=1956-09-07 10000 16 0 JUMBO BAG WRAP PACK
+event_day=1956-09-07 10000 17 0 1 48
+event_day=1956-09-07 10000 17 0 64078 113087
+event_day=1956-09-07 10000 17 0 754035 763603
+event_day=1956-09-07 10000 17 0 ECONOMY ANODIZED BRASS
STANDARD POLISHED TIN
+event_day=1956-09-07 10000 17 0 MFGR#1221 MFGR#528
+event_day=1956-09-07 10000 17 0 burnished drab violet firebrick
+event_day=1956-09-07 10000 2362 0 19920101 19980802
+event_day=1956-09-07 10000 2382 0 19920203 19981027
+event_day=1956-09-07 10000 25 0 ALGERIA VIETNAM
+event_day=1956-09-07 10000 25 0 ALGERIA VIETNAM
+event_day=1956-09-07 10000 250 0 ALGERIA 0 VIETNAM 9
+event_day=1956-09-07 10000 250 0 ALGERIA 0 VIETNAM 9
+event_day=1956-09-07 10000 5 0 1-URGENT 5-LOW
+event_day=1956-09-07 10000 5 0 AFRICA MIDDLE EAST
+event_day=1956-09-07 10000 5 0 AFRICA MIDDLE EAST
+event_day=1956-09-07 10000 5 0 AUTOMOBILE MACHINERY
+event_day=1956-09-07 10000 5 0 MFGR#1 MFGR#5
+event_day=1956-09-07 10000 50 0 1 50
+event_day=1956-09-07 10000 6074 0 96748 9388900
+event_day=1956-09-07 10000 7 0 1 7
+event_day=1956-09-07 10000 7 0 AIR TRUCK
+event_day=1956-09-07 10000 845 0 106797 9423950
+event_day=1956-09-07 10000 9 0 0 8
+event_day=1956-09-07 10000 9775 0 119 2999848
+event_day=1956-09-07 10000 9794 0 107970 45833194
+event_day=1956-09-07 10000 9837 0 MGHV8XBriO zzlztYTFMFW
+event_day=1956-09-07 10000 9846 0 Customer#000000119
Customer#002999848
+event_day=1956-09-07 10000 9861 0 13091 599962401
+event_day=1956-09-07 10000 9879 0 10-100-337-6599 34-999-684-2905
+event_day=1956-09-07 10000 9883 0 Supplier#000000001
Supplier#000199983
+event_day=1956-09-07 10000 9896 0 B5YhCdkaxR232CrXx
zyxtAvAViHMabnr,1UQybiW
+event_day=1956-09-07 10000 9927 0 10-105-800-9296 34-998-982-7450
+event_day=1956-09-07 10000 9971 0 1 199983
+
diff --git
a/regression-test/data/external_table_p2/hive/test_hive_statistic_timeout.out
b/regression-test/data/external_table_p2/hive/test_hive_statistic_timeout.out
new file mode 100644
index 00000000000..e906deea593
--- /dev/null
+++
b/regression-test/data/external_table_p2/hive/test_hive_statistic_timeout.out
@@ -0,0 +1,7 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !01 --
+p_container 200000000 40 0 JUMBO BAG WRAP PKG
+p_partkey 200000000 200778064 0 1 200000000
+p_retailprice 200000000 120014 0 900.00 2099.00
+p_type 200000000 150 0 ECONOMY ANODIZED BRASS STANDARD
POLISHED TIN
+
diff --git
a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_statistics.groovy
b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_statistics.groovy
index d6f0ca351dd..e58e17cfcd6 100644
---
a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_statistics.groovy
+++
b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_statistics.groovy
@@ -42,7 +42,7 @@ suite("test_mysql_jdbc_statistics",
"p0,external,mysql,external_docker,external_
assertTrue(result[0][1] == "5.0")
assertTrue(result[0][2] == "5.0")
assertTrue(result[0][3] == "0.0")
- assertTrue(result[0][4] == "18.0")
+ assertTrue(result[0][4] == "15.0")
assertTrue(result[0][5] == "3.0")
assertTrue(result[0][6] == "'abc'")
assertTrue(result[0][7] == "'abg'")
@@ -53,7 +53,7 @@ suite("test_mysql_jdbc_statistics",
"p0,external,mysql,external_docker,external_
assertTrue(result[0][1] == "5.0")
assertTrue(result[0][2] == "5.0")
assertTrue(result[0][3] == "0.0")
- assertTrue(result[0][4] == "24.0")
+ assertTrue(result[0][4] == "20.0")
assertTrue(result[0][5] == "4.0")
assertTrue(result[0][6] == "111")
assertTrue(result[0][7] == "115")
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_partition_statistic.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_partition_statistic.groovy
new file mode 100644
index 00000000000..9f4b462237f
--- /dev/null
+++
b/regression-test/suites/external_table_p2/hive/test_hive_partition_statistic.groovy
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Analyzes two partitions of multi_partition_orc through an HMS catalog and compares the
+// rows stored in internal.__internal_schema.column_statistics with the recorded output.
+suite("test_hive_partition_statistic", "p2,external,hive,external_remote,external_remote_hive") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    // Nothing to run unless the external Hive tests are switched on.
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        return
+    }
+
+    String hmsHost = context.config.otherConfigs.get("extHiveHmsHost")
+    String hmsPort = context.config.otherConfigs.get("extHiveHmsPort")
+    String catalog_name = "test_hive_partition_statistic"
+    sql """drop catalog if exists ${catalog_name};"""
+    sql """
+        create catalog if not exists ${catalog_name} properties (
+            'type'='hms',
+            'hadoop.username' = 'hadoop',
+            'hive.metastore.uris' = 'thrift://${hmsHost}:${hmsPort}'
+        );
+    """
+    logger.info("catalog " + catalog_name + " created")
+
+    sql """use ${catalog_name}.multi_partition"""
+    sql """analyze table multi_partition_orc partitions (`event_day=2008-09-25`, `event_day=1956-09-07`) with sync"""
+
+    // Find the id of the catalog created above; if several rows match, the last one wins.
+    def ctlId
+    def catalogRows = sql """show proc '/catalogs'"""
+    for (row in catalogRows) {
+        if (row[1] == catalog_name) {
+            ctlId = row[0]
+        }
+    }
+
+    qt_01 """select part_id, count(*) from internal.__internal_schema.column_statistics where catalog_id='$ctlId' group by part_id order by part_id;"""
+    order_qt_1 """select part_id, count, ndv, null_count, min, max from internal.__internal_schema.column_statistics where catalog_id='$ctlId' and part_id='event_day=2008-09-25'"""
+    order_qt_2 """select part_id, count, ndv, null_count, min, max from internal.__internal_schema.column_statistics where catalog_id='$ctlId' and part_id='event_day=1956-09-07'"""
+
+    sql """drop catalog ${catalog_name}""";
+}
+
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_sample_statistic.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_sample_statistic.groovy
new file mode 100644
index 00000000000..c2a21e3994b
--- /dev/null
+++
b/regression-test/suites/external_table_p2/hive/test_hive_sample_statistic.groovy
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Runs a 10 percent sampled analyze on tpch_1000_parquet.part through an HMS catalog and
+// checks that the table-level value and every column's `count` lie in [200000000, 220000000).
+suite("test_hive_sample_statistic", "p2,external,hive,external_remote,external_remote_hive") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_hive_sample_statistic"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+
+        sql """use ${catalog_name}.tpch_1000_parquet"""
+        sql """analyze table part with sample percent 10 with sync;"""
+
+        // Table-level check: the third field of the single result row is parsed as a long.
+        def result = sql """show table stats part"""
+        assertTrue(result.size() == 1)
+        assertTrue(Long.parseLong(result[0][2]) >= 200000000)
+        assertTrue(Long.parseLong(result[0][2]) < 220000000)
+
+        // Find the id of the catalog created above; if several rows match, the last one wins.
+        def ctlId
+        result = sql """show proc '/catalogs'"""
+
+        for (int i = 0; i < result.size(); i++) {
+            if (result[i][1] == catalog_name) {
+                ctlId = result[i][0]
+            }
+        }
+
+        // Every column of `part` gets the same query and the same range check, so the
+        // columns are listed once and checked in a loop, in the original order.
+        def partColumns = ["p_partkey", "p_name", "p_mfgr", "p_brand", "p_type",
+                           "p_size", "p_container", "p_retailprice", "p_comment"]
+        for (String column : partColumns) {
+            result = sql """select count from internal.__internal_schema.column_statistics where catalog_id='$ctlId' and col_id='${column}'"""
+            assertTrue(result.size() == 1)
+            assertTrue(result[0][0] >= 200000000)
+            assertTrue(result[0][0] < 220000000)
+        }
+
+        sql """drop catalog ${catalog_name}""";
+    }
+}
+
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
index 1160a2f8dd6..49142b12e36 100644
--- a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
+++ b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
@@ -30,6 +30,10 @@ suite("test_hive_statistic", "p2") {
);
"""
logger.info("catalog " + catalog_name + " created")
+
+ // Test analyze table without init.
+ sql """analyze table ${catalog_name}.tpch_1000_parquet.region with
sync"""
+
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)
sql """use statistics;"""
@@ -234,11 +238,11 @@ suite("test_hive_statistic", "p2") {
sql """analyze database `statistics` with sync"""
result = sql """show table stats statistics"""
assertTrue(result.size() == 1)
- assertTrue(result[0][0] == "100")
+ assertTrue(result[0][2] == "100")
result = sql """show table cached stats statistics"""
assertTrue(result.size() == 1)
- assertTrue(result[0][0] == "100")
+ assertTrue(result[0][2] == "100")
sql """drop stats statistics"""
result = sql """show column cached stats statistics"""
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy
new file mode 100644
index 00000000000..f766069346e
--- /dev/null
+++
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Submits an analyze job flagged with "use.auto.analyzer"="true" for an HMS catalog database,
+// then polls `show column stats` for up to ten one-second rounds and checks three columns.
+suite("test_hive_statistic_auto", "p2,external,hive,external_remote,external_remote_hive") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_hive_statistic_auto"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+
+        // Test analyze table without init.
+        sql """analyze database ${catalog_name}.statistics PROPERTIES("use.auto.analyzer"="true")"""
+        sql """use ${catalog_name}.statistics"""
+
+        for (int i = 0; i < 10; i++) {
+            Thread.sleep(1000)
+            def result = sql """show column stats `statistics` (lo_quantity)"""
+            // size() must be called as a method: `result.size` is property access, which
+            // Groovy spreads over the list elements instead of returning the list length.
+            if (result.size() <= 0) {
+                // Statistics are not available yet; try again in the next round.
+                continue;
+            }
+            assertTrue(result.size() == 1)
+            assertTrue(result[0][0] == "lo_quantity")
+            assertTrue(result[0][1] == "100.0")
+            assertTrue(result[0][2] == "46.0")
+            assertTrue(result[0][3] == "0.0")
+            assertTrue(result[0][4] == "404.0")
+            assertTrue(result[0][5] == "4.0")
+            assertTrue(result[0][6] == "1")
+            assertTrue(result[0][7] == "50")
+
+            result = sql """show column stats `statistics` (lo_orderkey)"""
+            if (result.size() <= 0) {
+                continue;
+            }
+            assertTrue(result.size() == 1)
+            assertTrue(result[0][0] == "lo_orderkey")
+            assertTrue(result[0][1] == "100.0")
+            assertTrue(result[0][2] == "26.0")
+            assertTrue(result[0][3] == "0.0")
+            assertTrue(result[0][4] == "404.0")
+            assertTrue(result[0][5] == "4.0")
+            assertTrue(result[0][6] == "1")
+            assertTrue(result[0][7] == "98")
+
+            result = sql """show column stats `statistics` (lo_linenumber)"""
+            if (result.size() <= 0) {
+                continue;
+            }
+            assertTrue(result.size() == 1)
+            assertTrue(result[0][0] == "lo_linenumber")
+            assertTrue(result[0][1] == "100.0")
+            assertTrue(result[0][2] == "7.0")
+            assertTrue(result[0][3] == "0.0")
+            assertTrue(result[0][4] == "404.0")
+            assertTrue(result[0][5] == "4.0")
+            assertTrue(result[0][6] == "1")
+            assertTrue(result[0][7] == "7")
+        }
+
+        sql """drop catalog ${catalog_name}"""
+
+    }
+}
+
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_cache.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_cache.groovy
index d1399ef49b1..d7b8f00e3e1 100644
---
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_cache.groovy
+++
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_cache.groovy
@@ -29,6 +29,40 @@ suite("test_hive_statistic_cache", "p2") {
'hive.metastore.uris' =
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
);
"""
+ sql """use ${catalog_name}.tpch_1000_parquet"""
+ sql """desc customer""";
+ sql """desc lineitem""";
+ sql """desc region""";
+ sql """desc nation""";
+ sql """desc orders""";
+ sql """desc part""";
+ sql """desc partsupp""";
+ sql """desc supplier""";
+ Thread.sleep(1000);
+ def result = sql """show table cached stats customer"""
+ assertTrue(result[0][2] == "150000000")
+
+ result = sql """show table cached stats lineitem"""
+ assertTrue(result[0][2] == "5999989709")
+
+ result = sql """show table cached stats region"""
+ assertTrue(result[0][2] == "5")
+
+ result = sql """show table cached stats nation"""
+ assertTrue(result[0][2] == "25")
+
+ result = sql """show table cached stats orders"""
+ assertTrue(result[0][2] == "1500000000")
+
+ result = sql """show table cached stats part"""
+ assertTrue(result[0][2] == "200000000")
+
+ result = sql """show table cached stats partsupp"""
+ assertTrue(result[0][2] == "800000000")
+
+ result = sql """show table cached stats supplier"""
+ assertTrue(result[0][2] == "10000000")
+
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)
@@ -37,7 +71,7 @@ suite("test_hive_statistic_cache", "p2") {
sql """analyze table `stats` with sync;"""
sql """select count(*) from stats"""
Thread.sleep(5000);
- def result = sql """show column cached stats `stats` (lo_orderkey)"""
+ result = sql """show column cached stats `stats` (lo_orderkey)"""
assertTrue(result[0][0] == "lo_orderkey")
assertTrue(result[0][1] == "100.0")
assertTrue(result[0][2] == "26.0")
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_sample.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_sample.groovy
new file mode 100644
index 00000000000..960bec31df5
--- /dev/null
+++
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_sample.groovy
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression suite: creates an HMS catalog, runs a synchronous 10 percent sampled analyze on
+// tpch_1000_parquet.region and tpch_1000_parquet.supplier, and compares the single row that
+// `show column stats` returns for each column with fixed expected strings.
+suite("test_hive_statistic_sample",
"p2,external,hive,external_remote,external_remote_hive") {
+ // The body only runs when enableExternalHiveTest is "true" (case-insensitive).
+ String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
+ String extHiveHmsPort =
context.config.otherConfigs.get("extHiveHmsPort")
+ String catalog_name = "test_hive_statistic_sample"
+ // Drop and recreate the catalog so the suite starts from a fresh catalog.
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hadoop.username' = 'hadoop',
+ 'hive.metastore.uris' =
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+ );
+ """
+ logger.info("catalog " + catalog_name + " created")
+
+ // Both analyze statements use fully qualified table names and run before the
+ // `use` statement switches into the catalog.
+ sql """analyze table ${catalog_name}.tpch_1000_parquet.region with
sample percent 10 with sync"""
+ sql """analyze table ${catalog_name}.tpch_1000_parquet.supplier with
sample percent 10 with sync"""
+ sql """use ${catalog_name}.tpch_1000_parquet"""
+ // Each check below expects exactly one row; field 0 is the column name and fields
+ // 1-7 are compared as strings.
+ // NOTE(review): fields 1-7 are presumably count, ndv, null count, data size,
+ // average size, min and max - confirm against the SHOW COLUMN STATS output.
+ def result = sql """show column stats region (r_regionkey)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "r_regionkey")
+ assertTrue(result[0][1] == "5.0")
+ assertTrue(result[0][2] == "5.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "20.0")
+ assertTrue(result[0][5] == "4.0")
+ assertTrue(result[0][6] == "0")
+ assertTrue(result[0][7] == "4")
+
+ result = sql """show column stats region (r_name)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "r_name")
+ assertTrue(result[0][1] == "5.0")
+ assertTrue(result[0][2] == "5.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "34.0")
+ assertTrue(result[0][5] == "6.8")
+ assertTrue(result[0][6] == "\'AFRICA\'")
+ assertTrue(result[0][7] == "\'MIDDLE EAST\'")
+
+ result = sql """show column stats region (r_comment)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "r_comment")
+ assertTrue(result[0][1] == "5.0")
+ assertTrue(result[0][2] == "5.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "330.0")
+ assertTrue(result[0][5] == "66.0")
+ assertTrue(result[0][6] == "\'ges. thinly even pinto beans ca\'")
+ assertTrue(result[0][7] == "\'uickly special accounts cajole carefully
blithely close requests. carefully final asymptotes haggle furiousl\'")
+
+ // Checks for the columns of the supplier table start here.
+ result = sql """show column stats supplier (s_suppkey)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "s_suppkey")
+ assertTrue(result[0][1] == "9998799.0")
+ assertTrue(result[0][2] == "9970222.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "3.9995194E7")
+ assertTrue(result[0][5] == "3.9999997999759773")
+ assertTrue(result[0][6] == "1885331")
+ assertTrue(result[0][7] == "9395153")
+
+ result = sql """show column stats supplier (s_name)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "s_name")
+ assertTrue(result[0][1] == "9998799.0")
+ assertTrue(result[0][2] == "1.004004E7")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "1.79978374E8")
+ assertTrue(result[0][5] == "17.999999199903908")
+ assertTrue(result[0][6] == "\'Supplier#001885331\'")
+ assertTrue(result[0][7] == "\'Supplier#009395153\'")
+
+ result = sql """show column stats supplier (s_address)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "s_address")
+ assertTrue(result[0][1] == "9998799.0")
+ assertTrue(result[0][2] == "9998862.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "2.50070604E8")
+ assertTrue(result[0][5] == "25.010064108699456")
+ assertTrue(result[0][6] == "\' E,WAW2ZEx\'")
+ assertTrue(result[0][7] == "\'zzzw X3bpxu,OCpzgv6BdyMVMKzaB1DbH\'")
+
+ result = sql """show column stats supplier (s_nationkey)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "s_nationkey")
+ assertTrue(result[0][1] == "9998799.0")
+ assertTrue(result[0][2] == "25.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "3.9995194E7")
+ assertTrue(result[0][5] == "3.9999997999759773")
+ assertTrue(result[0][6] == "0")
+ assertTrue(result[0][7] == "24")
+
+ result = sql """show column stats supplier (s_phone)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "s_phone")
+ assertTrue(result[0][1] == "9998799.0")
+ assertTrue(result[0][2] == "9928006.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "1.49981978E8")
+ assertTrue(result[0][5] == "14.99999929991592")
+ assertTrue(result[0][6] == "\'10-100-128-4513\'")
+ assertTrue(result[0][7] == "\'34-999-967-7296\'")
+
+ result = sql """show column stats supplier (s_acctbal)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "s_acctbal")
+ assertTrue(result[0][1] == "9998799.0")
+ assertTrue(result[0][2] == "4766937.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "7.9990388E7")
+ assertTrue(result[0][5] == "7.999999599951955")
+ assertTrue(result[0][6] == "-999.99")
+ assertTrue(result[0][7] == "9999.99")
+
+ result = sql """show column stats supplier (s_comment)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "s_comment")
+ assertTrue(result[0][1] == "9998799.0")
+ assertTrue(result[0][2] == "9931298.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "6.24883849E8")
+ assertTrue(result[0][5] == "62.49589065646784")
+ assertTrue(result[0][6] == "\' Customer across the pinto beans.
pinRecommends\'")
+ assertTrue(result[0][7] == "\'zzle? express, regular foxes haggle
final ac\'")
+
+ // Remove the catalog created by this suite.
+ sql """drop catalog ${catalog_name}"""
+ }
+}
+
diff --git
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy
new file mode 100644
index 00000000000..a3754d9d2a4
--- /dev/null
+++
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression suite: collects column statistics on a Hive (HMS) external table
+// after query_timeout has been set to 1, then inspects the stored statistics.
+// The body runs only when the "enableExternalHiveTest" entry of otherConfigs is
+// "true" (case-insensitive); otherwise the suite does nothing.
+// NOTE(review): judging by the suite name, the intent is presumably that a
+// synchronous ANALYZE is not cut short by query_timeout - confirm.
+suite("test_hive_statistic_timeout",
"p2,external,hive,external_remote,external_remote_hive") {
+ String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ // Metastore host and port are read from the same otherConfigs map.
+ String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
+ String extHiveHmsPort =
context.config.otherConfigs.get("extHiveHmsPort")
+ String catalog_name = "test_hive_statistic_timeout"
+ // Drop any leftover catalog of the same name, then create it against the
+ // configured metastore URI.
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hadoop.username' = 'hadoop',
+ 'hive.metastore.uris' =
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+ );
+ """
+ logger.info("catalog " + catalog_name + " created")
+
+ sql """use ${catalog_name}.tpch_1000_parquet"""
+ // query_timeout is set to 1 before the synchronous ANALYZE of four columns.
+ sql """set query_timeout=1"""
+ sql """analyze table part (p_partkey, p_container, p_type,
p_retailprice) with sync;"""
+
+ // Four rows are expected, matching the four columns listed in the ANALYZE.
+ def result = sql """show column stats part"""
+ assertTrue(result.size() == 4)
+
+ // Look up this catalog in the '/catalogs' proc output: field 1 of each row is
+ // compared with the catalog name and field 0 of the matching row is kept.
+ // ctlId stays null if no row matches.
+ def ctlId
+ result = sql """show proc '/catalogs'"""
+
+ for (int i = 0; i < result.size(); i++) {
+ if (result[i][1] == catalog_name) {
+ ctlId = result[i][0]
+ }
+ }
+
+ // Read the stored statistics for this catalog straight from the internal
+ // column_statistics table, ordered by col_id.
+ // NOTE(review): qt_01 presumably compares the rows with this suite's recorded
+ // expected output - confirm against the regression framework.
+ qt_01 """select col_id, count, ndv, null_count, min, max from
internal.__internal_schema.column_statistics where catalog_id='$ctlId' order by
col_id;"""
+ sql """drop catalog ${catalog_name}""";
+ }
+}
+
diff --git a/regression-test/suites/statistics/test_basic_statistics.groovy b/regression-test/suites/statistics/test_basic_statistics.groovy
new file mode 100644
index 00000000000..a885ac1c11c
--- /dev/null
+++ b/regression-test/suites/statistics/test_basic_statistics.groovy
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression suite: creates a fresh database with a two-column table holding
+// nine rows, runs a synchronous ANALYZE on it, and checks the exact values
+// returned by `show column stats` for the `id` and `name` columns.
+// The table and the database are dropped again at the end.
+suite("test_basic_statistics") {
+ String db = "test_basic_statistics"
+ String tbl = "test_table_1"
+
+ // Start from an empty database so earlier runs cannot affect the statistics.
+ sql """
+ DROP DATABASE IF EXISTS `${db}`
+ """
+
+ sql """
+ CREATE DATABASE `${db}`
+ """
+
+ sql """ use `${db}`"""
+
+ sql """
+ DROP TABLE IF EXISTS `${tbl}`
+ """
+
+ sql """
+ CREATE TABLE IF NOT EXISTS `${tbl}` (
+ `id` int(11) not null comment "",
+ `name` varchar(100) null comment ""
+ ) engine=olap
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1 properties("replication_num" = "1")
+ """
+
+ // Nine rows: ids 1..9 paired with names 'name1'..'name9'.
+ sql """
+ INSERT INTO `${tbl}` VALUES (1, 'name1'), (2, 'name2'), (3, 'name3'),
(4, 'name4'), (5, 'name5'), (6, 'name6'), (7, 'name7'), (8, 'name8'), (9,
'name9')
+ """
+
+ sql """ analyze table ${tbl} with sync"""
+ // One row is expected per requested column; field 0 is the column name.
+ // NOTE(review): fields 1..7 are presumably row count, ndv, null count,
+ // data size, average size, min and max - confirm against the
+ // SHOW COLUMN STATS output schema.
+ def result = sql """show column stats ${tbl} (id)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "id")
+ assertTrue(result[0][1] == "9.0")
+ assertTrue(result[0][2] == "9.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "36.0")
+ assertTrue(result[0][5] == "4.0")
+ assertTrue(result[0][6] == "1")
+ assertTrue(result[0][7] == "9")
+
+ // Same checks for the varchar column; the last two fields are compared
+ // against values wrapped in single quotes.
+ result = sql """show column stats ${tbl} (name)"""
+ assertTrue(result.size() == 1)
+ assertTrue(result[0][0] == "name")
+ assertTrue(result[0][1] == "9.0")
+ assertTrue(result[0][2] == "9.0")
+ assertTrue(result[0][3] == "0.0")
+ assertTrue(result[0][4] == "45.0")
+ assertTrue(result[0][5] == "5.0")
+ assertTrue(result[0][6] == "\'name1\'")
+ assertTrue(result[0][7] == "\'name9\'")
+
+ sql """drop table ${tbl}"""
+ sql """drop database ${db}"""
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]