This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f9c56d59fc [improvement](statistics)Support external table show table
stats, modify column stats and drop stats (#21624)
f9c56d59fc is described below
commit f9c56d59fcd01b0b891cec1c8023e509830a0426
Author: Jibing-Li <[email protected]>
AuthorDate: Mon Jul 10 11:33:06 2023 +0800
[improvement](statistics)Support external table show table stats, modify
column stats and drop stats (#21624)
Support SHOW TABLE STATS, modifying column stats, and DROP STATS for external tables.
---
.../doris/analysis/AlterColumnStatsStmt.java | 26 +++------
.../org/apache/doris/analysis/DropStatsStmt.java | 17 ++++++
.../apache/doris/analysis/ShowTableStatsStmt.java | 3 --
.../apache/doris/statistics/AnalysisManager.java | 3 ++
.../apache/doris/statistics/HMSAnalysisTask.java | 29 ++++++++--
.../doris/statistics/StatisticsRepository.java | 12 +++++
.../hive/test_hive_statistic.out | 57 ++++++++++++++++++++
.../hive/test_hive_statistic.groovy | 62 ++++++++++++++++++++++
8 files changed, 183 insertions(+), 26 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
index 93edefcab1..3e16a380e4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
@@ -17,10 +17,9 @@
package org.apache.doris.analysis;
-import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
-import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.Table;
+import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.ErrorCode;
@@ -28,7 +27,7 @@ import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.PrintableMap;
-import org.apache.doris.common.util.Util;
+import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStatistic;
@@ -109,9 +108,6 @@ public class AlterColumnStatsStmt extends DdlStmt {
// check table name
tableName.analyze(analyzer);
- // disallow external catalog
- Util.prohibitExternalCatalog(tableName.getCtl(),
this.getClass().getSimpleName());
-
// check partition & column
checkColumnNames();
@@ -138,19 +134,11 @@ public class AlterColumnStatsStmt extends DdlStmt {
});
}
- /**
- * TODO(wzt): Support for external tables
- */
private void checkColumnNames() throws AnalysisException {
- Database db =
analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(tableName.getDb());
- Table table = db.getTableOrAnalysisException(tableName.getTbl());
-
- if (table.getType() != Table.TableType.OLAP) {
- throw new AnalysisException("Only OLAP table statistics are
supported");
- }
-
- OlapTable olapTable = (OlapTable) table;
- if (olapTable.getColumn(columnName) == null) {
+ CatalogIf catalog =
analyzer.getEnv().getCatalogMgr().getCatalog(tableName.getCtl());
+ DatabaseIf db = catalog.getDbOrAnalysisException(tableName.getDb());
+ TableIf table = db.getTableOrAnalysisException(tableName.getTbl());
+ if (table.getColumn(columnName) == null) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
columnName, FeNameFormat.getColumnNameRegex());
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java
index e821fecaed..aa80f664dc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java
@@ -27,6 +27,7 @@ import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.CatalogIf;
+import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
@@ -51,6 +52,8 @@ public class DropStatsStmt extends DdlStmt {
private final TableName tableName;
private Set<String> columnNames;
+ // Flag to drop external table row count in table_statistics.
+ private boolean dropTableRowCount;
private long tblId;
@@ -58,6 +61,7 @@ public class DropStatsStmt extends DdlStmt {
this.dropExpired = dropExpired;
this.tableName = null;
this.columnNames = null;
+ this.dropTableRowCount = false;
}
public DropStatsStmt(TableName tableName,
@@ -65,6 +69,11 @@ public class DropStatsStmt extends DdlStmt {
this.tableName = tableName;
if (columnNames != null) {
this.columnNames = new HashSet<>(columnNames);
+ this.dropTableRowCount = false;
+ } else {
+ // columnNames == null means drop all columns, in this case,
+ // external table need to drop the table row count as well.
+ dropTableRowCount = true;
}
dropExpired = false;
}
@@ -81,6 +90,10 @@ public class DropStatsStmt extends DdlStmt {
}
tableName.analyze(analyzer);
String catalogName = tableName.getCtl();
+ if (InternalCatalog.INTERNAL_CATALOG_NAME.equals(catalogName)) {
+ // Internal table doesn't need to drop table row count.
+ dropTableRowCount = false;
+ }
String dbName = tableName.getDb();
String tblName = tableName.getTbl();
CatalogIf catalog = analyzer.getEnv().getCatalogMgr()
@@ -115,6 +128,10 @@ public class DropStatsStmt extends DdlStmt {
return columnNames;
}
+ public boolean dropTableRowCount() {
+ return dropTableRowCount;
+ }
+
@Override
public String toSql() {
StringBuilder sb = new StringBuilder();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index 845111d036..e462c8585c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -27,7 +27,6 @@ import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.UserException;
-import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
@@ -76,8 +75,6 @@ public class ShowTableStatsStmt extends ShowStmt {
throw new AnalysisException("Only one partition name could be
specified");
}
}
- // disallow external catalog
- Util.prohibitExternalCatalog(tableName.getCtl(),
this.getClass().getSimpleName());
CatalogIf<DatabaseIf> catalog =
Env.getCurrentEnv().getCatalogMgr().getCatalog(tableName.getCtl());
if (catalog == null) {
ErrorReport.reportAnalysisException("Catalog: {} not exists",
tableName.getCtl());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 1b8f2e7e0d..fde1407c11 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -712,6 +712,9 @@ public class AnalysisManager extends Daemon implements
Writable {
for (String col : cols) {
Env.getCurrentEnv().getStatisticsCache().invalidate(tblId, -1L,
col);
}
+ if (dropStatsStmt.dropTableRowCount()) {
+ StatisticsRepository.dropExternalTableStatistics(tblId);
+ }
}
public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt)
throws DdlException {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index a7b45c13cb..4483f738a7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -49,6 +49,25 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
public static final String TIMESTAMP = "transient_lastDdlTime";
private static final String ANALYZE_SQL_TABLE_TEMPLATE = "INSERT INTO "
+ + "${internalDB}.${columnStatTbl}"
+ + " SELECT "
+ + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
+ + "${catalogId} AS catalog_id, "
+ + "${dbId} AS db_id, "
+ + "${tblId} AS tbl_id, "
+ + "${idxId} AS idx_id, "
+ + "'${colId}' AS col_id, "
+ + "NULL AS part_id, "
+ + "COUNT(1) AS row_count, "
+ + "NDV(`${colName}`) AS ndv, "
+ + "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) AS
null_count, "
+ + "MIN(`${colName}`) AS min, "
+ + "MAX(`${colName}`) AS max, "
+ + "${dataSizeFunction} AS data_size, "
+ + "NOW() "
+ + "FROM `${catalogName}`.`${dbName}`.`${tblName}`";
+
+ private static final String ANALYZE_SQL_PARTITION_TEMPLATE = "INSERT INTO "
+ "${internalDB}.${columnStatTbl}"
+ " SELECT "
+ "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
@@ -104,7 +123,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
if (isPartitionOnly) {
for (String partId : partitionNames) {
StringBuilder sb = new StringBuilder();
- sb.append(ANALYZE_TABLE_COUNT_TEMPLATE);
+ sb.append(ANALYZE_SQL_PARTITION_TEMPLATE);
sb.append(" where ");
String[] splits = partId.split("/");
for (int i = 0; i < splits.length; i++) {
@@ -122,7 +141,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
StatisticsRepository.persistTableStats(params);
}
} else {
- Map<String, String> params = buildTableStatsParams("NULL");
+ Map<String, String> params = buildTableStatsParams(null);
List<InternalQueryResult.ResultRow> columnResult =
StatisticsUtil.execStatisticQuery(new
StringSubstitutor(params)
.replace(ANALYZE_TABLE_COUNT_TEMPLATE));
@@ -226,8 +245,11 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
private Map<String, String> buildTableStatsParams(String partId) {
Map<String, String> commonParams = new HashMap<>();
String id = StatisticsUtil.constructId(tbl.getId(), -1);
- if (!partId.equals("NULL")) {
+ if (partId == null) {
+ commonParams.put("partId", "NULL");
+ } else {
id = StatisticsUtil.constructId(id, partId);
+ commonParams.put("partId", "\'" + partId + "\'");
}
commonParams.put("id", id);
commonParams.put("catalogId", String.valueOf(catalog.getId()));
@@ -235,7 +257,6 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
commonParams.put("tblId", String.valueOf(tbl.getId()));
commonParams.put("indexId", "-1");
commonParams.put("idxId", "-1");
- commonParams.put("partId", "\'" + partId + "\'");
commonParams.put("catalogName", catalog.getName());
commonParams.put("dbName", db.getFullName());
commonParams.put("tblName", tbl.getName());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index ced8e1e6a6..70c495f29b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -202,6 +202,18 @@ public class StatisticsRepository {
dropStatisticsByColName(tblId, colNames,
StatisticConstants.HISTOGRAM_TBL_NAME);
}
+ public static void dropExternalTableStatistics(long tblId) throws
DdlException {
+ Map<String, String> params = new HashMap<>();
+ String inPredicate = String.format("tbl_id = %s", tblId);
+ params.put("tblName", StatisticConstants.ANALYSIS_TBL_NAME);
+ params.put("condition", inPredicate);
+ try {
+ StatisticsUtil.execUpdate(new
StringSubstitutor(params).replace(DROP_TABLE_STATISTICS_TEMPLATE));
+ } catch (Exception e) {
+ throw new DdlException(e.getMessage(), e);
+ }
+ }
+
public static void dropStatisticsByColName(long tblId, Set<String>
colNames, String statsTblName)
throws DdlException {
Map<String, String> params = new HashMap<>();
diff --git
a/regression-test/data/external_table_emr_p2/hive/test_hive_statistic.out
b/regression-test/data/external_table_emr_p2/hive/test_hive_statistic.out
new file mode 100644
index 0000000000..ad8b494c00
--- /dev/null
+++ b/regression-test/data/external_table_emr_p2/hive/test_hive_statistic.out
@@ -0,0 +1,57 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !1 --
+lo_quantity 100.0 46.0 0.0 404.0 4.0 1 50
+
+-- !2 --
+lo_orderkey 100.0 26.0 0.0 404.0 4.0 1 98
+
+-- !3 --
+lo_linenumber 100.0 7.0 0.0 404.0 4.0 1 7
+
+-- !4 --
+lo_custkey 100.0 26.0 0.0 404.0 4.0 67423 2735521
+
+-- !5 --
+lo_partkey 100.0 100.0 0.0 404.0 4.0 2250 989601
+
+-- !6 --
+lo_suppkey 100.0 100.0 0.0 404.0 4.0 4167 195845
+
+-- !7 --
+lo_orderdate 100.0 26.0 0.0 404.0 4.0 19920221 19980721
+
+-- !8 --
+lo_orderpriority 100.0 5.0 0.0 888.8000000000001 8.8
'1-URGENT' '5-LOW'
+
+-- !9 --
+lo_shippriority 100.0 1.0 0.0 404.0 4.0 0 0
+
+-- !10 --
+lo_extendedprice 100.0 100.0 0.0 404.0 4.0 104300 9066094
+
+-- !11 --
+lo_ordtotalprice 100.0 26.0 0.0 404.0 4.0 3428256 36771805
+
+-- !12 --
+lo_discount 100.0 11.0 0.0 404.0 4.0 0 10
+
+-- !13 --
+lo_revenue 100.0 100.0 0.0 404.0 4.0 101171 8703450
+
+-- !14 --
+lo_supplycost 100.0 100.0 0.0 404.0 4.0 58023 121374
+
+-- !15 --
+lo_tax 100.0 9.0 0.0 404.0 4.0 0 8
+
+-- !16 --
+lo_commitdate 100.0 95.0 0.0 404.0 4.0 19920515 19981016
+
+-- !17 --
+lo_shipmode 100.0 7.0 0.0 425.21 4.21 'AIR' 'TRUCK'
+
+-- !18 --
+lo_shipmode 6001215.0 0.0 0.0 0.0 0.0 'NULL' 'NULL'
+
+-- !19 --
+
diff --git
a/regression-test/suites/external_table_emr_p2/hive/test_hive_statistic.groovy
b/regression-test/suites/external_table_emr_p2/hive/test_hive_statistic.groovy
new file mode 100644
index 0000000000..ff184e4506
--- /dev/null
+++
b/regression-test/suites/external_table_emr_p2/hive/test_hive_statistic.groovy
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_statistic", "p2") {
+ String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
+ String extHiveHmsPort =
context.config.otherConfigs.get("extHiveHmsPort")
+ String catalog_name = "test_hive_statistic"
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hadoop.username' = 'hadoop',
+ 'hive.metastore.uris' =
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+ );
+ """
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+ sql """use statistics;"""
+ sql """analyze table `statistics` with sync"""
+ qt_1 "show column stats `statistics` (lo_quantity)"
+ qt_2 "show column stats `statistics` (lo_orderkey)"
+ qt_3 "show column stats `statistics` (lo_linenumber)"
+ qt_4 "show column stats `statistics` (lo_custkey)"
+ qt_5 "show column stats `statistics` (lo_partkey)"
+ qt_6 "show column stats `statistics` (lo_suppkey)"
+ qt_7 "show column stats `statistics` (lo_orderdate)"
+ qt_8 "show column stats `statistics` (lo_orderpriority)"
+ qt_9 "show column stats `statistics` (lo_shippriority)"
+ qt_10 "show column stats `statistics` (lo_extendedprice)"
+ qt_11 "show column stats `statistics` (lo_ordtotalprice)"
+ qt_12 "show column stats `statistics` (lo_discount)"
+ qt_13 "show column stats `statistics` (lo_revenue)"
+ qt_14 "show column stats `statistics` (lo_supplycost)"
+ qt_15 "show column stats `statistics` (lo_tax)"
+ qt_16 "show column stats `statistics` (lo_commitdate)"
+ qt_17 "show column stats `statistics` (lo_shipmode)"
+
+ sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS
('row_count'='6001215')"""
+ qt_18 "show column stats `statistics` (lo_shipmode)"
+
+ sql """drop stats statistics"""
+ qt_19 "show column stats statistics"
+ }
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]