This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f9c56d59fc [improvement](statistics)Support external table show table stats, modify column stats and drop stats (#21624)
f9c56d59fc is described below

commit f9c56d59fcd01b0b891cec1c8023e509830a0426
Author: Jibing-Li <[email protected]>
AuthorDate: Mon Jul 10 11:33:06 2023 +0800

    [improvement](statistics)Support external table show table stats, modify column stats and drop stats (#21624)
    
    Support external table show table stats, modify column stats and drop stats.
---
 .../doris/analysis/AlterColumnStatsStmt.java       | 26 +++------
 .../org/apache/doris/analysis/DropStatsStmt.java   | 17 ++++++
 .../apache/doris/analysis/ShowTableStatsStmt.java  |  3 --
 .../apache/doris/statistics/AnalysisManager.java   |  3 ++
 .../apache/doris/statistics/HMSAnalysisTask.java   | 29 ++++++++--
 .../doris/statistics/StatisticsRepository.java     | 12 +++++
 .../hive/test_hive_statistic.out                   | 57 ++++++++++++++++++++
 .../hive/test_hive_statistic.groovy                | 62 ++++++++++++++++++++++
 8 files changed, 183 insertions(+), 26 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
index 93edefcab1..3e16a380e4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
@@ -17,10 +17,9 @@
 
 package org.apache.doris.analysis;
 
-import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.Env;
-import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.Table;
+import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.ErrorCode;
@@ -28,7 +27,7 @@ import org.apache.doris.common.ErrorReport;
 import org.apache.doris.common.FeNameFormat;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.PrintableMap;
-import org.apache.doris.common.util.Util;
+import org.apache.doris.datasource.CatalogIf;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.statistics.ColumnStatistic;
@@ -109,9 +108,6 @@ public class AlterColumnStatsStmt extends DdlStmt {
         // check table name
         tableName.analyze(analyzer);
 
-        // disallow external catalog
-        Util.prohibitExternalCatalog(tableName.getCtl(), 
this.getClass().getSimpleName());
-
         // check partition & column
         checkColumnNames();
 
@@ -138,19 +134,11 @@ public class AlterColumnStatsStmt extends DdlStmt {
         });
     }
 
-    /**
-     * TODO(wzt): Support for external tables
-     */
     private void checkColumnNames() throws AnalysisException {
-        Database db = 
analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(tableName.getDb());
-        Table table = db.getTableOrAnalysisException(tableName.getTbl());
-
-        if (table.getType() != Table.TableType.OLAP) {
-            throw new AnalysisException("Only OLAP table statistics are 
supported");
-        }
-
-        OlapTable olapTable = (OlapTable) table;
-        if (olapTable.getColumn(columnName) == null) {
+        CatalogIf catalog = 
analyzer.getEnv().getCatalogMgr().getCatalog(tableName.getCtl());
+        DatabaseIf db = catalog.getDbOrAnalysisException(tableName.getDb());
+        TableIf table = db.getTableOrAnalysisException(tableName.getTbl());
+        if (table.getColumn(columnName) == null) {
             
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
                     columnName, FeNameFormat.getColumnNameRegex());
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java
index e821fecaed..aa80f664dc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java
@@ -27,6 +27,7 @@ import org.apache.doris.common.ErrorCode;
 import org.apache.doris.common.ErrorReport;
 import org.apache.doris.common.UserException;
 import org.apache.doris.datasource.CatalogIf;
+import org.apache.doris.datasource.InternalCatalog;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.qe.ConnectContext;
 
@@ -51,6 +52,8 @@ public class DropStatsStmt extends DdlStmt {
 
     private final TableName tableName;
     private Set<String> columnNames;
+    // Flag to drop external table row count in table_statistics.
+    private boolean dropTableRowCount;
 
     private long tblId;
 
@@ -58,6 +61,7 @@ public class DropStatsStmt extends DdlStmt {
         this.dropExpired = dropExpired;
         this.tableName = null;
         this.columnNames = null;
+        this.dropTableRowCount = false;
     }
 
     public DropStatsStmt(TableName tableName,
@@ -65,6 +69,11 @@ public class DropStatsStmt extends DdlStmt {
         this.tableName = tableName;
         if (columnNames != null) {
             this.columnNames = new HashSet<>(columnNames);
+            this.dropTableRowCount = false;
+        } else {
+            // columnNames == null means drop all columns, in this case,
+            // external table need to drop the table row count as well.
+            dropTableRowCount = true;
         }
         dropExpired = false;
     }
@@ -81,6 +90,10 @@ public class DropStatsStmt extends DdlStmt {
         }
         tableName.analyze(analyzer);
         String catalogName = tableName.getCtl();
+        if (InternalCatalog.INTERNAL_CATALOG_NAME.equals(catalogName)) {
+            // Internal table doesn't need to drop table row count.
+            dropTableRowCount = false;
+        }
         String dbName = tableName.getDb();
         String tblName = tableName.getTbl();
         CatalogIf catalog = analyzer.getEnv().getCatalogMgr()
@@ -115,6 +128,10 @@ public class DropStatsStmt extends DdlStmt {
         return columnNames;
     }
 
+    public boolean dropTableRowCount() {
+        return dropTableRowCount;
+    }
+
     @Override
     public String toSql() {
         StringBuilder sb = new StringBuilder();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index 845111d036..e462c8585c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -27,7 +27,6 @@ import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.ErrorCode;
 import org.apache.doris.common.ErrorReport;
 import org.apache.doris.common.UserException;
-import org.apache.doris.common.util.Util;
 import org.apache.doris.datasource.CatalogIf;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.qe.ConnectContext;
@@ -76,8 +75,6 @@ public class ShowTableStatsStmt extends ShowStmt {
                 throw new AnalysisException("Only one partition name could be 
specified");
             }
         }
-        // disallow external catalog
-        Util.prohibitExternalCatalog(tableName.getCtl(), 
this.getClass().getSimpleName());
         CatalogIf<DatabaseIf> catalog = 
Env.getCurrentEnv().getCatalogMgr().getCatalog(tableName.getCtl());
         if (catalog == null) {
             ErrorReport.reportAnalysisException("Catalog: {} not exists", 
tableName.getCtl());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 1b8f2e7e0d..fde1407c11 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -712,6 +712,9 @@ public class AnalysisManager extends Daemon implements 
Writable {
         for (String col : cols) {
             Env.getCurrentEnv().getStatisticsCache().invalidate(tblId, -1L, 
col);
         }
+        if (dropStatsStmt.dropTableRowCount()) {
+            StatisticsRepository.dropExternalTableStatistics(tblId);
+        }
     }
 
     public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) 
throws DdlException {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index a7b45c13cb..4483f738a7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -49,6 +49,25 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
     public static final String TIMESTAMP = "transient_lastDdlTime";
 
     private static final String ANALYZE_SQL_TABLE_TEMPLATE = "INSERT INTO "
+            + "${internalDB}.${columnStatTbl}"
+            + " SELECT "
+            + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
+            + "${catalogId} AS catalog_id, "
+            + "${dbId} AS db_id, "
+            + "${tblId} AS tbl_id, "
+            + "${idxId} AS idx_id, "
+            + "'${colId}' AS col_id, "
+            + "NULL AS part_id, "
+            + "COUNT(1) AS row_count, "
+            + "NDV(`${colName}`) AS ndv, "
+            + "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) AS 
null_count, "
+            + "MIN(`${colName}`) AS min, "
+            + "MAX(`${colName}`) AS max, "
+            + "${dataSizeFunction} AS data_size, "
+            + "NOW() "
+            + "FROM `${catalogName}`.`${dbName}`.`${tblName}`";
+
+    private static final String ANALYZE_SQL_PARTITION_TEMPLATE = "INSERT INTO "
             + "${internalDB}.${columnStatTbl}"
             + " SELECT "
             + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
@@ -104,7 +123,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
         if (isPartitionOnly) {
             for (String partId : partitionNames) {
                 StringBuilder sb = new StringBuilder();
-                sb.append(ANALYZE_TABLE_COUNT_TEMPLATE);
+                sb.append(ANALYZE_SQL_PARTITION_TEMPLATE);
                 sb.append(" where ");
                 String[] splits = partId.split("/");
                 for (int i = 0; i < splits.length; i++) {
@@ -122,7 +141,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
                 StatisticsRepository.persistTableStats(params);
             }
         } else {
-            Map<String, String> params = buildTableStatsParams("NULL");
+            Map<String, String> params = buildTableStatsParams(null);
             List<InternalQueryResult.ResultRow> columnResult =
                     StatisticsUtil.execStatisticQuery(new 
StringSubstitutor(params)
                     .replace(ANALYZE_TABLE_COUNT_TEMPLATE));
@@ -226,8 +245,11 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
     private Map<String, String> buildTableStatsParams(String partId) {
         Map<String, String> commonParams = new HashMap<>();
         String id = StatisticsUtil.constructId(tbl.getId(), -1);
-        if (!partId.equals("NULL")) {
+        if (partId == null) {
+            commonParams.put("partId", "NULL");
+        } else {
             id = StatisticsUtil.constructId(id, partId);
+            commonParams.put("partId", "\'" + partId + "\'");
         }
         commonParams.put("id", id);
         commonParams.put("catalogId", String.valueOf(catalog.getId()));
@@ -235,7 +257,6 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
         commonParams.put("tblId", String.valueOf(tbl.getId()));
         commonParams.put("indexId", "-1");
         commonParams.put("idxId", "-1");
-        commonParams.put("partId", "\'" + partId + "\'");
         commonParams.put("catalogName", catalog.getName());
         commonParams.put("dbName", db.getFullName());
         commonParams.put("tblName", tbl.getName());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index ced8e1e6a6..70c495f29b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -202,6 +202,18 @@ public class StatisticsRepository {
         dropStatisticsByColName(tblId, colNames, 
StatisticConstants.HISTOGRAM_TBL_NAME);
     }
 
+    public static void dropExternalTableStatistics(long tblId) throws 
DdlException {
+        Map<String, String> params = new HashMap<>();
+        String inPredicate = String.format("tbl_id = %s", tblId);
+        params.put("tblName", StatisticConstants.ANALYSIS_TBL_NAME);
+        params.put("condition", inPredicate);
+        try {
+            StatisticsUtil.execUpdate(new 
StringSubstitutor(params).replace(DROP_TABLE_STATISTICS_TEMPLATE));
+        } catch (Exception e) {
+            throw new DdlException(e.getMessage(), e);
+        }
+    }
+
     public static void dropStatisticsByColName(long tblId, Set<String> 
colNames, String statsTblName)
             throws DdlException {
         Map<String, String> params = new HashMap<>();
diff --git 
a/regression-test/data/external_table_emr_p2/hive/test_hive_statistic.out 
b/regression-test/data/external_table_emr_p2/hive/test_hive_statistic.out
new file mode 100644
index 0000000000..ad8b494c00
--- /dev/null
+++ b/regression-test/data/external_table_emr_p2/hive/test_hive_statistic.out
@@ -0,0 +1,57 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !1 --
+lo_quantity    100.0   46.0    0.0     404.0   4.0     1       50
+
+-- !2 --
+lo_orderkey    100.0   26.0    0.0     404.0   4.0     1       98
+
+-- !3 --
+lo_linenumber  100.0   7.0     0.0     404.0   4.0     1       7
+
+-- !4 --
+lo_custkey     100.0   26.0    0.0     404.0   4.0     67423   2735521
+
+-- !5 --
+lo_partkey     100.0   100.0   0.0     404.0   4.0     2250    989601
+
+-- !6 --
+lo_suppkey     100.0   100.0   0.0     404.0   4.0     4167    195845
+
+-- !7 --
+lo_orderdate   100.0   26.0    0.0     404.0   4.0     19920221        19980721
+
+-- !8 --
+lo_orderpriority       100.0   5.0     0.0     888.8000000000001       8.8     
'1-URGENT'      '5-LOW'
+
+-- !9 --
+lo_shippriority        100.0   1.0     0.0     404.0   4.0     0       0
+
+-- !10 --
+lo_extendedprice       100.0   100.0   0.0     404.0   4.0     104300  9066094
+
+-- !11 --
+lo_ordtotalprice       100.0   26.0    0.0     404.0   4.0     3428256 36771805
+
+-- !12 --
+lo_discount    100.0   11.0    0.0     404.0   4.0     0       10
+
+-- !13 --
+lo_revenue     100.0   100.0   0.0     404.0   4.0     101171  8703450
+
+-- !14 --
+lo_supplycost  100.0   100.0   0.0     404.0   4.0     58023   121374
+
+-- !15 --
+lo_tax 100.0   9.0     0.0     404.0   4.0     0       8
+
+-- !16 --
+lo_commitdate  100.0   95.0    0.0     404.0   4.0     19920515        19981016
+
+-- !17 --
+lo_shipmode    100.0   7.0     0.0     425.21  4.21    'AIR'   'TRUCK'
+
+-- !18 --
+lo_shipmode    6001215.0       0.0     0.0     0.0     0.0     'NULL'  'NULL'
+
+-- !19 --
+
diff --git 
a/regression-test/suites/external_table_emr_p2/hive/test_hive_statistic.groovy 
b/regression-test/suites/external_table_emr_p2/hive/test_hive_statistic.groovy
new file mode 100644
index 0000000000..ff184e4506
--- /dev/null
+++ 
b/regression-test/suites/external_table_emr_p2/hive/test_hive_statistic.groovy
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_statistic", "p2") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = 
context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = 
context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_hive_statistic"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        sql """use statistics;"""
+        sql """analyze table `statistics` with sync"""
+        qt_1 "show column stats `statistics` (lo_quantity)"
+        qt_2 "show column stats `statistics` (lo_orderkey)"
+        qt_3 "show column stats `statistics` (lo_linenumber)"
+        qt_4 "show column stats `statistics` (lo_custkey)"
+        qt_5 "show column stats `statistics` (lo_partkey)"
+        qt_6 "show column stats `statistics` (lo_suppkey)"
+        qt_7 "show column stats `statistics` (lo_orderdate)"
+        qt_8 "show column stats `statistics` (lo_orderpriority)"
+        qt_9 "show column stats `statistics` (lo_shippriority)"
+        qt_10 "show column stats `statistics` (lo_extendedprice)"
+        qt_11 "show column stats `statistics` (lo_ordtotalprice)"
+        qt_12 "show column stats `statistics` (lo_discount)"
+        qt_13 "show column stats `statistics` (lo_revenue)"
+        qt_14 "show column stats `statistics` (lo_supplycost)"
+        qt_15 "show column stats `statistics` (lo_tax)"
+        qt_16 "show column stats `statistics` (lo_commitdate)"
+        qt_17 "show column stats `statistics` (lo_shipmode)"
+
+        sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS 
('row_count'='6001215')"""
+        qt_18 "show column stats `statistics` (lo_shipmode)"
+
+        sql """drop stats statistics"""
+        qt_19 "show column stats statistics"
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to