[ 
https://issues.apache.org/jira/browse/HIVE-26504?focusedWorklogId=809562&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-809562
 ]

ASF GitHub Bot logged work on HIVE-26504:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 16/Sep/22 15:14
            Start Date: 16/Sep/22 15:14
    Worklog Time Spent: 10m 
      Work Description: deniskuzZ commented on code in PR #3557:
URL: https://github.com/apache/hive/pull/3557#discussion_r973135097


##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java:
##########
@@ -1011,96 +1015,61 @@ private Path constructRenamedPath(Path defaultNewPath, 
Path currentPath) {
         defaultNewPath.toUri().getPath());
   }
 
-  @VisibleForTesting
-  public static List<ColumnStatistics> 
alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTable,
-      EnvironmentContext ec, String validWriteIds, Configuration conf, 
List<String> deletedCols)
-      throws MetaException, InvalidObjectException {
-    String catName = normalizeIdentifier(oldTable.isSetCatName() ? 
oldTable.getCatName() :
-        getDefaultCatalog(conf));
+  public static List<ColumnStatistics> getColumnStats(RawStore msdb, Table 
oldTable)
+      throws NoSuchObjectException, MetaException {
+    String catName = normalizeIdentifier(oldTable.isSetCatName()
+        ? oldTable.getCatName()
+        : getDefaultCatalog(msdb.getConf()));
     String dbName = oldTable.getDbName().toLowerCase();
     String tableName = normalizeIdentifier(oldTable.getTableName());
-    String newDbName = newTable.getDbName().toLowerCase();
-    String newTableName = normalizeIdentifier(newTable.getTableName());
-    //if its not called from cahced store then update the table
-    boolean doAlterTable = deletedCols == null;
-    List<ColumnStatistics> newMultiColStats = new ArrayList<>();
+    return msdb.getTableColumnStatistics(catName, dbName, tableName,
+        
oldTable.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList()));
+  }
 
+  @VisibleForTesting
+  public static List<ColumnStatistics> deleteTableColumnStats(RawStore msdb, 
Table oldTable, Table newTable, List<ColumnStatistics> multiColStats)
+      throws InvalidObjectException, MetaException {
+    List<ColumnStatistics> newMultiColStats = new ArrayList<>();
     try {
-      List<FieldSchema> oldCols = oldTable.getSd().getCols();
-      List<FieldSchema> newCols = newTable.getSd().getCols();
-      List<ColumnStatistics> multiColStats = null;
-      boolean updateColumnStats = !newDbName.equals(dbName) || 
!newTableName.equals(tableName)
-          || !MetaStoreServerUtils.columnsIncludedByNameType(oldCols, newCols);
-      // Don't bother in the case of ACID conversion.
-      updateColumnStats = updateColumnStats
-          && (TxnUtils.isAcidTable(oldTable) == 
TxnUtils.isAcidTable(newTable));
-      if (updateColumnStats) {
-        List<String> oldColNames = new ArrayList<>(oldCols.size());
-        for (FieldSchema oldCol : oldCols) {
-          oldColNames.add(oldCol.getName());
-        }
+      String catName = normalizeIdentifier(oldTable.isSetCatName()
+          ? oldTable.getCatName()
+          : getDefaultCatalog(msdb.getConf()));
+      String dbName = oldTable.getDbName().toLowerCase();
+      String tableName = normalizeIdentifier(oldTable.getTableName());
+      String newDbName = newTable.getDbName().toLowerCase();
+      String newTableName = normalizeIdentifier(newTable.getTableName());
+      List<FieldSchema> oldTableCols = oldTable.getSd().getCols();
+      List<FieldSchema> newTableCols = newTable.getSd().getCols();
+
+      boolean nameChanged = !newDbName.equals(dbName) || 
!newTableName.equals(tableName);
+
+      if ((nameChanged || 
!MetaStoreServerUtils.columnsIncludedByNameType(oldTableCols, newTableCols)) &&
+          // Don't bother in the case of ACID conversion.
+          TxnUtils.isAcidTable(oldTable) == TxnUtils.isAcidTable(newTable)) {
+        for (ColumnStatistics colStats : multiColStats) {
+          List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
+          List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
 
-        // NOTE: this doesn't check stats being compliant, but the alterTable 
call below does.
-        //       The worst we can do is delete the stats.
-        // Collect column stats which need to be rewritten and remove old 
stats.
-        multiColStats = msdb.getTableColumnStatistics(catName, dbName, 
tableName, oldColNames);
-        if (multiColStats.isEmpty()) {
-          updateColumnStats = false;
-        } else {
-          for (ColumnStatistics colStats : multiColStats) {
-            List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
-            if (statsObjs != null) {
-              // for out para, this value is initialized by caller.
-              if (deletedCols == null) {
-                deletedCols = new ArrayList<>();
+          if (statsObjs != null) {
+            for (ColumnStatisticsObj statsObj : statsObjs) {
+              boolean found = newTableCols.stream().anyMatch(c -> 
statsObj.getColName().equalsIgnoreCase(c.getName()) &&

Review Comment:
   can't we reuse filterColumnStatsForTableColumns by supplying negative 
condition?





Issue Time Tracking
-------------------

    Worklog Id:     (was: 809562)
    Time Spent: 1h 50m  (was: 1h 40m)

> User is not able to drop table
> ------------------------------
>
>                 Key: HIVE-26504
>                 URL: https://issues.apache.org/jira/browse/HIVE-26504
>             Project: Hive
>          Issue Type: Bug
>          Components: Hive
>            Reporter: László Végh
>            Assignee: László Végh
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 1h 50m
>  Remaining Estimate: 0h
>
> Hive won't store anything in *TAB_COL_STATS* for partitioned table, whereas 
> impala stores complete column stats in TAB_COL_STATS for partitioned table. 
> Deleting entries in TAB_COL_STATS is based on (DB_NAME, TABLE_NAME), not by 
> TBL_ID. Renamed tables were having old names in TAB_COL_STATS.
> To Repro:
> {code:java}
> beeline:
> set hive.create.as.insert.only=false;
> set hive.create.as.acid=false;
> create table testes.table_name_with_partition (id tinyint, name string) 
> partitioned by (col_to_partition bigint) stored as parquet;
> insert into testes.table_name_with_partition (id, name, col_to_partition) 
> values (1, "a", 2020), (2, "b", 2021), (3, "c", 2022);
> impala:
> compute stats testes.table_name_with_partition; -- backend shows new entries 
> in TAB_COL_STATS
> beeline:
> alter table testes.table_name_with_partition rename to 
> testes2.table_that_cant_be_droped;
> drop table testes2.table_that_cant_be_droped; -- This fails with 
> TAB_COL_STATS_fkey constraint violation.
> {code}
> Exception trace for drop table failure
> {code:java}
> Caused by: org.postgresql.util.PSQLException: ERROR: update or delete on 
> table "TBLS" violates foreign key constraint "TAB_COL_STATS_fkey" on table 
> "TAB_COL_STATS"
>   Detail: Key (TBL_ID)=(19816) is still referenced from table "TAB_COL_STATS".
>         at 
> org.postgresql.core.v3.QueryExecutorImpl.receiveErrorResponse(QueryExecutorImpl.java:2532)
>         at 
> org.postgresql.core.v3.QueryExecutorImpl.processResults(QueryExecutorImpl.java:2267)
>         ... 50 more
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to