Repository: hive
Updated Branches:
  refs/heads/branch-1.0 d687bfb81 -> 8026f3914


backport HIVE-9720: Metastore does not properly migrate column stats when 
renaming a table across databases (Chaoyu via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8026f391
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8026f391
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8026f391

Branch: refs/heads/branch-1.0
Commit: 8026f3914013825e224e62b6c540b3745459cb9e
Parents: d687bfb
Author: Pengcheng Xiong <pxi...@apache.org>
Authored: Mon Aug 31 14:26:35 2015 -0700
Committer: Pengcheng Xiong <pxi...@apache.org>
Committed: Mon Aug 31 14:26:35 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/metastore/HiveAlterHandler.java | 161 +++++++++++++++++++
 .../hadoop/hive/metastore/MetaStoreUtils.java   |  23 +++
 .../alter_table_invalidate_column_stats_2.q     |   6 +
 .../alter_table_invalidate_column_stats_2.q.out |  51 ++++++
 4 files changed, 241 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8026f391/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
----------------------------------------------------------------------
diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java 
b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index fc6215a..09c1c56 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -33,8 +33,11 @@ import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
 import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -42,6 +45,9 @@ import 
org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hive.common.util.HiveStringUtils;
+
+import com.google.common.collect.Lists;
 
 /**
  * Hive specific implementation of alter
@@ -191,6 +197,13 @@ public class HiveAlterHandler implements AlterHandler {
             Path newPartLocPath = new Path(oldUri.getScheme(), 
oldUri.getAuthority(), newPath);
             altps.add(ObjectPair.create(part, part.getSd().getLocation()));
             part.getSd().setLocation(newPartLocPath.toString());
+            String oldPartName = 
Warehouse.makePartName(oldt.getPartitionKeys(), part.getValues());
+            try {
+              //existing partition column stats is no longer valid, remove them
+              msdb.deletePartitionColumnStatistics(dbname, name, oldPartName, 
part.getValues(), null);
+            } catch (InvalidInputException iie) {
+              throw new InvalidOperationException("Unable to update partition 
stats in table rename." + iie);
+            }
             msdb.alterPartition(dbname, name, part.getValues(), part);
           }
         }
@@ -201,6 +214,7 @@ public class HiveAlterHandler implements AlterHandler {
           // alterPartition()
           MetaStoreUtils.updateUnpartitionedTableStatsFast(db, newt, wh, 
false, true);
       }
+      updateTableColumnStatsForAlterTable(msdb, oldt, newt);
       // now finally call alter table
       msdb.alterTable(dbname, name, newt);
       // commit the changes
@@ -293,6 +307,7 @@ public class HiveAlterHandler implements AlterHandler {
         if (MetaStoreUtils.requireCalStats(hiveConf, oldPart, new_part, tbl)) {
           MetaStoreUtils.updatePartitionStatsFast(new_part, wh, false, true);
         }
+        updatePartColumnStats(msdb, dbname, name, new_part.getValues(), 
new_part);
         msdb.alterPartition(dbname, name, new_part.getValues(), new_part);
       } catch (InvalidObjectException e) {
         throw new InvalidOperationException("alter is not possible");
@@ -331,6 +346,15 @@ public class HiveAlterHandler implements AlterHandler {
       // if the external partition is renamed, the file should not change
       if (tbl.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) {
         new_part.getSd().setLocation(oldPart.getSd().getLocation());
+        String oldPartName = Warehouse.makePartName(tbl.getPartitionKeys(), 
oldPart.getValues());
+        try {
+          //existing partition column stats is no longer valid, remove
+          msdb.deletePartitionColumnStatistics(dbname, name, oldPartName, 
oldPart.getValues(), null);
+        } catch (NoSuchObjectException nsoe) {
+          //ignore
+        } catch (InvalidInputException iie) {
+          throw new InvalidOperationException("Unable to update partition 
stats in table rename." + iie);
+        }
         msdb.alterPartition(dbname, name, part_vals, new_part);
       } else {
         try {
@@ -377,6 +401,15 @@ public class HiveAlterHandler implements AlterHandler {
           if (MetaStoreUtils.requireCalStats(hiveConf, oldPart, new_part, 
tbl)) {
             MetaStoreUtils.updatePartitionStatsFast(new_part, wh, false, true);
           }
+          String oldPartName = Warehouse.makePartName(tbl.getPartitionKeys(), 
oldPart.getValues());
+          try {
+            //existing partition column stats is no longer valid, remove
+            msdb.deletePartitionColumnStatistics(dbname, name, oldPartName, 
oldPart.getValues(), null);
+          } catch (NoSuchObjectException nsoe) {
+            //ignore
+          } catch (InvalidInputException iie) {
+            throw new InvalidOperationException("Unable to update partition 
stats in table rename." + iie);
+          }
           msdb.alterPartition(dbname, name, part_vals, new_part);
         }
       }
@@ -443,6 +476,7 @@ public class HiveAlterHandler implements AlterHandler {
         if (MetaStoreUtils.requireCalStats(hiveConf, oldTmpPart, tmpPart, 
tbl)) {
           MetaStoreUtils.updatePartitionStatsFast(tmpPart, wh, false, true);
         }
+        updatePartColumnStats(msdb, dbname, name, oldTmpPart.getValues(), 
tmpPart);
       }
       msdb.alterPartitions(dbname, name, partValsList, new_parts);
     } catch (InvalidObjectException e) {
@@ -490,4 +524,131 @@ public class HiveAlterHandler implements AlterHandler {
     return new Path(currentUri.getScheme(), currentUri.getAuthority(),
         defaultNewPath.toUri().getPath());
   }
+
+  /**
+   * Drops the column statistics of one partition that a column change has made stale.
+   *
+   * <p>Fetches the statistics stored under {@code oldPartName} for the names in
+   * {@code oldCols} and deletes every entry for which {@code newPart} has no column with
+   * the same name and the same type. Names are compared case-insensitively, matching what
+   * {@code updateTableColumnStatsForAlterTable} does for table-level statistics.
+   *
+   * @param msdb store used to read and delete the statistics
+   * @param oldPartition partition before the alter; supplies the database and table names
+   * @param oldPartName partition name the statistics are looked up and deleted under
+   * @param partVals partition values handed to the delete call
+   * @param oldCols columns of the partition before the alter
+   * @param newPart partition after the alter; its storage descriptor supplies the new columns
+   * @throws MetaException if one of the store calls raises it
+   * @throws InvalidObjectException if the store rejects the input; the original
+   *         {@link InvalidInputException} is attached as the cause
+   */
+  private void updatePartColumnStatsForAlterColumns(RawStore msdb, Partition oldPartition,
+      String oldPartName, List<String> partVals, List<FieldSchema> oldCols, Partition newPart)
+          throws MetaException, InvalidObjectException {
+    String dbName = oldPartition.getDbName();
+    String tableName = oldPartition.getTableName();
+    try {
+      List<String> oldPartNames = Lists.newArrayList(oldPartName);
+      List<String> oldColNames = new ArrayList<String>(oldCols.size());
+      for (FieldSchema oldCol : oldCols) {
+        oldColNames.add(oldCol.getName());
+      }
+      List<FieldSchema> newCols = newPart.getSd().getCols();
+      List<ColumnStatistics> partsColStats =
+          msdb.getPartitionColumnStatistics(dbName, tableName, oldPartNames, oldColNames);
+      if (partsColStats == null) {
+        // defensive: treat a missing result like "no statistics stored"
+        return;
+      }
+      // only one partition name was requested, so at most one entry is expected
+      assert (partsColStats.size() <= 1);
+      for (ColumnStatistics partColStats : partsColStats) {
+        for (ColumnStatisticsObj statsObj : partColStats.getStatsObj()) {
+          boolean found = false;
+          for (FieldSchema newCol : newCols) {
+            if (statsObj.getColName().equalsIgnoreCase(newCol.getName())
+                && statsObj.getColType().equals(newCol.getType())) {
+              found = true;
+              break;
+            }
+          }
+          if (!found) {
+            // the column was dropped, renamed or retyped: its statistics no longer apply
+            msdb.deletePartitionColumnStatistics(dbName, tableName, oldPartName, partVals,
+                statsObj.getColName());
+          }
+        }
+      }
+    } catch (NoSuchObjectException nsoe) {
+      // nothing to clean up when the store has no matching entry
+      LOG.debug("Could not find db entry." + nsoe);
+    } catch (InvalidInputException iie) {
+      InvalidObjectException ioe = new InvalidObjectException(
+          "Invalid input to update partition column stats in alter table change columns" + iie);
+      // keep the original stack trace available to whoever handles the rethrown exception
+      ioe.initCause(iie);
+      throw ioe;
+    }
+  }
+
+  /**
+   * Reconciles stored partition column statistics with an altered partition.
+   *
+   * <p>When the normalized database name, the normalized table name or the partition name
+   * differs between the old coordinates and {@code newPart}, every column statistic of the
+   * old partition is deleted. Otherwise, if both storage descriptors are present and their
+   * column lists differ, the per-column cleanup is delegated to
+   * {@link #updatePartColumnStatsForAlterColumns}. Nothing is done when the old table or the
+   * old partition is not found.
+   *
+   * @param msdb store used to look up the table and partition and to delete statistics
+   * @param dbName database of the partition before the alter; normalized before use
+   * @param tableName table of the partition before the alter; normalized before use
+   * @param partVals values identifying the partition before the alter
+   * @param newPart the partition as requested by the alter
+   * @throws MetaException if a store call or the partition-name construction raises it
+   * @throws InvalidObjectException if the store rejects the input; the original
+   *         {@link InvalidInputException} is attached as the cause
+   */
+  private void updatePartColumnStats(RawStore msdb, String dbName, String tableName,
+      List<String> partVals, Partition newPart) throws MetaException, InvalidObjectException {
+    dbName = HiveStringUtils.normalizeIdentifier(dbName);
+    tableName = HiveStringUtils.normalizeIdentifier(tableName);
+    String newDbName = HiveStringUtils.normalizeIdentifier(newPart.getDbName());
+    String newTableName = HiveStringUtils.normalizeIdentifier(newPart.getTableName());
+
+    Table oldTable = msdb.getTable(dbName, tableName);
+    if (oldTable == null) {
+      return;
+    }
+
+    try {
+      String oldPartName = Warehouse.makePartName(oldTable.getPartitionKeys(), partVals);
+      String newPartName =
+          Warehouse.makePartName(oldTable.getPartitionKeys(), newPart.getValues());
+      if (!dbName.equals(newDbName) || !tableName.equals(newTableName)
+          || !oldPartName.equals(newPartName)) {
+        // the partition was moved or renamed: drop everything stored under the old name
+        // NOTE(review): a null column name presumably means "all columns" - confirm in RawStore
+        msdb.deletePartitionColumnStatistics(dbName, tableName, oldPartName, partVals, null);
+        return;
+      }
+
+      Partition oldPartition = msdb.getPartition(dbName, tableName, partVals);
+      if (oldPartition == null) {
+        return;
+      }
+      if (oldPartition.getSd() == null || newPart.getSd() == null) {
+        // without both storage descriptors there are no column lists to compare
+        return;
+      }
+      List<FieldSchema> oldCols = oldPartition.getSd().getCols();
+      if (!MetaStoreUtils.areSameColumns(oldCols, newPart.getSd().getCols())) {
+        updatePartColumnStatsForAlterColumns(msdb, oldPartition, oldPartName, partVals,
+            oldCols, newPart);
+      }
+    } catch (NoSuchObjectException nsoe) {
+      // nothing to clean up when the store has no matching entry
+      LOG.debug("Could not find db entry." + nsoe);
+    } catch (InvalidInputException iie) {
+      InvalidObjectException ioe =
+          new InvalidObjectException("Invalid input to update partition column stats." + iie);
+      // keep the original stack trace available to whoever handles the rethrown exception
+      ioe.initCause(iie);
+      throw ioe;
+    }
+  }
+
+  /**
+   * Reconciles stored table-level column statistics with an altered table.
+   *
+   * <p>When the database or table name changes, all column statistics of the old table are
+   * deleted. Otherwise, if the column lists differ, the statistics of every old column that
+   * has no counterpart with the same name (case-insensitive) and the same type in the new
+   * table are deleted.
+   *
+   * <p>Old and new names are both normalized before they are compared, so a difference in
+   * letter case alone is not mistaken for a rename.
+   *
+   * @param msdb store used to read and delete the statistics
+   * @param oldTable table definition before the alter
+   * @param newTable table definition requested by the alter
+   * @throws MetaException if one of the store calls raises it
+   * @throws InvalidObjectException if the store rejects the input; the original
+   *         {@link InvalidInputException} is attached as the cause
+   */
+  private void updateTableColumnStatsForAlterTable(RawStore msdb, Table oldTable,
+      Table newTable) throws MetaException, InvalidObjectException {
+    String dbName = HiveStringUtils.normalizeIdentifier(oldTable.getDbName());
+    String tableName = HiveStringUtils.normalizeIdentifier(oldTable.getTableName());
+    String newDbName = HiveStringUtils.normalizeIdentifier(newTable.getDbName());
+    String newTableName = HiveStringUtils.normalizeIdentifier(newTable.getTableName());
+
+    try {
+      if (!dbName.equals(newDbName) || !tableName.equals(newTableName)) {
+        // the table was renamed or moved: drop everything stored under the old name
+        // NOTE(review): a null column name presumably means "all columns" - confirm in RawStore
+        msdb.deleteTableColumnStatistics(dbName, tableName, null);
+        return;
+      }
+
+      List<FieldSchema> oldCols = oldTable.getSd().getCols();
+      List<FieldSchema> newCols = newTable.getSd().getCols();
+      if (MetaStoreUtils.areSameColumns(oldCols, newCols)) {
+        return;
+      }
+
+      List<String> oldColNames = new ArrayList<String>(oldCols.size());
+      for (FieldSchema oldCol : oldCols) {
+        oldColNames.add(oldCol.getName());
+      }
+
+      ColumnStatistics cs = msdb.getTableColumnStatistics(dbName, tableName, oldColNames);
+      if (cs == null) {
+        return;
+      }
+
+      for (ColumnStatisticsObj statsObj : cs.getStatsObj()) {
+        boolean found = false;
+        for (FieldSchema newCol : newCols) {
+          if (statsObj.getColName().equalsIgnoreCase(newCol.getName())
+              && statsObj.getColType().equals(newCol.getType())) {
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          // the column was dropped, renamed or retyped: its statistics no longer apply
+          msdb.deleteTableColumnStatistics(dbName, tableName, statsObj.getColName());
+        }
+      }
+    } catch (NoSuchObjectException nsoe) {
+      // nothing to clean up when the store has no matching entry
+      LOG.debug("Could not find db entry." + nsoe);
+    } catch (InvalidInputException e) {
+      // should not happen since the inputs were verified before being passed in
+      InvalidObjectException ioe =
+          new InvalidObjectException("Invalid inputs to update table column stats: " + e);
+      // keep the original stack trace available to whoever handles the rethrown exception
+      ioe.initCause(e);
+      throw ioe;
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/8026f391/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java 
b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 3cf9f17..35e39b3 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -552,6 +552,29 @@ public class MetaStoreUtils {
     }
   }
 
+  /**
+   * Tells whether an alter-table request changes the table's column list.
+   *
+   * @param oldTable table definition before the alter
+   * @param newTable table definition requested by the alter
+   * @return {@code true} when the two storage descriptors' column lists are not the same
+   *         according to {@code areSameColumns}
+   */
+  static boolean isCascadeNeededInAlterTable(Table oldTable, Table newTable) {
+    // currently cascade only supports add/replace columns and
+    // changing column type/position/name/comments
+    boolean unchanged =
+        areSameColumns(oldTable.getSd().getCols(), newTable.getSd().getCols());
+    return !unchanged;
+  }
+
+  /**
+   * Compares two column lists position by position.
+   *
+   * @param oldCols first column list
+   * @param newCols second column list
+   * @return {@code true} when both lists have the same size and every element of
+   *         {@code oldCols} equals the element at the same index in {@code newCols}
+   */
+  static boolean areSameColumns(List<FieldSchema> oldCols, List<FieldSchema> newCols) {
+    final int count = oldCols.size();
+    if (count != newCols.size()) {
+      return false;
+    }
+    for (int idx = 0; idx < count; idx++) {
+      // order matters: a reordered column list counts as different
+      if (!oldCols.get(idx).equals(newCols.get(idx))) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   /**
    * @return true if oldType and newType are compatible.
    * Two types are compatible if we have internal functions to cast one to 
another.

http://git-wip-us.apache.org/repos/asf/hive/blob/8026f391/ql/src/test/queries/clientpositive/alter_table_invalidate_column_stats_2.q
----------------------------------------------------------------------
diff --git 
a/ql/src/test/queries/clientpositive/alter_table_invalidate_column_stats_2.q 
b/ql/src/test/queries/clientpositive/alter_table_invalidate_column_stats_2.q
new file mode 100644
index 0000000..5dd82c6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/alter_table_invalidate_column_stats_2.q
@@ -0,0 +1,6 @@
+create table testpart1(id int) partitioned by (dept string);
+alter table testpart1 add partition(dept='a');
+insert into table testpart1 partition(dept='a') values (1);
+analyze table testpart1 partition(dept='a') compute statistics for columns;
+alter table testpart1 rename to testpart1_rename;
+drop table testpart1_rename;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/8026f391/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats_2.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats_2.q.out
 
b/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats_2.q.out
new file mode 100644
index 0000000..8ff9e81
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats_2.q.out
@@ -0,0 +1,51 @@
+PREHOOK: query: create table testpart1(id int) partitioned by (dept string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testpart1
+POSTHOOK: query: create table testpart1(id int) partitioned by (dept string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testpart1
+PREHOOK: query: alter table testpart1 add partition(dept='a')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@testpart1
+POSTHOOK: query: alter table testpart1 add partition(dept='a')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@testpart1
+POSTHOOK: Output: default@testpart1@dept=a
+PREHOOK: query: insert into table testpart1 partition(dept='a') values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@testpart1@dept=a
+POSTHOOK: query: insert into table testpart1 partition(dept='a') values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@testpart1@dept=a
+POSTHOOK: Lineage: testpart1 PARTITION(dept=a).id EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+PREHOOK: query: analyze table testpart1 partition(dept='a') compute statistics 
for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testpart1
+PREHOOK: Input: default@testpart1@dept=a
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table testpart1 partition(dept='a') compute 
statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testpart1
+POSTHOOK: Input: default@testpart1@dept=a
+#### A masked pattern was here ####
+PREHOOK: query: alter table testpart1 rename to testpart1_rename
+PREHOOK: type: ALTERTABLE_RENAME
+PREHOOK: Input: default@testpart1
+PREHOOK: Output: default@testpart1
+POSTHOOK: query: alter table testpart1 rename to testpart1_rename
+POSTHOOK: type: ALTERTABLE_RENAME
+POSTHOOK: Input: default@testpart1
+POSTHOOK: Output: default@testpart1
+POSTHOOK: Output: default@testpart1_rename
+PREHOOK: query: drop table testpart1_rename
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@testpart1_rename
+PREHOOK: Output: default@testpart1_rename
+POSTHOOK: query: drop table testpart1_rename
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@testpart1_rename
+POSTHOOK: Output: default@testpart1_rename

Reply via email to