Repository: hive
Updated Branches:
  refs/heads/master 1e3e74e54 -> c52aba1a6


HIVE-17421: Clear incorrect stats after replication (Daniel Dai, reviewed by Anishek Agarwal, Thejas Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c52aba1a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c52aba1a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c52aba1a

Branch: refs/heads/master
Commit: c52aba1a6bc7c983a8c5776723ce138a76b52064
Parents: 1e3e74e
Author: Daniel Dai <da...@hortonworks.com>
Authored: Thu Sep 7 12:17:57 2017 -0700
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Thu Sep 7 12:17:57 2017 -0700

----------------------------------------------------------------------
 .../hive/ql/parse/TestReplicationScenarios.java | 70 ++++++++++++++++++++
 .../hive/ql/parse/ImportSemanticAnalyzer.java   |  5 ++
 2 files changed, 75 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c52aba1a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
index 9667449..6a2e400 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
@@ -141,6 +141,7 @@ public class TestReplicationScenarios {
     hconf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    hconf.set(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL.varname,
              "org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore");
+    hconf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true);
     System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
     System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");
 
@@ -2852,6 +2853,75 @@ public class TestReplicationScenarios {
     }
   }
 
+  @Test
+  public void testRemoveStats() throws IOException {
+    String name = testName.getMethodName();
+    String dbName = createDB(name, driver);
+
+    String[] unptn_data = new String[]{ "1" , "2" };
+    String[] ptn_data_1 = new String[]{ "5", "7", "8"};
+    String[] ptn_data_2 = new String[]{ "3", "2", "9"};
+
+    String unptn_locn = new Path(TEST_PATH, name + "_unptn").toUri().getPath();
+    String ptn_locn_1 = new Path(TEST_PATH, name + "_ptn1").toUri().getPath();
+    String ptn_locn_2 = new Path(TEST_PATH, name + "_ptn2").toUri().getPath();
+
+    createTestDataFile(unptn_locn, unptn_data);
+    createTestDataFile(ptn_locn_1, ptn_data_1);
+    createTestDataFile(ptn_locn_2, ptn_data_2);
+
+    run("CREATE TABLE " + dbName + ".unptned(a int) STORED AS TEXTFILE", 
driver);
+    run("LOAD DATA LOCAL INPATH '" + unptn_locn + "' OVERWRITE INTO TABLE " + 
dbName + ".unptned", driver);
+    run("CREATE TABLE " + dbName + ".ptned(a int) partitioned by (b int) 
STORED AS TEXTFILE", driver);
+    run("LOAD DATA LOCAL INPATH '" + ptn_locn_1 + "' OVERWRITE INTO TABLE " + 
dbName + ".ptned PARTITION(b=1)", driver);
+    run("ANALYZE TABLE " + dbName + ".unptned COMPUTE STATISTICS FOR COLUMNS", 
driver);
+    run("ANALYZE TABLE " + dbName + ".unptned COMPUTE STATISTICS", driver);
+    run("ANALYZE TABLE " + dbName + ".ptned partition(b) COMPUTE STATISTICS 
FOR COLUMNS", driver);
+    run("ANALYZE TABLE " + dbName + ".ptned partition(b) COMPUTE STATISTICS", 
driver);
+
+    verifySetup("SELECT * from " + dbName + ".unptned", unptn_data, driver);
+    verifySetup("SELECT a from " + dbName + ".ptned WHERE b=1", ptn_data_1, 
driver);
+    verifySetup("SELECT count(*) from " + dbName + ".unptned", new 
String[]{"2"}, driver);
+    verifySetup("SELECT count(*) from " + dbName + ".ptned", new 
String[]{"3"}, driver);
+    verifySetup("SELECT max(a) from " + dbName + ".unptned", new 
String[]{"2"}, driver);
+    verifySetup("SELECT max(a) from " + dbName + ".ptned where b=1", new 
String[]{"8"}, driver);
+
+    advanceDumpDir();
+    run("REPL DUMP " + dbName, driver);
+    String replDumpLocn = getResult(0,0,driver);
+    String replDumpId = getResult(0,1,true,driver);
+    LOG.info("Dumped to {} with id {}",replDumpLocn,replDumpId);
+    run("REPL LOAD " + dbName + "_dupe FROM '" + replDumpLocn + "'", 
driverMirror);
+
+    verifyRun("SELECT count(*) from " + dbName + "_dupe.unptned", new 
String[]{"2"}, driverMirror);
+    verifyRun("SELECT count(*) from " + dbName + "_dupe.ptned", new 
String[]{"3"}, driverMirror);
+    verifyRun("SELECT max(a) from " + dbName + "_dupe.unptned", new 
String[]{"2"}, driverMirror);
+    verifyRun("SELECT max(a) from " + dbName + "_dupe.ptned where b=1", new 
String[]{"8"}, driverMirror);
+
+    run("CREATE TABLE " + dbName + ".unptned2(a int) STORED AS TEXTFILE", 
driver);
+    run("LOAD DATA LOCAL INPATH '" + unptn_locn + "' OVERWRITE INTO TABLE " + 
dbName + ".unptned2", driver);
+    run("CREATE TABLE " + dbName + ".ptned2(a int) partitioned by (b int) 
STORED AS TEXTFILE", driver);
+    run("LOAD DATA LOCAL INPATH '" + ptn_locn_1 + "' OVERWRITE INTO TABLE " + 
dbName + ".ptned2 PARTITION(b=1)", driver);
+    run("ANALYZE TABLE " + dbName + ".unptned2 COMPUTE STATISTICS FOR 
COLUMNS", driver);
+    run("ANALYZE TABLE " + dbName + ".unptned2 COMPUTE STATISTICS", driver);
+    run("ANALYZE TABLE " + dbName + ".ptned2 partition(b) COMPUTE STATISTICS 
FOR COLUMNS", driver);
+    run("ANALYZE TABLE " + dbName + ".ptned2 partition(b) COMPUTE STATISTICS", 
driver);
+
+    advanceDumpDir();
+    run("REPL DUMP " + dbName + " FROM " + replDumpId, driver);
+    String incrementalDumpLocn = getResult(0,0,driver);
+    String incrementalDumpId = getResult(0,1,true,driver);
+    LOG.info("Dumped to {} with id {}", incrementalDumpLocn, 
incrementalDumpId);
+    run("EXPLAIN REPL LOAD " + dbName + "_dupe FROM '" + incrementalDumpLocn + 
"'", driverMirror);
+    printOutput(driverMirror);
+    run("REPL LOAD " + dbName + "_dupe FROM '"+incrementalDumpLocn+"'", 
driverMirror);
+
+    verifyRun("SELECT count(*) from " + dbName + "_dupe.unptned2", new 
String[]{"2"}, driverMirror);
+    verifyRun("SELECT count(*) from " + dbName + "_dupe.ptned2", new 
String[]{"3"}, driverMirror);
+    verifyRun("SELECT max(a) from " + dbName + "_dupe.unptned2", new 
String[]{"2"}, driverMirror);
+    verifyRun("SELECT max(a) from " + dbName + "_dupe.ptned2 where b=1", new 
String[]{"8"}, driverMirror);
+  }
+
   private static String createDB(String name, Driver myDriver) {
     LOG.info("Testing " + name);
     String dbName = name + "_" + tid;

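A note on the test setup: the one-line addition in the first hunk, hconf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true), is what gives the new test its teeth. With that optimization enabled, aggregates such as count(*) and max(a) can be answered directly from metastore statistics instead of scanning the data, so a replicated table that wrongly kept the source's COLUMN_STATS_ACCURATE marker would return stats-derived answers for data never analyzed on the destination. A minimal sketch of that shortcut, assuming simplified standalone names (the parameter keys "COLUMN_STATS_ACCURATE" and "numRows" are real Hive metastore keys; the class and method here are illustrative only):

import java.util.HashMap;
import java.util.Map;

public class StatsShortcutSketch {
  // Returns a row count from table parameters when stats are marked
  // accurate, or null to signal that the data must actually be scanned.
  static Long countFromMetadata(Map<String, String> tblParams) {
    if (tblParams.containsKey("COLUMN_STATS_ACCURATE")
        && tblParams.containsKey("numRows")) {
      return Long.parseLong(tblParams.get("numRows"));
    }
    return null;
  }

  public static void main(String[] args) {
    Map<String, String> params = new HashMap<>();
    params.put("COLUMN_STATS_ACCURATE", "{\"BASIC_STATS\":\"true\"}");
    params.put("numRows", "2");
    System.out.println(countFromMetadata(params)); // 2, served from metadata

    params.remove("COLUMN_STATS_ACCURATE");        // what the patch does
    System.out.println(countFromMetadata(params)); // null, forces a scan
  }
}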
http://git-wip-us.apache.org/repos/asf/hive/blob/c52aba1a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
index 7f3460f..aa4c660 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.TableType;
@@ -231,6 +232,7 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
 
     if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){
       tblDesc.setReplicationSpec(replicationSpec);
+      tblDesc.getTblProps().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
     }
 
     if (isExternalSet){
@@ -254,6 +256,9 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
     for (Partition partition : partitions) {
       // TODO: this should ideally not create AddPartitionDesc per partition
      AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
+      if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){
+        partsDesc.getPartition(0).getPartParams().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
+      }
       partitionDescs.add(partsDesc);
     }
 

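Both hunks in ImportSemanticAnalyzer.java apply the same guard: only when replicationSpec.isInReplicationScope() is true (REPL LOAD, as opposed to a plain IMPORT) is the COLUMN_STATS_ACCURATE marker dropped, once from the table descriptor's properties and once from each partition descriptor's parameters. A compact paraphrase of that behaviour, assuming plain Map stand-ins for the property maps the patch reaches through tblDesc.getTblProps() and partsDesc.getPartition(0).getPartParams():

import java.util.List;
import java.util.Map;

public class ReplicationStatsGuardSketch {
  static void clearStaleStatsMarkers(boolean inReplicationScope,
                                     Map<String, String> tblProps,
                                     List<Map<String, String>> partParams) {
    if (!inReplicationScope) {
      return; // a plain IMPORT keeps whatever statistics the dump carried
    }
    // Without the accuracy marker, the destination recomputes statistics
    // (or scans the data) rather than trusting values copied from the source.
    tblProps.remove("COLUMN_STATS_ACCURATE");
    for (Map<String, String> params : partParams) {
      params.remove("COLUMN_STATS_ACCURATE");
    }
  }
}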