Repository: hive Updated Branches: refs/heads/master 1e3e74e54 -> c52aba1a6
HIVE-17421: Clear incorrect stats after replication (Daniel Dai, reviewed by Anishek Agarwal, Thejas Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c52aba1a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c52aba1a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c52aba1a Branch: refs/heads/master Commit: c52aba1a6bc7c983a8c5776723ce138a76b52064 Parents: 1e3e74e Author: Daniel Dai <da...@hortonworks.com> Authored: Thu Sep 7 12:17:57 2017 -0700 Committer: Daniel Dai <da...@hortonworks.com> Committed: Thu Sep 7 12:17:57 2017 -0700 ---------------------------------------------------------------------- .../hive/ql/parse/TestReplicationScenarios.java | 70 ++++++++++++++++++++ .../hive/ql/parse/ImportSemanticAnalyzer.java | 5 ++ 2 files changed, 75 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/c52aba1a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java index 9667449..6a2e400 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java @@ -141,6 +141,7 @@ public class TestReplicationScenarios { hconf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); hconf.set(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL.varname, "org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore"); + hconf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true); System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); @@ -2852,6 +2853,75 @@ public class TestReplicationScenarios { } } + @Test + public void testRemoveStats() throws IOException { + String name = testName.getMethodName(); + String dbName = createDB(name, driver); + + String[] unptn_data = new String[]{ "1" , "2" }; + String[] ptn_data_1 = new String[]{ "5", "7", "8"}; + String[] ptn_data_2 = new String[]{ "3", "2", "9"}; + + String unptn_locn = new Path(TEST_PATH, name + "_unptn").toUri().getPath(); + String ptn_locn_1 = new Path(TEST_PATH, name + "_ptn1").toUri().getPath(); + String ptn_locn_2 = new Path(TEST_PATH, name + "_ptn2").toUri().getPath(); + + createTestDataFile(unptn_locn, unptn_data); + createTestDataFile(ptn_locn_1, ptn_data_1); + createTestDataFile(ptn_locn_2, ptn_data_2); + + run("CREATE TABLE " + dbName + ".unptned(a int) STORED AS TEXTFILE", driver); + run("LOAD DATA LOCAL INPATH '" + unptn_locn + "' OVERWRITE INTO TABLE " + dbName + ".unptned", driver); + run("CREATE TABLE " + dbName + ".ptned(a int) partitioned by (b int) STORED AS TEXTFILE", driver); + run("LOAD DATA LOCAL INPATH '" + ptn_locn_1 + "' OVERWRITE INTO TABLE " + dbName + ".ptned PARTITION(b=1)", driver); + run("ANALYZE TABLE " + dbName + ".unptned COMPUTE STATISTICS FOR COLUMNS", driver); + run("ANALYZE TABLE " + dbName + ".unptned COMPUTE STATISTICS", driver); + run("ANALYZE TABLE " + dbName + ".ptned partition(b) COMPUTE STATISTICS FOR COLUMNS", driver); + run("ANALYZE TABLE " + dbName + ".ptned partition(b) COMPUTE STATISTICS", driver); + + verifySetup("SELECT * from " + dbName + ".unptned", unptn_data, driver); + verifySetup("SELECT a from " + dbName + ".ptned WHERE b=1", ptn_data_1, driver); + verifySetup("SELECT count(*) from " + dbName + ".unptned", new String[]{"2"}, driver); + verifySetup("SELECT count(*) from " + dbName + ".ptned", new String[]{"3"}, driver); + verifySetup("SELECT max(a) from " + dbName + ".unptned", new String[]{"2"}, driver); + verifySetup("SELECT max(a) from " + dbName + ".ptned where b=1", new String[]{"8"}, driver); + + advanceDumpDir(); + run("REPL DUMP " + dbName, driver); + String replDumpLocn = getResult(0,0,driver); + String replDumpId = getResult(0,1,true,driver); + LOG.info("Dumped to {} with id {}",replDumpLocn,replDumpId); + run("REPL LOAD " + dbName + "_dupe FROM '" + replDumpLocn + "'", driverMirror); + + verifyRun("SELECT count(*) from " + dbName + "_dupe.unptned", new String[]{"2"}, driverMirror); + verifyRun("SELECT count(*) from " + dbName + "_dupe.ptned", new String[]{"3"}, driverMirror); + verifyRun("SELECT max(a) from " + dbName + "_dupe.unptned", new String[]{"2"}, driverMirror); + verifyRun("SELECT max(a) from " + dbName + "_dupe.ptned where b=1", new String[]{"8"}, driverMirror); + + run("CREATE TABLE " + dbName + ".unptned2(a int) STORED AS TEXTFILE", driver); + run("LOAD DATA LOCAL INPATH '" + unptn_locn + "' OVERWRITE INTO TABLE " + dbName + ".unptned2", driver); + run("CREATE TABLE " + dbName + ".ptned2(a int) partitioned by (b int) STORED AS TEXTFILE", driver); + run("LOAD DATA LOCAL INPATH '" + ptn_locn_1 + "' OVERWRITE INTO TABLE " + dbName + ".ptned2 PARTITION(b=1)", driver); + run("ANALYZE TABLE " + dbName + ".unptned2 COMPUTE STATISTICS FOR COLUMNS", driver); + run("ANALYZE TABLE " + dbName + ".unptned2 COMPUTE STATISTICS", driver); + run("ANALYZE TABLE " + dbName + ".ptned2 partition(b) COMPUTE STATISTICS FOR COLUMNS", driver); + run("ANALYZE TABLE " + dbName + ".ptned2 partition(b) COMPUTE STATISTICS", driver); + + advanceDumpDir(); + run("REPL DUMP " + dbName + " FROM " + replDumpId, driver); + String incrementalDumpLocn = getResult(0,0,driver); + String incrementalDumpId = getResult(0,1,true,driver); + LOG.info("Dumped to {} with id {}", incrementalDumpLocn, incrementalDumpId); + run("EXPLAIN REPL LOAD " + dbName + "_dupe FROM '" + incrementalDumpLocn + "'", driverMirror); + printOutput(driverMirror); + run("REPL LOAD " + dbName + "_dupe FROM '"+incrementalDumpLocn+"'", driverMirror); + + verifyRun("SELECT count(*) from " + dbName + "_dupe.unptned2", new String[]{"2"}, driverMirror); + verifyRun("SELECT count(*) from " + dbName + "_dupe.ptned2", new String[]{"3"}, driverMirror); + verifyRun("SELECT max(a) from " + dbName + "_dupe.unptned2", new String[]{"2"}, driverMirror); + verifyRun("SELECT max(a) from " + dbName + "_dupe.ptned2 where b=1", new String[]{"8"}, driverMirror); + } + private static String createDB(String name, Driver myDriver) { LOG.info("Testing " + name); String dbName = name + "_" + tid; http://git-wip-us.apache.org/repos/asf/hive/blob/c52aba1a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 7f3460f..aa4c660 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -37,6 +37,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.TableType; @@ -231,6 +232,7 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer { if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){ tblDesc.setReplicationSpec(replicationSpec); + tblDesc.getTblProps().remove(StatsSetupConst.COLUMN_STATS_ACCURATE); } if (isExternalSet){ @@ -254,6 +256,9 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer { for (Partition partition : partitions) { // TODO: this should ideally not create AddPartitionDesc per partition AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition); + if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){ + partsDesc.getPartition(0).getPartParams().remove(StatsSetupConst.COLUMN_STATS_ACCURATE); + } partitionDescs.add(partsDesc); }