This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 8e3c5fc HIVE-21206: Bootstrap replication is slow as it opens lot of metastore connections (Sankar Hariappan, reviewed by Mahesh Kumar Behera) 8e3c5fc is described below commit 8e3c5fcbf16b2688ca435bf7361da3a3d4b1a699 Author: Sankar Hariappan <sank...@apache.org> AuthorDate: Tue Feb 12 10:28:35 2019 +0530 HIVE-21206: Bootstrap replication is slow as it opens lot of metastore connections (Sankar Hariappan, reviewed by Mahesh Kumar Behera) Signed-off-by: Sankar Hariappan <sank...@apache.org> --- ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java | 5 +---- ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java | 4 +++- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java | 9 +++++++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java index 822051c..179f291 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java @@ -23,7 +23,6 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils; import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.parse.EximUtil; import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.plan.CopyWork; @@ -69,8 +68,6 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable { Path toPath = null; try { - Hive hiveDb = getHive(); - // Note: CopyWork supports copying multiple files, but ReplCopyWork doesn't. // Not clear of ReplCopyWork should inherit from CopyWork. if (work.getFromPaths().length > 1 || work.getToPaths().length > 1) { @@ -168,7 +165,7 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable { // its a replace (insert overwrite ) operation. if (work.getDeleteDestIfExist() && dstFs.exists(toPath)) { LOG.debug(" path " + toPath + " is cleaned before renaming"); - hiveDb.cleanUpOneDirectoryForReplace(toPath, dstFs, HIDDEN_FILES_PATH_FILTER, conf, work.getNeedRecycle(), + getHive().cleanUpOneDirectoryForReplace(toPath, dstFs, HIDDEN_FILES_PATH_FILTER, conf, work.getNeedRecycle(), work.getIsAutoPurge()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java index 11ef62c..b39771f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java @@ -184,7 +184,9 @@ public abstract class Task<T extends Serializable> implements Serializable, Node protected Hive getHive() { try { - return Hive.getWithFastCheck(conf); + // Hive.getWithFastCheck shouldn't be used here as it always re-opens metastore connection. + // The conf object in HMS client is always different from the one used here. + return Hive.get(conf); } catch (HiveException e) { LOG.error(StringUtils.stringifyException(e)); throw new RuntimeException(e); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index 33c25ed..6a2e0ca 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -64,6 +64,7 @@ import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.BucketCodec; import org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; @@ -358,6 +359,8 @@ public class TestTxnCommands extends TxnCommandsBaseForTests { throws Exception, MetaException, TException, NoSuchObjectException { hiveConf.setBoolean("hive.stats.autogather", true); hiveConf.setBoolean("hive.stats.column.autogather", true); + // Need to close the thread local Hive object so that configuration change is reflected to HMS. + Hive.closeCurrent(); runStatementOnDriver("drop table if exists " + tableName); runStatementOnDriver(String.format("create table %s (a int) stored as orc " + "TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')", @@ -433,6 +436,8 @@ public class TestTxnCommands extends TxnCommandsBaseForTests { String tableName = "mm_table"; hiveConf.setBoolean("hive.stats.autogather", true); hiveConf.setBoolean("hive.stats.column.autogather", true); + // Need to close the thread local Hive object so that configuration change is reflected to HMS. + Hive.closeCurrent(); runStatementOnDriver("drop table if exists " + tableName); runStatementOnDriver(String.format("create table %s (a int) stored as orc " + "TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')", @@ -459,6 +464,8 @@ public class TestTxnCommands extends TxnCommandsBaseForTests { Assert.assertEquals(1, stats.size()); msClient.close(); hiveConf.setBoolean(MetastoreConf.ConfVars.HIVE_TXN_STATS_ENABLED.getVarname(), false); + // Need to close the thread local Hive object so that configuration change is reflected to HMS. + Hive.closeCurrent(); // Running the query with stats disabled will cause stats in metastore itself to become invalid. runStatementOnDriver(String.format("insert into %s (a) values (1)", tableName)); hiveConf.setBoolean(MetastoreConf.ConfVars.HIVE_TXN_STATS_ENABLED.getVarname(), true); @@ -1224,6 +1231,8 @@ public class TestTxnCommands extends TxnCommandsBaseForTests { @Test public void testVersioning() throws Exception { hiveConf.set(MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID.getVarname(), "true"); + // Need to close the thread local Hive object so that configuration change is reflected to HMS. + Hive.closeCurrent(); runStatementOnDriver("drop table if exists T"); runStatementOnDriver("create table T (a int, b int) stored as orc"); int[][] data = {{1, 2}};