This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new d480aa575f0 HIVE-26598: Fix unsetting of db params for optimized bootstrap when repl dump initiates data copy (Rakshith Chandraiah, reviewed by Teddy Choi) d480aa575f0 is described below commit d480aa575f09e815bd169c4e2cff0f337eea6371 Author: Rakshith C <56068841+rakshith...@users.noreply.github.com> AuthorDate: Thu Jan 19 10:46:28 2023 +0530 HIVE-26598: Fix unsetting of db params for optimized bootstrap when repl dump initiates data copy (Rakshith Chandraiah, reviewed by Teddy Choi) Co-authored-by: rakshithc <rakshi...@cloudera.com> --- .../parse/TestReplicationOptimisedBootstrap.java | 57 ++++++++++++++++++++++ .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 6 +-- .../hadoop/hive/ql/exec/repl/ReplDumpWork.java | 9 ++++ 3 files changed, 68 insertions(+), 4 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java index 165b4d438e2..396abd24b47 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java @@ -55,6 +55,7 @@ import java.util.Map; import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_DONT_SET; import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_RESET; +import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_ENABLE_BACKGROUND_THREAD; import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_TARGET_DB_PROPERTY; import static org.apache.hadoop.hive.common.repl.ReplConst.TARGET_OF_REPLICATION; import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION; @@ -1092,4 +1093,60 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationScenariosA } return 
txnHandler.getOpenTxns(txnListExcludingReplCreated).getOpen_txns(); } + + @Test + public void testDbParametersAfterOptimizedBootstrap() throws Throwable { + List<String> withClause = Arrays.asList( + String.format("'%s'='%s'", HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false"), + String.format("'%s'='%s'", HiveConf.ConfVars.HIVE_REPL_FAILOVER_START.varname, "true") + ); + + // bootstrap + primary.run("use " + primaryDbName) + .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + + "tblproperties (\"transactional\"=\"true\")") + .run("insert into table t1 values (1),(2)") + .dump(primaryDbName, withClause); + replica.load(replicatedDbName, primaryDbName, withClause); + + // incremental + primary.run("use " + primaryDbName) + .run("insert into table t1 values (3)") + .dump(primaryDbName, withClause); + replica.load(replicatedDbName, primaryDbName, withClause); + + // make some changes on primary + primary.run("use " + primaryDbName) + .run("insert into table t1 values (4)"); + + withClause = Arrays.asList( + String.format("'%s'='%s'", HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false") + ); + // 1st cycle of optimized bootstrap + replica.dump(replicatedDbName, withClause); + primary.load(primaryDbName, replicatedDbName, withClause); + + String[] dbParams = new String[]{ + TARGET_OF_REPLICATION, + CURR_STATE_ID_SOURCE.toString(), + CURR_STATE_ID_TARGET.toString(), + REPL_TARGET_DB_PROPERTY, + REPL_ENABLE_BACKGROUND_THREAD + }; + //verify if all db parameters are set + for (String paramKey : dbParams) { + assertTrue(replica.getDatabase(replicatedDbName).getParameters().containsKey(paramKey)); + } + + // 2nd cycle of optimized bootstrap + replica.dump(replicatedDbName, withClause); + primary.load(primaryDbName, replicatedDbName, withClause); + + for (String paramKey : dbParams) { + assertFalse(replica.getDatabase(replicatedDbName).getParameters().containsKey(paramKey)); + } + // ensure optimized bootstrap 
was successful. + primary.run(String.format("select * from %s.t1", primaryDbName)) + .verifyResults(new String[]{"1", "2", "3"}); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java index e7c329a5f25..02815334fc5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java @@ -162,7 +162,6 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable { private Set<String> tablesForBootstrap = new HashSet<>(); private List<TxnType> excludedTxns = Arrays.asList(TxnType.READ_ONLY, TxnType.REPL_CREATED); private boolean createEventMarker = false; - private boolean unsetDbPropertiesForOptimisedBootstrap; public enum ConstraintFileType {COMMON("common", "c_"), FOREIGNKEY("fk", "f_"); private final String name; @@ -264,8 +263,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable { assert isTableDiffDirectoryPresent; - // Set boolean to determine the db properties need to sorted once dump is complete - unsetDbPropertiesForOptimisedBootstrap = true; + work.setSecondDumpAfterFailover(true); long fromEventId = Long.parseLong(getEventIdFromFile(previousValidHiveDumpPath.getParent(), conf)[1]); LOG.info("Starting optimised bootstrap from event id {} for database {}", fromEventId, @@ -474,7 +472,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable { + ReplAck.DUMP_ACKNOWLEDGEMENT); // Check if we need to unset database properties after successful optimised bootstrap. 
- if (unsetDbPropertiesForOptimisedBootstrap) { + if (work.isSecondDumpAfterFailover()) { Hive hiveDb = getHive(); Database database = hiveDb.getDatabase(work.dbNameOrPattern); LinkedHashMap<String, String> dbParams = new LinkedHashMap<>(database.getParameters()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java index 4ac1bf51db3..65d9c17a675 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java @@ -73,6 +73,7 @@ public class ReplDumpWork implements Serializable { private ReplLogger replLogger; private FailoverMetaData fmd; private boolean firstDumpAfterFailover; + private boolean secondDumpAfterFailover; public static void injectNextDumpDirForTest(String dumpDir) { injectNextDumpDirForTest(dumpDir, false); @@ -355,4 +356,12 @@ public class ReplDumpWork implements Serializable { public void setReplLogger(ReplLogger replLogger) { this.replLogger = replLogger; } + + public boolean isSecondDumpAfterFailover() { + return secondDumpAfterFailover; + } + + public void setSecondDumpAfterFailover(boolean secondDumpAfterFailover) { + this.secondDumpAfterFailover = secondDumpAfterFailover; + } }