ayushtkn commented on a change in pull request #2980:
URL: https://github.com/apache/hive/pull/2980#discussion_r799203703



##########
File path: 
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
##########
@@ -734,4 +743,167 @@ public NotificationEventResponse apply(@Nullable 
NotificationEventResponse event
       InjectableBehaviourObjectStore.resetGetNextNotificationBehaviour();  // 
reset the behaviour
     }
   }
+
+
+  @Test
+  public void testReverseBootstrap() throws Throwable {
+    List<String> withClause = 
ReplicationTestUtils.includeExternalTableClause(true);
+    withClause.add("'" + HiveConf.ConfVars.REPLDIR.varname + "'='" + 
primary.repldDir + "'");
+
+    // Do a bootstrap cycle.
+    primary.dump(primaryDbName, withClause);
+    replica.load(replicatedDbName, primaryDbName, withClause);
+
+    // Create 4 managed tables and do a dump & load.
+    WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName)
+        .run("create table t1 (id int)")
+        .run("insert into table t1 values (1)")
+        .run("insert into table t1 values (2),(3),(4)")
+        .run("create table t2 (place string) partitioned by (country string)")
+        .run("insert into table t2 partition(country='india') values 
('chennai')")
+        .run("insert into table t2 partition(country='us') values ('new 
york')")
+        .run("create table t3 (id int)")
+        .run("insert into table t3 values (10)")
+        .run("insert into table t3 values (20),(31),(42)")
+        .run("create table t4 (place string) partitioned by (country string)")
+        .run("insert into table t4 partition(country='india') values 
('bangalore')")
+        .run("insert into table t4 partition(country='us') values ('austin')")
+        .dump(primaryDbName, withClause);
+
+    // Do the load and check all the external & managed tables are present.
+    replica.load(replicatedDbName, primaryDbName, withClause)
+        .run("repl status " + replicatedDbName)
+        .verifyResult(tuple.lastReplicationId)
+        .run("use " + replicatedDbName)
+        .run("show tables like 't1'")
+        .verifyResult("t1")
+        .run("show tables like 't2'")
+        .verifyResult("t2")
+        .run("show tables like 't3'")
+        .verifyResult("t3")
+        .run("show tables like 't4'")
+        .verifyResult("t4")
+        .verifyReplTargetProperty(replicatedDbName);
+
+
+    // Do some modifications on original source cluster. The diff 
becomes(tnew_managed, t1, t2, t3)
+    primary.run("use " + primaryDbName)
+        .run("create table tnew_managed (id int)")
+        .run("insert into table t1 values (25)")
+        .run("insert into table tnew_managed values (110)")
+        .run("insert into table t2 partition(country='france') values 
('lyon')")
+        .run("drop table t3");
+
+    // Do some modifications on the target cluster. (t1, t2, t3: bootstrap & 
t4, t5: incremental)
+    replica.run("use " + replicatedDbName)
+        .run("insert into table t1 values (101)")
+        .run("insert into table t1 values (210),(321)")
+        .run("insert into table t2 partition(country='india') values 
('delhi')")
+        .run("insert into table t3 values (11)")
+        .run("insert into table t4 partition(country='india') values 
('lucknow')")
+        .run("create table t5 (place string) partitioned by (country string)")
+        .run("insert into table t5 partition(country='china') values 
('beejing')");
+
+    // Prepare for reverse replication.
+    DistributedFileSystem replicaFs = replica.miniDFSCluster.getFileSystem();
+    Path newReplDir = new Path(replica.repldDir + "1");
+    replicaFs.mkdirs(newReplDir);

Review comment:
       Nope, it doesn't. It just needs to make sure that the directory exists 
after the operation; if it already exists, it simply returns.

##########
File path: 
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
##########
@@ -734,4 +743,167 @@ public NotificationEventResponse apply(@Nullable 
NotificationEventResponse event
       InjectableBehaviourObjectStore.resetGetNextNotificationBehaviour();  // 
reset the behaviour
     }
   }
+
+
+  @Test
+  public void testReverseBootstrap() throws Throwable {
+    List<String> withClause = 
ReplicationTestUtils.includeExternalTableClause(true);
+    withClause.add("'" + HiveConf.ConfVars.REPLDIR.varname + "'='" + 
primary.repldDir + "'");
+
+    // Do a bootstrap cycle.
+    primary.dump(primaryDbName, withClause);
+    replica.load(replicatedDbName, primaryDbName, withClause);
+
+    // Create 4 managed tables and do a dump & load.
+    WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName)
+        .run("create table t1 (id int)")
+        .run("insert into table t1 values (1)")
+        .run("insert into table t1 values (2),(3),(4)")
+        .run("create table t2 (place string) partitioned by (country string)")
+        .run("insert into table t2 partition(country='india') values 
('chennai')")
+        .run("insert into table t2 partition(country='us') values ('new 
york')")
+        .run("create table t3 (id int)")
+        .run("insert into table t3 values (10)")
+        .run("insert into table t3 values (20),(31),(42)")
+        .run("create table t4 (place string) partitioned by (country string)")
+        .run("insert into table t4 partition(country='india') values 
('bangalore')")
+        .run("insert into table t4 partition(country='us') values ('austin')")
+        .dump(primaryDbName, withClause);
+
+    // Do the load and check all the external & managed tables are present.
+    replica.load(replicatedDbName, primaryDbName, withClause)
+        .run("repl status " + replicatedDbName)
+        .verifyResult(tuple.lastReplicationId)
+        .run("use " + replicatedDbName)
+        .run("show tables like 't1'")
+        .verifyResult("t1")
+        .run("show tables like 't2'")
+        .verifyResult("t2")
+        .run("show tables like 't3'")
+        .verifyResult("t3")
+        .run("show tables like 't4'")
+        .verifyResult("t4")
+        .verifyReplTargetProperty(replicatedDbName);
+
+
+    // Do some modifications on original source cluster. The diff 
becomes(tnew_managed, t1, t2, t3)
+    primary.run("use " + primaryDbName)
+        .run("create table tnew_managed (id int)")
+        .run("insert into table t1 values (25)")
+        .run("insert into table tnew_managed values (110)")
+        .run("insert into table t2 partition(country='france') values 
('lyon')")
+        .run("drop table t3");
+
+    // Do some modifications on the target cluster. (t1, t2, t3: bootstrap & 
t4, t5: incremental)
+    replica.run("use " + replicatedDbName)
+        .run("insert into table t1 values (101)")
+        .run("insert into table t1 values (210),(321)")
+        .run("insert into table t2 partition(country='india') values 
('delhi')")
+        .run("insert into table t3 values (11)")
+        .run("insert into table t4 partition(country='india') values 
('lucknow')")
+        .run("create table t5 (place string) partitioned by (country string)")
+        .run("insert into table t5 partition(country='china') values 
('beejing')");
+
+    // Prepare for reverse replication.
+    DistributedFileSystem replicaFs = replica.miniDFSCluster.getFileSystem();
+    Path newReplDir = new Path(replica.repldDir + "1");
+    replicaFs.mkdirs(newReplDir);
+    withClause = ReplicationTestUtils.includeExternalTableClause(true);
+    withClause.add("'" + HiveConf.ConfVars.REPLDIR.varname + "'='" + 
newReplDir + "'");
+
+    // Do a reverse dump
+    tuple = replica.dump(replicatedDbName, withClause);
+
+    // Check the event ack file got created.
+    assertTrue(new Path(tuple.dumpLocation, EVENT_ACK_FILE).toString() + " 
doesn't exist",
+        replicaFs.exists(new Path(tuple.dumpLocation, EVENT_ACK_FILE)));
+
+    Path dumpPath = new Path(tuple.dumpLocation);
+
+    // Do a load, this should create a table_diff_complete directory
+    primary.load(primaryDbName, replicatedDbName, withClause);
+
+    // Check the table diff directory exist.
+    assertTrue(new Path(tuple.dumpLocation, 
TABLE_DIFF_COMPLETE_DIRECTORY).toString() + " doesn't exist",
+        replicaFs.exists(new Path(tuple.dumpLocation, 
TABLE_DIFF_COMPLETE_DIRECTORY)));
+
+    // Check the table diff has all the modified table, including the dropped 
and empty ones
+    HashSet<String> tableDiffEntries = getTablesFromTableDiffFile(dumpPath, 
conf);
+    assertTrue("Table Diff Contains " + tableDiffEntries, tableDiffEntries
+        .containsAll(Arrays.asList("tnew_managed", "t1", "t2", "t3")));
+
+    // Do a reverse second dump, this should do a bootstrap dump for the 
tables in the table_diff and incremental for
+    // rest.
+    tuple = replica.dump(replicatedDbName, withClause);
+
+    String hiveDumpDir = tuple.dumpLocation + File.separator + 
ReplUtils.REPL_HIVE_BASE_DIR;
+    // _bootstrap directory should be created as bootstrap enabled on external 
tables.
+    Path dumpPath1 = new Path(hiveDumpDir, INC_BOOTSTRAP_ROOT_DIR_NAME 
+"/metadata/" + replicatedDbName);
+    FileStatus[] listStatus =
+        dumpPath1.getFileSystem(conf).listStatus(dumpPath1);

Review comment:
       Done
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to