rpuch commented on code in PR #4315: URL: https://github.com/apache/ignite-3/pull/4315#discussion_r1739635012
########## modules/system-disaster-recovery/src/integrationTest/java/org/apache/ignite/internal/disaster/system/ItCmgDisasterRecoveryTest.java: ########## @@ -223,4 +245,110 @@ void nodesThatSawNoReparationHaveSeparatePhysicalTopology() throws Exception { "Nodes from different clusters were able to establish a connection" ); } + + @Test + void migratesNodesThatSawNoReparationToNewCluster() throws Exception { + cluster.startAndInit(2, paramsBuilder -> { + paramsBuilder.cmgNodeNames(nodeNames(0)); + paramsBuilder.metaStorageNodeNames(nodeNames(1)); + }); + waitTillClusterStateIsSavedToVaultOnConductor(1); + + // This makes the CMG majority go away. + cluster.stopNode(0); + + // Repair CMG with just node 1. + initiateCmgRepairVia(igniteImpl(1), 1); + IgniteImpl restartedIgniteImpl1 = waitTillNodeRestartsInternally(1); + waitTillCmgHasMajority(restartedIgniteImpl1); + + migrate(0, 1); + + LogicalTopologySnapshot topologySnapshot = igniteImpl(1).logicalTopologyService().logicalTopologyOnLeader().get(10, SECONDS); + assertTopologyContainsNode(0, topologySnapshot); + } + + private void assertTopologyContainsNode(int nodeIndex, LogicalTopologySnapshot topologySnapshot) { + assertTrue(topologySnapshot.nodes().stream().anyMatch(node -> node.name().equals(cluster.nodeName(nodeIndex)))); + } + + private void migrate(int oldClusterNodeIndex, int newClusterNodeIndex) throws Exception { + // Starting the node that did not see the repair. + IgniteImpl nodeMissingRepair = ((IgniteServerImpl) cluster.startEmbeddedNode(oldClusterNodeIndex).server()).igniteImpl(); + + initiateMigrationToNewCluster(nodeMissingRepair, igniteImpl(newClusterNodeIndex)); + + waitTillNodeRestartsInternally(oldClusterNodeIndex); + } + + private static void initiateMigrationToNewCluster(IgniteImpl nodeMissingRepair, IgniteImpl repairedNode) throws Exception { + // TODO: IGNITE-22879 - initiate migration via CLI. + + ClusterState newClusterState = clusterState(repairedNode); + + CompletableFuture<Void> migrationFuture = nodeMissingRepair.systemDisasterRecoveryManager().migrate(newClusterState); + assertThat(migrationFuture, willCompleteSuccessfully()); + } + + @Test + void migratesManyNodesThatSawNoReparationToNewCluster() throws Exception { + cluster.startAndInit(5, paramsBuilder -> { + paramsBuilder.cmgNodeNames(nodeNames(0, 1, 2)); + paramsBuilder.metaStorageNodeNames(nodeNames(2, 3, 4)); + }); + waitTillClusterStateIsSavedToVaultOnConductor(2); + + // Stop the majority of CMG. + IntStream.of(0, 1).parallel().forEach(cluster::stopNode); + + // Repair CMG with nodes 2, 3, 4. + initiateCmgRepairVia(igniteImpl(2), 2, 3, 4); + IgniteImpl restartedIgniteImpl2 = waitTillNodeRestartsInternally(2); + waitTillCmgHasMajority(restartedIgniteImpl2); + + // Starting the nodes that did not see the repair. + List<IgniteImpl> partialNodes = IntStream.of(0, 1).parallel() Review Comment: To start them in parallel and save some test time -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@ignite.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org