This is an automated email from the ASF dual-hosted git repository.

sk0x50 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ignite-3.git


The following commit(s) were added to refs/heads/main by this push:
     new cf34a315479 IGNITE-26215 ItNodeTest fixes (#7988)
cf34a315479 is described below

commit cf34a315479655a28d33eb10e97cea293bea9a6c
Author: Anton Laletin <[email protected]>
AuthorDate: Mon Apr 20 12:14:54 2026 +0400

    IGNITE-26215 ItNodeTest fixes (#7988)
---
 .../apache/ignite/raft/jraft/core/ItNodeTest.java  | 101 ++++++++++++++-------
 ...ilablePartitionsRecoveryByFilterUpdateTest.java |   1 -
 2 files changed, 69 insertions(+), 33 deletions(-)

diff --git a/modules/raft/src/integrationTest/java/org/apache/ignite/raft/jraft/core/ItNodeTest.java b/modules/raft/src/integrationTest/java/org/apache/ignite/raft/jraft/core/ItNodeTest.java
index 2e5712edefe..a0301d179cd 100644
--- a/modules/raft/src/integrationTest/java/org/apache/ignite/raft/jraft/core/ItNodeTest.java
+++ b/modules/raft/src/integrationTest/java/org/apache/ignite/raft/jraft/core/ItNodeTest.java
@@ -1767,10 +1767,11 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
         assertTrue(cluster.start(peer1, false, 300));
 
         // add peer1
-        CountDownLatch latch = new CountDownLatch(1);
         peers.add(peer1);
-        leader.addPeer(peer1.getPeerId(), 5L, new ExpectClosure(latch));
-        waitLatch(latch);
+        SynchronizedClosure addPeer1Done = new SynchronizedClosure();
+        leader.addPeer(peer1.getPeerId(), 5L, addPeer1Done);
+        Status addPeer1Status = assertTimeoutPreemptively(Duration.ofSeconds(30), addPeer1Done::await);
+        assertTrue(addPeer1Status.isOk(), "addPeer(peer1) failed: " + addPeer1Status);
 
         cluster.ensureSame();
         assertEquals(2, cluster.getFsms().size());
@@ -1779,25 +1780,34 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
 
         // add peer2 but not start
         peers.add(peer2);
-        latch = new CountDownLatch(1);
-        leader.addPeer(peer2.getPeerId(), 6L, new ExpectClosure(RaftError.ECATCHUP, latch));
-        waitLatch(latch);
+        SynchronizedClosure addPeer2BeforeStartDone = new SynchronizedClosure();
+        leader.addPeer(peer2.getPeerId(), 6L, addPeer2BeforeStartDone);
+        Status addPeer2BeforeStartStatus = assertTimeoutPreemptively(Duration.ofSeconds(30), addPeer2BeforeStartDone::await);
+        assertEquals(RaftError.ECATCHUP, addPeer2BeforeStartStatus.getRaftError(),
+                "addPeer(peer2) before startup should fail with ECATCHUP: " + addPeer2BeforeStartStatus);
 
         // start peer2 after 2 seconds
         Thread.sleep(2000);
         assertTrue(cluster.start(peer2, false, 300));
 
         // re-add peer2
-        latch = new CountDownLatch(2);
-        leader.addPeer(peer2.getPeerId(), 7L, new ExpectClosure(latch));
+        SynchronizedClosure addPeer2Done = new SynchronizedClosure();
+        leader.addPeer(peer2.getPeerId(), 7L, addPeer2Done);
         // concurrent configuration change
-        leader.addPeer(peer3.getPeerId(), 8L, new ExpectClosure(RaftError.EBUSY, latch));
-        waitLatch(latch);
+        SynchronizedClosure addPeer3Done = new SynchronizedClosure();
+        leader.addPeer(peer3.getPeerId(), 8L, addPeer3Done);
+
+        Status addPeer2Status = assertTimeoutPreemptively(Duration.ofSeconds(30), addPeer2Done::await);
+        assertTrue(addPeer2Status.isOk(), "re-add peer2 failed: " + addPeer2Status);
+
+        Status addPeer3Status = assertTimeoutPreemptively(Duration.ofSeconds(30), addPeer3Done::await);
+        assertEquals(RaftError.EBUSY, addPeer3Status.getRaftError(),
+                "concurrent addPeer(peer3) should fail with EBUSY: " + addPeer3Status);
 
         // re-add peer2 directly
 
         try {
-            leader.addPeer(peer2.getPeerId(), 9L, new ExpectClosure(latch));
+            leader.addPeer(peer2.getPeerId(), 9L, new SynchronizedClosure());
             fail();
         }
         catch (IllegalArgumentException e) {
@@ -2968,7 +2978,8 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
         // Start node C, it should install snapshot from leader.
         log.info("Start node [id={}].", peers.get(2).getPeerId().getConsistentId());
         assertTrue(cluster.start(peers.get(2)));
-        assertTrue(waitForCondition(() -> cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot(), 10_000));
+        await().atMost(10, TimeUnit.SECONDS)
+                .until(() -> cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
 
         log.info("Waiting for snapshot to start executing.");
         assertThat(snapshotStartedFuture, willCompleteSuccessfully());
@@ -2978,7 +2989,8 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
         cluster.stop(leader.getLeaderId());
         log.info("Leader stopped.");
 
-        assertTrue(cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
+        await().atMost(10, TimeUnit.SECONDS)
+                .until(() -> cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
     }
 
     /**
@@ -3043,7 +3055,8 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
 
             return false;
         });
-        assertTrue(waitForCondition(() -> cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot(), 10_000));
+        await().atMost(10, TimeUnit.SECONDS)
+                .until(() -> cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
         // While snapshot is being installed, stop the leader.
 
         log.info("Waiting for snapshot to start executing.");
@@ -3053,9 +3066,8 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
         cluster.stop(leader.getLeaderId());
         log.info("Leader stopped.");
 
-        Thread.sleep(30_000);
-
-        assertTrue(cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
+        await().atMost(30, TimeUnit.SECONDS)
+                .until(() -> cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
     }
 
     @Test
@@ -3110,13 +3122,15 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
         log.info("Start node [id={}].", peers.get(2).getPeerId().getConsistentId());
         assertTrue(cluster.start(peers.get(2)));
 
-        assertTrue(waitForCondition(() -> cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot(), 10_000));
+        await().atMost(10, TimeUnit.SECONDS)
+                .until(() -> cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
         // While snapshot is being installed, stop the leader.
         log.info("Stopping leader [id={}].", leader.getLeaderId());
         cluster.stop(leader.getLeaderId());
         log.info("Leader stopped.");
 
-        assertFalse(cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
+        await().timeout(10, TimeUnit.SECONDS)
+                .until(() -> !cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
     }
 
     private void tapIntoSnapshotCopier(
@@ -4127,6 +4141,39 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
         });
     }
 
+    private void changePeersAndLearnersWithRetry(Configuration conf, long timeoutMillis) throws InterruptedException {
+        Status lastStatus = null;
+        long deadlineNanos = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeoutMillis);
+
+        while (System.nanoTime() < deadlineNanos) {
+            Node leader = cluster.waitAndGetLeader();
+
+            if (leader == null) {
+                continue;
+            }
+
+            SynchronizedClosure done = new SynchronizedClosure();
+
+            leader.changePeersAndLearners(conf, leader.getCurrentTerm(), done);
+
+            lastStatus = done.await();
+
+            if (lastStatus.isOk()) {
+                return;
+            }
+
+            RaftError error = lastStatus.getRaftError();
+
+            if (error != RaftError.EBUSY && error != RaftError.EPERM && error != RaftError.ECATCHUP) {
+                break;
+            }
+
+            Thread.sleep(100);
+        }
+
+        assertTrue(lastStatus != null && lastStatus.isOk(), String.valueOf(lastStatus));
+    }
+
     @Test
     public void testChangePeersAndLearnersChaosWithSnapshot() throws Exception {
         // start cluster
@@ -4161,11 +4208,7 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
         }
         arg.stop = true;
         future.get();
-        SynchronizedClosure done = new SynchronizedClosure();
-        Node leader = cluster.waitAndGetLeader();
-        leader.changePeersAndLearners(new Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())), leader.getCurrentTerm(), done);
-        Status st = done.await();
-        assertTrue(st.isOk(), st.getErrorMsg());
+        changePeersAndLearnersWithRetry(new Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())), 10_000);
         cluster.ensureSame();
         assertEquals(10, cluster.getFsms().size());
         for (MockStateMachine fsm : cluster.getFsms())
@@ -4207,10 +4250,7 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
         }
         arg.stop = true;
         future.get();
-        SynchronizedClosure done = new SynchronizedClosure();
-        Node leader = cluster.waitAndGetLeader();
-        leader.changePeersAndLearners(new Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())), leader.getCurrentTerm(), done);
-        assertTrue(done.await().isOk());
+        changePeersAndLearnersWithRetry(new Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())), 10_000);
         cluster.ensureSame();
         assertEquals(10, cluster.getFsms().size());
         for (MockStateMachine fsm : cluster.getFsms()) {
@@ -4280,10 +4320,7 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
         for (Future<?> future : futures)
             future.get();
 
-        SynchronizedClosure done = new SynchronizedClosure();
-        Node leader = cluster.waitAndGetLeader();
-        leader.changePeersAndLearners(new Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())), leader.getCurrentTerm(), done);
-        assertTrue(done.await().isOk());
+        changePeersAndLearnersWithRetry(new Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())), 10_000);
         cluster.ensureSame();
         assertEquals(10, cluster.getFsms().size());
 
diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java
index 6d4e941551e..40aa36be7d8 100644
--- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java
+++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java
@@ -197,7 +197,6 @@ public class ItHighAvailablePartitionsRecoveryByFilterUpdateTest extends Abstrac
      * @throws Exception If failed.
      */
     @Test
-    @Disabled("https://issues.apache.org/jira/browse/IGNITE-28013")
     void testSeveralHaResetsAndSomeNodeRestart() throws Exception {
         for (int i = 1; i < 8; i++) {
             startNode(i, CUSTOM_NODES_CONFIG);

Reply via email to