This is an automated email from the ASF dual-hosted git repository.
sk0x50 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ignite-3.git
The following commit(s) were added to refs/heads/main by this push:
new cf34a315479 IGNITE-26215 ItNodeTest fixes (#7988)
cf34a315479 is described below
commit cf34a315479655a28d33eb10e97cea293bea9a6c
Author: Anton Laletin <[email protected]>
AuthorDate: Mon Apr 20 12:14:54 2026 +0400
IGNITE-26215 ItNodeTest fixes (#7988)
---
.../apache/ignite/raft/jraft/core/ItNodeTest.java | 101 ++++++++++++++-------
...ilablePartitionsRecoveryByFilterUpdateTest.java | 1 -
2 files changed, 69 insertions(+), 33 deletions(-)
diff --git a/modules/raft/src/integrationTest/java/org/apache/ignite/raft/jraft/core/ItNodeTest.java b/modules/raft/src/integrationTest/java/org/apache/ignite/raft/jraft/core/ItNodeTest.java
index 2e5712edefe..a0301d179cd 100644
--- a/modules/raft/src/integrationTest/java/org/apache/ignite/raft/jraft/core/ItNodeTest.java
+++ b/modules/raft/src/integrationTest/java/org/apache/ignite/raft/jraft/core/ItNodeTest.java
@@ -1767,10 +1767,11 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
assertTrue(cluster.start(peer1, false, 300));
// add peer1
- CountDownLatch latch = new CountDownLatch(1);
peers.add(peer1);
- leader.addPeer(peer1.getPeerId(), 5L, new ExpectClosure(latch));
- waitLatch(latch);
+ SynchronizedClosure addPeer1Done = new SynchronizedClosure();
+ leader.addPeer(peer1.getPeerId(), 5L, addPeer1Done);
+ Status addPeer1Status =
assertTimeoutPreemptively(Duration.ofSeconds(30), addPeer1Done::await);
+ assertTrue(addPeer1Status.isOk(), "addPeer(peer1) failed: " +
addPeer1Status);
cluster.ensureSame();
assertEquals(2, cluster.getFsms().size());
@@ -1779,25 +1780,34 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
// add peer2 but not start
peers.add(peer2);
- latch = new CountDownLatch(1);
- leader.addPeer(peer2.getPeerId(), 6L, new
ExpectClosure(RaftError.ECATCHUP, latch));
- waitLatch(latch);
+ SynchronizedClosure addPeer2BeforeStartDone = new
SynchronizedClosure();
+ leader.addPeer(peer2.getPeerId(), 6L, addPeer2BeforeStartDone);
+ Status addPeer2BeforeStartStatus =
assertTimeoutPreemptively(Duration.ofSeconds(30),
addPeer2BeforeStartDone::await);
+ assertEquals(RaftError.ECATCHUP,
addPeer2BeforeStartStatus.getRaftError(),
+ "addPeer(peer2) before startup should fail with ECATCHUP: " +
addPeer2BeforeStartStatus);
// start peer2 after 2 seconds
Thread.sleep(2000);
assertTrue(cluster.start(peer2, false, 300));
// re-add peer2
- latch = new CountDownLatch(2);
- leader.addPeer(peer2.getPeerId(), 7L, new ExpectClosure(latch));
+ SynchronizedClosure addPeer2Done = new SynchronizedClosure();
+ leader.addPeer(peer2.getPeerId(), 7L, addPeer2Done);
// concurrent configuration change
- leader.addPeer(peer3.getPeerId(), 8L, new
ExpectClosure(RaftError.EBUSY, latch));
- waitLatch(latch);
+ SynchronizedClosure addPeer3Done = new SynchronizedClosure();
+ leader.addPeer(peer3.getPeerId(), 8L, addPeer3Done);
+
+ Status addPeer2Status =
assertTimeoutPreemptively(Duration.ofSeconds(30), addPeer2Done::await);
+ assertTrue(addPeer2Status.isOk(), "re-add peer2 failed: " +
addPeer2Status);
+
+ Status addPeer3Status =
assertTimeoutPreemptively(Duration.ofSeconds(30), addPeer3Done::await);
+ assertEquals(RaftError.EBUSY, addPeer3Status.getRaftError(),
+ "concurrent addPeer(peer3) should fail with EBUSY: " +
addPeer3Status);
// re-add peer2 directly
try {
- leader.addPeer(peer2.getPeerId(), 9L, new ExpectClosure(latch));
+ leader.addPeer(peer2.getPeerId(), 9L, new SynchronizedClosure());
fail();
}
catch (IllegalArgumentException e) {
@@ -2968,7 +2978,8 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
// Start node C, it should install snapshot from leader.
log.info("Start node [id={}].",
peers.get(2).getPeerId().getConsistentId());
assertTrue(cluster.start(peers.get(2)));
- assertTrue(waitForCondition(() ->
cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot(), 10_000));
+ await().atMost(10, TimeUnit.SECONDS)
+ .until(() ->
cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
log.info("Waiting for snapshot to start executing.");
assertThat(snapshotStartedFuture, willCompleteSuccessfully());
@@ -2978,7 +2989,8 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
cluster.stop(leader.getLeaderId());
log.info("Leader stopped.");
- assertTrue(cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
+ await().atMost(10, TimeUnit.SECONDS)
+ .until(() ->
cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
}
/**
@@ -3043,7 +3055,8 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
return false;
});
- assertTrue(waitForCondition(() ->
cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot(), 10_000));
+ await().atMost(10, TimeUnit.SECONDS)
+ .until(() ->
cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
// While snapshot is being installed, stop the leader.
log.info("Waiting for snapshot to start executing.");
@@ -3053,9 +3066,8 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
cluster.stop(leader.getLeaderId());
log.info("Leader stopped.");
- Thread.sleep(30_000);
-
- assertTrue(cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
+ await().atMost(30, TimeUnit.SECONDS)
+ .until(() ->
cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
}
@Test
@@ -3110,13 +3122,15 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
log.info("Start node [id={}].",
peers.get(2).getPeerId().getConsistentId());
assertTrue(cluster.start(peers.get(2)));
- assertTrue(waitForCondition(() ->
cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot(), 10_000));
+ await().atMost(10, TimeUnit.SECONDS)
+ .until(() ->
cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
// While snapshot is being installed, stop the leader.
log.info("Stopping leader [id={}].", leader.getLeaderId());
cluster.stop(leader.getLeaderId());
log.info("Leader stopped.");
- assertFalse(cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
+ await().timeout(10, TimeUnit.SECONDS)
+ .until(() ->
!cluster.getNode(peers.get(2).getPeerId()).isInstallingSnapshot());
}
private void tapIntoSnapshotCopier(
@@ -4127,6 +4141,39 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
});
}
+ private void changePeersAndLearnersWithRetry(Configuration conf, long
timeoutMillis) throws InterruptedException {
+ Status lastStatus = null;
+ long deadlineNanos = System.nanoTime() +
TimeUnit.MILLISECONDS.toNanos(timeoutMillis);
+
+ while (System.nanoTime() < deadlineNanos) {
+ Node leader = cluster.waitAndGetLeader();
+
+ if (leader == null) {
+ continue;
+ }
+
+ SynchronizedClosure done = new SynchronizedClosure();
+
+ leader.changePeersAndLearners(conf, leader.getCurrentTerm(), done);
+
+ lastStatus = done.await();
+
+ if (lastStatus.isOk()) {
+ return;
+ }
+
+ RaftError error = lastStatus.getRaftError();
+
+ if (error != RaftError.EBUSY && error != RaftError.EPERM && error
!= RaftError.ECATCHUP) {
+ break;
+ }
+
+ Thread.sleep(100);
+ }
+
+ assertTrue(lastStatus != null && lastStatus.isOk(),
String.valueOf(lastStatus));
+ }
+
@Test
public void testChangePeersAndLearnersChaosWithSnapshot() throws Exception
{
// start cluster
@@ -4161,11 +4208,7 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
}
arg.stop = true;
future.get();
- SynchronizedClosure done = new SynchronizedClosure();
- Node leader = cluster.waitAndGetLeader();
- leader.changePeersAndLearners(new
Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())),
leader.getCurrentTerm(), done);
- Status st = done.await();
- assertTrue(st.isOk(), st.getErrorMsg());
+ changePeersAndLearnersWithRetry(new
Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())),
10_000);
cluster.ensureSame();
assertEquals(10, cluster.getFsms().size());
for (MockStateMachine fsm : cluster.getFsms())
@@ -4207,10 +4250,7 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
}
arg.stop = true;
future.get();
- SynchronizedClosure done = new SynchronizedClosure();
- Node leader = cluster.waitAndGetLeader();
- leader.changePeersAndLearners(new
Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())),
leader.getCurrentTerm(), done);
- assertTrue(done.await().isOk());
+ changePeersAndLearnersWithRetry(new
Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())),
10_000);
cluster.ensureSame();
assertEquals(10, cluster.getFsms().size());
for (MockStateMachine fsm : cluster.getFsms()) {
@@ -4280,10 +4320,7 @@ public class ItNodeTest extends BaseIgniteAbstractTest {
for (Future<?> future : futures)
future.get();
- SynchronizedClosure done = new SynchronizedClosure();
- Node leader = cluster.waitAndGetLeader();
- leader.changePeersAndLearners(new
Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())),
leader.getCurrentTerm(), done);
- assertTrue(done.await().isOk());
+ changePeersAndLearnersWithRetry(new
Configuration(peers.stream().map(TestPeer::getPeerId).collect(toList())),
10_000);
cluster.ensureSame();
assertEquals(10, cluster.getFsms().size());
diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java
index 6d4e941551e..40aa36be7d8 100644
--- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java
+++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/distributed/disaster/ItHighAvailablePartitionsRecoveryByFilterUpdateTest.java
@@ -197,7 +197,6 @@ public class ItHighAvailablePartitionsRecoveryByFilterUpdateTest extends Abstrac
* @throws Exception If failed.
*/
@Test
- @Disabled("https://issues.apache.org/jira/browse/IGNITE-28013")
void testSeveralHaResetsAndSomeNodeRestart() throws Exception {
for (int i = 1; i < 8; i++) {
startNode(i, CUSTOM_NODES_CONFIG);