This is an automated email from the ASF dual-hosted git repository.
szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ratis.git
The following commit(s) were added to refs/heads/master by this push:
new 35615d936 RATIS-2261. Intermittent failure in
TestRaftSnapshotWithGrpc.testInstallSnapshotDuringBootstrap. (#1287)
35615d936 is described below
commit 35615d9368c6299195071d6c0383a977eb0040cd
Author: Tsz-Wo Nicholas Sze <[email protected]>
AuthorDate: Mon Sep 22 10:12:59 2025 -0700
RATIS-2261. Intermittent failure in
TestRaftSnapshotWithGrpc.testInstallSnapshotDuringBootstrap. (#1287)
---
.../ratis/statemachine/RaftSnapshotBaseTest.java | 81 ++++++++++------------
.../impl/SimpleStateMachine4Testing.java | 11 +--
.../ratis/grpc/TestRaftSnapshotWithGrpc.java | 12 +---
.../ratis/netty/TestRaftSnapshotWithNetty.java | 11 ++-
.../TestRaftSnapshotWithSimulatedRpc.java | 11 ++-
5 files changed, 54 insertions(+), 72 deletions(-)
diff --git
a/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java
b/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java
index 2c4ac2eee..44ae74c4c 100644
---
a/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java
+++
b/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java
@@ -49,15 +49,12 @@ import org.apache.ratis.util.FileUtils;
import org.apache.ratis.util.JavaUtils;
import org.apache.ratis.util.LifeCycle;
import org.apache.ratis.util.Slf4jUtils;
-import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
-import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
@@ -67,11 +64,18 @@ import java.util.stream.LongStream;
import org.apache.ratis.thirdparty.com.codahale.metrics.Timer;
import org.slf4j.event.Level;
-public abstract class RaftSnapshotBaseTest extends BaseTest {
+public abstract class RaftSnapshotBaseTest<CLUSTER extends MiniRaftCluster>
+ extends BaseTest
+ implements MiniRaftCluster.Factory.Get<CLUSTER> {
{
Slf4jUtils.setLogLevel(RaftServer.Division.LOG, Level.DEBUG);
Slf4jUtils.setLogLevel(RaftLog.LOG, Level.DEBUG);
- Slf4jUtils.setLogLevel(RaftClient.LOG, Level.DEBUG);
+
+ final RaftProperties p = getProperties();
+ p.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY,
SimpleStateMachine4Testing.class, StateMachine.class);
+ RaftServerConfigKeys.Snapshot.setAutoTriggerThreshold(p,
SNAPSHOT_TRIGGER_THRESHOLD);
+ RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(p, true);
+ RaftServerConfigKeys.LeaderElection.setMemberMajorityAdd(p, true);
}
static final Logger LOG =
LoggerFactory.getLogger(RaftSnapshotBaseTest.class);
@@ -119,29 +123,6 @@ public abstract class RaftSnapshotBaseTest extends
BaseTest {
}
}
- private MiniRaftCluster cluster;
-
- public abstract MiniRaftCluster.Factory<?> getFactory();
-
- @BeforeEach
- public void setup() throws IOException {
- final RaftProperties prop = new RaftProperties();
- prop.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY,
- SimpleStateMachine4Testing.class, StateMachine.class);
- RaftServerConfigKeys.Snapshot.setAutoTriggerThreshold(
- prop, SNAPSHOT_TRIGGER_THRESHOLD);
- RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(prop, true);
- this.cluster = getFactory().newCluster(1, prop);
- cluster.start();
- }
-
- @AfterEach
- public void tearDown() {
- if (cluster != null) {
- cluster.shutdown();
- }
- }
-
/**
* Keep generating writing traffic and make sure snapshots are taken.
* We then restart the whole raft peer and check if it can correctly load
@@ -149,8 +130,13 @@ public abstract class RaftSnapshotBaseTest extends
BaseTest {
*/
@Test
public void testRestartPeer() throws Exception {
- RaftTestUtil.waitForLeader(cluster);
- final RaftPeerId leaderId = cluster.getLeader().getId();
+ runWithNewCluster(1, this::runTestRestartPeer);
+
+ }
+
+ void runTestRestartPeer(CLUSTER cluster) throws Exception {
+ LOG.info("runTestRestartPeer");
+ final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId();
int i = 0;
try(final RaftClient client = cluster.createClient(leaderId)) {
for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) {
@@ -180,7 +166,7 @@ public abstract class RaftSnapshotBaseTest extends BaseTest
{
public static boolean exists(File f) {
if (f.exists()) {
- LOG.info("File exists: " + f);
+ LOG.info("File exists: {}", f);
return true;
}
return false;
@@ -193,11 +179,15 @@ public abstract class RaftSnapshotBaseTest extends
BaseTest {
*/
@Test
public void testBasicInstallSnapshot() throws Exception {
+ runWithNewCluster(1, this::runTestBasicInstallSnapshot);
+ }
+
+ void runTestBasicInstallSnapshot(CLUSTER cluster) throws Exception {
+ LOG.info("runTestBasicInstallSnapshot");
final List<LogSegmentPath> logs;
int i = 0;
try {
- RaftTestUtil.waitForLeader(cluster);
- final RaftPeerId leaderId = cluster.getLeader().getId();
+ final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId();
try(final RaftClient client = cluster.createClient(leaderId)) {
for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) {
@@ -236,16 +226,14 @@ public abstract class RaftSnapshotBaseTest extends
BaseTest {
Assertions.assertTrue(client.io().send(new SimpleMessage("m" +
i)).isSuccess());
}
- // add two more peers
- String[] newPeers = new String[]{"s3", "s4"};
- MiniRaftCluster.PeerChanges change = cluster.addNewPeers(
- newPeers, true, false);
+ // add a new peer
+ final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(1, true,
true);
// trigger setConfiguration
RaftServerTestUtil.runWithMinorityPeers(cluster,
Arrays.asList(change.allPeersInNewConf),
peers ->
cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray())));
- for (String newPeer : newPeers) {
- final RaftServer.Division s =
cluster.getDivision(RaftPeerId.valueOf(newPeer));
+ for (RaftPeer newPeer : change.newPeers) {
+ final RaftServer.Division s = cluster.getDivision(newPeer.getId());
SimpleStateMachine4Testing simpleStateMachine =
SimpleStateMachine4Testing.get(s);
Assertions.assertSame(LifeCycle.State.RUNNING,
simpleStateMachine.getLifeCycleState());
}
@@ -275,6 +263,11 @@ public abstract class RaftSnapshotBaseTest extends
BaseTest {
*/
@Test
public void testInstallSnapshotDuringBootstrap() throws Exception {
+ runWithNewCluster(1, this::runTestInstallSnapshotDuringBootstrap);
+ }
+
+ void runTestInstallSnapshotDuringBootstrap(CLUSTER cluster) throws Exception
{
+ LOG.info("runTestInstallSnapshotDuringBootstrap");
int i = 0;
try {
RaftTestUtil.waitForLeader(cluster);
@@ -299,16 +292,14 @@ public abstract class RaftSnapshotBaseTest extends
BaseTest {
assertLeaderContent(cluster);
- // add two more peers
- String[] newPeers = new String[]{"s3", "s4"};
- MiniRaftCluster.PeerChanges change = cluster.addNewPeers(
- newPeers, true, false);
+ // add a new peer
+ final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(1, true,
true);
// trigger setConfiguration
RaftServerTestUtil.runWithMinorityPeers(cluster,
Arrays.asList(change.allPeersInNewConf),
peers ->
cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray())));
- for (String newPeer : newPeers) {
- final RaftServer.Division s =
cluster.getDivision(RaftPeerId.valueOf(newPeer));
+ for (RaftPeer newPeer : change.newPeers) {
+ final RaftServer.Division s = cluster.getDivision(newPeer.getId());
SimpleStateMachine4Testing simpleStateMachine =
SimpleStateMachine4Testing.get(s);
Assertions.assertSame(LifeCycle.State.RUNNING,
simpleStateMachine.getLifeCycleState());
}
diff --git
a/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java
b/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java
index afab27680..1ffbdbcb9 100644
---
a/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java
+++
b/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java
@@ -210,7 +210,7 @@ public class SimpleStateMachine4Testing extends
BaseStateMachine {
@Override
public synchronized void initialize(RaftServer server, RaftGroupId
raftGroupId,
RaftStorage raftStorage) throws IOException {
- LOG.info("Initializing " + this);
+ LOG.info("Initializing {}", this);
this.groupId = raftGroupId;
getLifeCycle().startAndTransition(() -> {
super.initialize(server, raftGroupId, raftStorage);
@@ -233,7 +233,10 @@ public class SimpleStateMachine4Testing extends
BaseStateMachine {
@Override
public synchronized void reinitialize() throws IOException {
- LOG.info("Reinitializing " + this);
+ LOG.info("Reinitializing {}", this);
+ indexMap.clear();
+ dataMap.clear();
+
loadSnapshot(storage.getLatestSnapshot());
if (getLifeCycleState() == LifeCycle.State.PAUSED) {
getLifeCycle().transition(LifeCycle.State.STARTING);
@@ -328,14 +331,14 @@ public class SimpleStateMachine4Testing extends
BaseStateMachine {
final String string = request.getContent().toStringUtf8();
Exception exception;
try {
- LOG.info("query " + string);
+ LOG.info("query {}", string);
final LogEntryProto entry = dataMap.get(string);
if (entry != null) {
return
CompletableFuture.completedFuture(Message.valueOf(entry.toByteString()));
}
exception = new IndexOutOfBoundsException(getId() + ": LogEntry not
found for query " + string);
} catch (Exception e) {
- LOG.warn("Failed request " + request, e);
+ LOG.warn("Failed request {}", request, e);
exception = e;
}
return JavaUtils.completeExceptionally(new StateMachineException(
diff --git
a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java
b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java
index e6c2f6613..7c94fb3bf 100644
---
a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java
+++
b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java
@@ -20,22 +20,16 @@ package org.apache.ratis.grpc;
import java.util.Optional;
import org.apache.ratis.metrics.LongCounter;
-import org.apache.ratis.server.impl.MiniRaftCluster;
import org.apache.ratis.metrics.MetricRegistries;
import org.apache.ratis.metrics.MetricRegistryInfo;
import org.apache.ratis.metrics.RatisMetricRegistry;
import org.apache.ratis.server.RaftServer;
import org.apache.ratis.statemachine.RaftSnapshotBaseTest;
-import org.apache.ratis.test.tag.Flaky;
import org.junit.jupiter.api.Assertions;
-@Flaky("RATIS-2261")
-public class TestRaftSnapshotWithGrpc extends RaftSnapshotBaseTest {
- @Override
- public MiniRaftCluster.Factory<?> getFactory() {
- return MiniRaftClusterWithGrpc.FACTORY;
- }
-
+public class TestRaftSnapshotWithGrpc
+ extends RaftSnapshotBaseTest<MiniRaftClusterWithGrpc>
+ implements MiniRaftClusterWithGrpc.FactoryGet {
@Override
protected void verifyInstallSnapshotMetric(RaftServer.Division leader) {
MetricRegistryInfo info = new
MetricRegistryInfo(leader.getMemberId().toString(),
diff --git
a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java
b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java
index f1340efc7..ae16f41ed 100644
---
a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java
+++
b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -17,12 +17,9 @@
*/
package org.apache.ratis.netty;
-import org.apache.ratis.server.impl.MiniRaftCluster;
import org.apache.ratis.statemachine.RaftSnapshotBaseTest;
-public class TestRaftSnapshotWithNetty extends RaftSnapshotBaseTest {
- @Override
- public MiniRaftCluster.Factory<?> getFactory() {
- return MiniRaftClusterWithNetty.FACTORY;
- }
+public class TestRaftSnapshotWithNetty
+ extends RaftSnapshotBaseTest<MiniRaftClusterWithNetty>
+ implements MiniRaftClusterWithNetty.FactoryGet {
}
diff --git
a/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java
b/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java
index 1c76f7b00..62ee387de 100644
---
a/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java
+++
b/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -17,12 +17,9 @@
*/
package org.apache.ratis.server.simulation;
-import org.apache.ratis.server.impl.MiniRaftCluster;
import org.apache.ratis.statemachine.RaftSnapshotBaseTest;
-public class TestRaftSnapshotWithSimulatedRpc extends RaftSnapshotBaseTest {
- @Override
- public MiniRaftCluster.Factory<?> getFactory() {
- return MiniRaftClusterWithSimulatedRpc.FACTORY;
- }
+public class TestRaftSnapshotWithSimulatedRpc
+ extends RaftSnapshotBaseTest<MiniRaftClusterWithSimulatedRpc>
+ implements MiniRaftClusterWithSimulatedRpc.FactoryGet {
}