[
https://issues.apache.org/jira/browse/IGNITE-27501?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Mirza Aliev updated IGNITE-27501:
---------------------------------
Description:
The following code leads to an OOM locally:
{code:java}
/**
 * Reproducer for the OOM: starts a cluster from three generated peers,
 * stops both followers, then applies 1,000,000 tasks to the node that
 * was elected leader.
 */
@Test
public void testAppendEntriesWhenFollowerIsInErrorState() throws Exception {
// Generate three peers and start every one of them in the same cluster.
List<TestPeer> peers = TestUtils.generatePeers(testInfo, 3);
cluster = new TestCluster("unitest", dataPath, peers,
ELECTION_TIMEOUT_MILLIS, testInfo);
for (TestPeer peer : peers)
assertTrue(cluster.start(peer));
// Wait for a leader; the remaining two nodes are expected to be followers.
Node oldLeader = cluster.waitAndGetLeader();
assertNotNull(oldLeader);
List<Node> followers = cluster.getFollowers();
assertEquals(2, followers.size());
Node errorNode = followers.get(0);
Node errorNode2 = followers.get(1);
// Stop both followers, so only the former leader keeps running.
// NOTE(review): with one of three peers left the group has presumably
// lost its majority, as the issue title says - confirm.
cluster.stop(errorNode.getNodeId().getPeerId());
cluster.stop(errorNode2.getNodeId().getPeerId());
// Apply 1,000,000 tasks to the former leader; each carries a
// 154216-byte payload (see sendTestTaskAndWait).
sendTestTaskAndWait(oldLeader, 0, 1_000_000, RaftError.SUCCESS);
}
/**
 * Applies {@code amount} tasks to {@code node}, each wrapping a freshly
 * allocated 154216-byte array, and then waits on a latch that was
 * created with a count of {@code amount}.
 *
 * @param node node the tasks are applied to; it is cast to
 *     {@code NodeImpl} and its FSM to {@code MockStateMachine}
 * @param start first loop index; it only shifts the loop bounds and is
 *     not written into the payload
 * @param amount number of tasks to apply and initial latch count
 * @param err not referenced anywhere in this body
 * @throws InterruptedException declared by the signature; presumably
 *     raised by {@code waitLatch} - confirm
 */
void sendTestTaskAndWait(Node node, int start, int amount,
RaftError err) throws InterruptedException
{
CountDownLatch latch = new CountDownLatch(amount);
// The four locals below are assigned but never read again in this method.
MockStateMachine fsm = (MockStateMachine) node.getOptions().getFsm();
FSMCaller fsmCaller = ((NodeImpl) node).fsmCaller();
long appliedIndexBeforeRunCommands = fsm.getAppliedIndex();
long lastAppliedIndexBeforeRunCommands =
fsmCaller.getLastAppliedIndex();
for (int i = start; i < start + amount; i++) {
byte[] bytes = new byte[154216]; // ~150 KB
ByteBuffer data = ByteBuffer.wrap(bytes);
// The second Task argument is null, so the latch is not handed to the
// task. NOTE(review): presumably that argument is the completion
// callback - confirm against the Task constructor.
Task task = new Task(data, null);
node.apply(task);
}
// Nothing in this method counts the latch down before this wait.
waitLatch(latch);
}
{code}
was:TBD
> Raft group under the load and lost majority could die with OOM
> --------------------------------------------------------------
>
> Key: IGNITE-27501
> URL: https://issues.apache.org/jira/browse/IGNITE-27501
> Project: Ignite
> Issue Type: Bug
> Reporter: Mirza Aliev
> Assignee: Mirza Aliev
> Priority: Major
> Labels: ignite-3
>
> The following code leads to an OOM locally:
> {code:java}
> @Test
> public void testAppendEntriesWhenFollowerIsInErrorState() throws
> Exception {
> List<TestPeer> peers = TestUtils.generatePeers(testInfo, 3);
> cluster = new TestCluster("unitest", dataPath, peers,
> ELECTION_TIMEOUT_MILLIS, testInfo);
> for (TestPeer peer : peers)
> assertTrue(cluster.start(peer));
> Node oldLeader = cluster.waitAndGetLeader();
> assertNotNull(oldLeader);
> List<Node> followers = cluster.getFollowers();
> assertEquals(2, followers.size());
> Node errorNode = followers.get(0);
> Node errorNode2 = followers.get(1);
> cluster.stop(errorNode.getNodeId().getPeerId());
> cluster.stop(errorNode2.getNodeId().getPeerId());
> sendTestTaskAndWait(oldLeader, 0, 1_000_000, RaftError.SUCCESS);
> }
> void sendTestTaskAndWait(Node node, int start, int amount,
> RaftError err) throws
> InterruptedException {
> CountDownLatch latch = new CountDownLatch(amount);
> MockStateMachine fsm = (MockStateMachine) node.getOptions().getFsm();
> FSMCaller fsmCaller = ((NodeImpl) node).fsmCaller();
> long appliedIndexBeforeRunCommands = fsm.getAppliedIndex();
> long lastAppliedIndexBeforeRunCommands =
> fsmCaller.getLastAppliedIndex();
> for (int i = start; i < start + amount; i++) {
> byte[] bytes = new byte[154216]; // ~150 KB
> ByteBuffer data = ByteBuffer.wrap(bytes);
> Task task = new Task(data, null);
> node.apply(task);
> }
> waitLatch(latch);
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)