jt2594838 commented on code in PR #17257:
URL: https://github.com/apache/iotdb/pull/17257#discussion_r2893566501


##########
integration-test/src/test/java/org/apache/iotdb/db/it/iotconsensusv2/IoTDBIoTConsensusV23C3DBasicITBase.java:
##########
@@ -0,0 +1,252 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.it.iotconsensusv2;
+
+import 
org.apache.iotdb.confignode.it.regionmigration.IoTDBRegionOperationReliabilityITFramework;
+import org.apache.iotdb.consensus.ConsensusFactory;
+import org.apache.iotdb.isession.SessionConfig;
+import org.apache.iotdb.it.env.EnvFactory;
+import org.apache.iotdb.it.env.cluster.node.DataNodeWrapper;
+import org.apache.iotdb.itbase.env.BaseEnv;
+
+import org.apache.tsfile.utils.Pair;
+import org.awaitility.Awaitility;
+import org.junit.Assert;
+import org.junit.Before;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.iotdb.util.MagicUtils.makeItCloseQuietly;
+
+/**
+ * Abstract base for IoTConsensusV2 3C3D integration tests. Subclasses specify 
batch or stream mode.
+ *
+ * <p>Verifies that a 3C3D cluster with IoTConsensusV2 can: 1. Start 
successfully 2. Write data 3.
+ * Execute flush on cluster 4. Query and verify data was written successfully
+ *
+ * <p>Additionally tests replica consistency: after stopping the leader 
DataNode, the follower
+ * should be elected as new leader and serve the same data.
+ */
+public abstract class IoTDBIoTConsensusV23C3DBasicITBase
+    extends IoTDBRegionOperationReliabilityITFramework {
+
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(IoTDBIoTConsensusV23C3DBasicITBase.class);
+
+  protected static final int CONFIG_NODE_NUM = 3;
+  protected static final int DATA_NODE_NUM = 3;
+  protected static final int DATA_REPLICATION_FACTOR = 2;
+  protected static final int SCHEMA_REPLICATION_FACTOR = 3;
+
+  /** Timeout in seconds for 3C3D cluster init. */
+  protected static final int CLUSTER_INIT_TIMEOUT_SECONDS = 300;
+
+  protected static final String INSERTION1 =
+      "INSERT INTO root.sg.d1(timestamp,speed,temperature) values(100, 1, 2)";
+  protected static final String INSERTION2 =
+      "INSERT INTO root.sg.d1(timestamp,speed,temperature) values(101, 3, 4)";
+  protected static final String INSERTION3 =
+      "INSERT INTO root.sg.d1(timestamp,speed,temperature) values(102, 5, 6)";
+  protected static final String FLUSH_COMMAND = "flush on cluster";
+  protected static final String COUNT_QUERY = "select count(*) from 
root.sg.**";
+  protected static final String SELECT_ALL_QUERY = "select speed, temperature 
from root.sg.d1";
+
+  /**
+   * Returns IoTConsensusV2 mode: {@link 
ConsensusFactory#IOT_CONSENSUS_V2_BATCH_MODE} or {@link
+   * ConsensusFactory#IOT_CONSENSUS_V2_STREAM_MODE}.
+   */
+  protected abstract String getIoTConsensusV2Mode();
+
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    EnvFactory.getEnv()
+        .getConfig()
+        .getCommonConfig()
+        .setDataReplicationFactor(DATA_REPLICATION_FACTOR)
+        .setSchemaReplicationFactor(SCHEMA_REPLICATION_FACTOR)
+        .setIoTConsensusV2Mode(getIoTConsensusV2Mode());
+
+    EnvFactory.getEnv()
+        .initClusterEnvironment(CONFIG_NODE_NUM, DATA_NODE_NUM, 
CLUSTER_INIT_TIMEOUT_SECONDS);
+  }
+
+  public void test3C3DWriteFlushAndQuery() throws Exception {
+    try (Connection connection = 
makeItCloseQuietly(EnvFactory.getEnv().getConnection());
+        Statement statement = 
makeItCloseQuietly(connection.createStatement())) {
+
+      LOGGER.info("Writing data to 3C3D cluster (mode: {})...", 
getIoTConsensusV2Mode());
+      statement.execute(INSERTION1);
+      statement.execute(INSERTION2);
+      statement.execute(INSERTION3);
+
+      LOGGER.info("Executing flush on cluster...");
+      statement.execute(FLUSH_COMMAND);
+
+      verifyDataConsistency(statement);
+
+      LOGGER.info("3C3D IoTConsensusV2 {} basic test passed", 
getIoTConsensusV2Mode());
+    }
+  }
+
+  /**
+   * Test replica consistency: with replication factor 2, stop the leader 
DataNode and verify the
+   * follower serves the same data.
+   */
+  public void testReplicaConsistencyAfterLeaderStop() throws Exception {
+    try (Connection connection = 
makeItCloseQuietly(EnvFactory.getEnv().getConnection());
+        Statement statement = 
makeItCloseQuietly(connection.createStatement())) {
+
+      LOGGER.info("Writing data to 3C3D cluster (mode: {})...", 
getIoTConsensusV2Mode());
+      statement.execute(INSERTION1);
+      statement.execute(INSERTION2);
+      statement.execute(INSERTION3);
+      statement.execute(FLUSH_COMMAND);
+
+      verifyDataConsistency(statement);
+
+      LOGGER.info("Sleeping 2 seconds to wait replicate ...");
+      Thread.sleep(1000 * 2);

Review Comment:
   This fixed 2-second sleep is very unstable in the test environment; it is 
better to check the data directory of each node to ensure that two nodes have 
the associated TsFiles.



##########
integration-test/src/test/java/org/apache/iotdb/db/it/iotconsensusv2/IoTDBIoTConsensusV23C3DBasicITBase.java:
##########
@@ -0,0 +1,252 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.it.iotconsensusv2;
+
+import 
org.apache.iotdb.confignode.it.regionmigration.IoTDBRegionOperationReliabilityITFramework;
+import org.apache.iotdb.consensus.ConsensusFactory;
+import org.apache.iotdb.isession.SessionConfig;
+import org.apache.iotdb.it.env.EnvFactory;
+import org.apache.iotdb.it.env.cluster.node.DataNodeWrapper;
+import org.apache.iotdb.itbase.env.BaseEnv;
+
+import org.apache.tsfile.utils.Pair;
+import org.awaitility.Awaitility;
+import org.junit.Assert;
+import org.junit.Before;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.iotdb.util.MagicUtils.makeItCloseQuietly;
+
+/**
+ * Abstract base for IoTConsensusV2 3C3D integration tests. Subclasses specify 
batch or stream mode.
+ *
+ * <p>Verifies that a 3C3D cluster with IoTConsensusV2 can: 1. Start 
successfully 2. Write data 3.
+ * Execute flush on cluster 4. Query and verify data was written successfully
+ *
+ * <p>Additionally tests replica consistency: after stopping the leader 
DataNode, the follower
+ * should be elected as new leader and serve the same data.
+ */
+public abstract class IoTDBIoTConsensusV23C3DBasicITBase
+    extends IoTDBRegionOperationReliabilityITFramework {
+
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(IoTDBIoTConsensusV23C3DBasicITBase.class);
+
+  protected static final int CONFIG_NODE_NUM = 3;
+  protected static final int DATA_NODE_NUM = 3;
+  protected static final int DATA_REPLICATION_FACTOR = 2;
+  protected static final int SCHEMA_REPLICATION_FACTOR = 3;
+
+  /** Timeout in seconds for 3C3D cluster init. */
+  protected static final int CLUSTER_INIT_TIMEOUT_SECONDS = 300;
+
+  protected static final String INSERTION1 =
+      "INSERT INTO root.sg.d1(timestamp,speed,temperature) values(100, 1, 2)";
+  protected static final String INSERTION2 =
+      "INSERT INTO root.sg.d1(timestamp,speed,temperature) values(101, 3, 4)";
+  protected static final String INSERTION3 =
+      "INSERT INTO root.sg.d1(timestamp,speed,temperature) values(102, 5, 6)";
+  protected static final String FLUSH_COMMAND = "flush on cluster";
+  protected static final String COUNT_QUERY = "select count(*) from 
root.sg.**";
+  protected static final String SELECT_ALL_QUERY = "select speed, temperature 
from root.sg.d1";
+
+  /**
+   * Returns IoTConsensusV2 mode: {@link 
ConsensusFactory#IOT_CONSENSUS_V2_BATCH_MODE} or {@link
+   * ConsensusFactory#IOT_CONSENSUS_V2_STREAM_MODE}.
+   */
+  protected abstract String getIoTConsensusV2Mode();
+
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    EnvFactory.getEnv()
+        .getConfig()
+        .getCommonConfig()
+        .setDataReplicationFactor(DATA_REPLICATION_FACTOR)
+        .setSchemaReplicationFactor(SCHEMA_REPLICATION_FACTOR)
+        .setIoTConsensusV2Mode(getIoTConsensusV2Mode());
+
+    EnvFactory.getEnv()
+        .initClusterEnvironment(CONFIG_NODE_NUM, DATA_NODE_NUM, 
CLUSTER_INIT_TIMEOUT_SECONDS);
+  }
+
+  public void test3C3DWriteFlushAndQuery() throws Exception {
+    try (Connection connection = 
makeItCloseQuietly(EnvFactory.getEnv().getConnection());
+        Statement statement = 
makeItCloseQuietly(connection.createStatement())) {
+
+      LOGGER.info("Writing data to 3C3D cluster (mode: {})...", 
getIoTConsensusV2Mode());
+      statement.execute(INSERTION1);
+      statement.execute(INSERTION2);
+      statement.execute(INSERTION3);
+
+      LOGGER.info("Executing flush on cluster...");
+      statement.execute(FLUSH_COMMAND);
+
+      verifyDataConsistency(statement);
+
+      LOGGER.info("3C3D IoTConsensusV2 {} basic test passed", 
getIoTConsensusV2Mode());
+    }
+  }
+
+  /**
+   * Test replica consistency: with replication factor 2, stop the leader 
DataNode and verify the
+   * follower serves the same data.
+   */
+  public void testReplicaConsistencyAfterLeaderStop() throws Exception {
+    try (Connection connection = 
makeItCloseQuietly(EnvFactory.getEnv().getConnection());
+        Statement statement = 
makeItCloseQuietly(connection.createStatement())) {
+
+      LOGGER.info("Writing data to 3C3D cluster (mode: {})...", 
getIoTConsensusV2Mode());
+      statement.execute(INSERTION1);
+      statement.execute(INSERTION2);
+      statement.execute(INSERTION3);
+      statement.execute(FLUSH_COMMAND);
+
+      verifyDataConsistency(statement);
+
+      LOGGER.info("Sleeping 2 seconds to wait replicate ...");
+      Thread.sleep(1000 * 2);
+
+      Map<Integer, Pair<Integer, Set<Integer>>> dataRegionMap =
+          getDataRegionMapWithLeader(statement);
+
+      int targetRegionId = -1;
+      int leaderDataNodeId = -1;
+      int followerDataNodeId = -1;
+      for (Map.Entry<Integer, Pair<Integer, Set<Integer>>> entry : 
dataRegionMap.entrySet()) {
+        Pair<Integer, Set<Integer>> leaderAndReplicas = entry.getValue();
+        if (leaderAndReplicas.getRight().size() > 1
+            && leaderAndReplicas.getRight().size() <= DATA_REPLICATION_FACTOR
+            && leaderAndReplicas.getLeft() > 0) {
+          targetRegionId = entry.getKey();
+          leaderDataNodeId = leaderAndReplicas.getLeft();
+          final int lambdaLeaderDataNodeId = leaderDataNodeId;
+          followerDataNodeId =
+              leaderAndReplicas.getRight().stream()
+                  .filter(i -> i != lambdaLeaderDataNodeId)
+                  .findAny()
+                  .orElse(-1);
+          break;
+        }
+      }
+
+      Assert.assertTrue(
+          "Should find a data region with leader for root.sg",
+          targetRegionId > 0 && leaderDataNodeId > 0 && followerDataNodeId > 
0);
+
+      DataNodeWrapper leaderNode =
+          EnvFactory.getEnv()
+              .dataNodeIdToWrapper(leaderDataNodeId)
+              .orElseThrow(() -> new AssertionError("DataNode not found in 
cluster"));
+      LOGGER.info(
+          "Stopping leader DataNode {} (region {}) for replica consistency 
test",
+          leaderDataNodeId,
+          targetRegionId);
+
+      leaderNode.stopForcibly();
+      Assert.assertFalse("Leader should be stopped", leaderNode.isAlive());

Review Comment:
   Killing the node may result in losing some unflushed data.
   Although you may have called FLUSH previously, the data may not have been 
synchronized to the follower by then, so that FLUSH may have had no effect on 
the follower.
   You should first wait until the unsealed TsFile appears on the follower, and 
then FLUSH.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to