This is an automated email from the ASF dual-hosted git repository.
tanxinyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new adcedc0bc57 [remove datanode] Refuse to remove when there are any
other unknown or readonly DataNodes in the consensus group (#14145)
adcedc0bc57 is described below
commit adcedc0bc57cf61dea112074f201be98381b60b6
Author: Xiangpeng Hu <[email protected]>
AuthorDate: Thu Nov 21 17:01:13 2024 +0800
[remove datanode] Refuse to remove when there are any other unknown or
readonly DataNodes in the consensus group (#14145)
* add uknown check
* remove useless check
---
.../iotdb/confignode/manager/ProcedureManager.java | 24 ++++++++++++++++++++++
.../procedure/env/RemoveDataNodeHandler.java | 20 +++++++++++++-----
.../confignode/manager/ProcedureManagerTest.java | 24 ++++++++++++++++++++++
3 files changed, 63 insertions(+), 5 deletions(-)
diff --git
a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/ProcedureManager.java
b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/ProcedureManager.java
index c4eda21c437..2b5172719db 100644
---
a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/ProcedureManager.java
+++
b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/ProcedureManager.java
@@ -692,6 +692,30 @@ public class ProcedureManager {
}
removedDataNodesRegionSet.add(regionMigrationPlan.getRegionId());
}
+
+ // 4. Check if there are any other unknown or readonly DataNodes in the
consensus group that are
+ // not the remove DataNodes
+
+ for (TDataNodeLocation removeDataNode : dataNodeLocations) {
+ Set<TDataNodeLocation> relatedDataNodes =
+
getEnv().getRemoveDataNodeHandler().getRelatedDataNodeLocations(removeDataNode);
+ relatedDataNodes.remove(removeDataNode);
+
+ for (TDataNodeLocation relatedDataNode : relatedDataNodes) {
+ NodeStatus nodeStatus =
+
getConfigManager().getLoadManager().getNodeStatus(relatedDataNode.getDataNodeId());
+ if (nodeStatus == NodeStatus.Unknown || nodeStatus ==
NodeStatus.ReadOnly) {
+ failMessage =
+ String.format(
+ "Submit RemoveDataNodesProcedure failed, "
+ + "because when there are other unknown or readonly
nodes in the consensus group that are not remove nodes, "
+ + "the remove operation cannot be performed for security
reasons. "
+ + "Please check the status of the node %s and ensure it
is running.",
+ relatedDataNode.getDataNodeId());
+ }
+ }
+ }
+
if (failMessage != null) {
LOGGER.warn(failMessage);
TSStatus failStatus = new
TSStatus(TSStatusCode.REMOVE_DATANODE_ERROR.getStatusCode());
diff --git
a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/RemoveDataNodeHandler.java
b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/RemoveDataNodeHandler.java
index 19e0b8f422f..c64d5e6164f 100644
---
a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/RemoveDataNodeHandler.java
+++
b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/RemoveDataNodeHandler.java
@@ -20,7 +20,6 @@
package org.apache.iotdb.confignode.procedure.env;
import org.apache.iotdb.common.rpc.thrift.TConsensusGroupId;
-import org.apache.iotdb.common.rpc.thrift.TConsensusGroupType;
import org.apache.iotdb.common.rpc.thrift.TDataNodeConfiguration;
import org.apache.iotdb.common.rpc.thrift.TDataNodeLocation;
import org.apache.iotdb.common.rpc.thrift.TRegionReplicaSet;
@@ -474,11 +473,22 @@ public class RemoveDataNodeHandler {
*/
public List<TConsensusGroupId> getMigratedDataNodeRegions(TDataNodeLocation
removedDataNode) {
return configManager.getPartitionManager().getAllReplicaSets().stream()
- .filter(
- replicaSet ->
- replicaSet.getDataNodeLocations().contains(removedDataNode)
- && replicaSet.regionId.getType() !=
TConsensusGroupType.ConfigRegion)
+ .filter(replicaSet ->
replicaSet.getDataNodeLocations().contains(removedDataNode))
.map(TRegionReplicaSet::getRegionId)
.collect(Collectors.toList());
}
+
+ /**
+ * Retrieves all DataNodes related to the specified DataNode.
+ *
+ * @param removedDataNode the DataNode to be removed
+ * @return a set of TDataNodeLocation representing the DataNodes associated
with the specified
+ * DataNode
+ */
+ public Set<TDataNodeLocation> getRelatedDataNodeLocations(TDataNodeLocation
removedDataNode) {
+ return configManager.getPartitionManager().getAllReplicaSets().stream()
+ .filter(replicaSet ->
replicaSet.getDataNodeLocations().contains(removedDataNode))
+ .flatMap(replicaSet -> replicaSet.getDataNodeLocations().stream())
+ .collect(Collectors.toSet());
+ }
}
diff --git
a/iotdb-core/confignode/src/test/java/org/apache/iotdb/confignode/manager/ProcedureManagerTest.java
b/iotdb-core/confignode/src/test/java/org/apache/iotdb/confignode/manager/ProcedureManagerTest.java
index e57cbd0ddb5..09a8acf16f4 100644
---
a/iotdb-core/confignode/src/test/java/org/apache/iotdb/confignode/manager/ProcedureManagerTest.java
+++
b/iotdb-core/confignode/src/test/java/org/apache/iotdb/confignode/manager/ProcedureManagerTest.java
@@ -25,6 +25,7 @@ import org.apache.iotdb.common.rpc.thrift.TDataNodeLocation;
import org.apache.iotdb.common.rpc.thrift.TEndPoint;
import org.apache.iotdb.common.rpc.thrift.TSStatus;
import org.apache.iotdb.commons.cluster.NodeStatus;
+import org.apache.iotdb.confignode.manager.load.LoadManager;
import org.apache.iotdb.confignode.procedure.Procedure;
import org.apache.iotdb.confignode.procedure.ProcedureExecutor;
import org.apache.iotdb.confignode.procedure.env.ConfigNodeProcedureEnv;
@@ -57,6 +58,8 @@ public class ProcedureManagerTest {
private static RemoveDataNodeHandler REMOVE_DATA_NODE_HANDLER;
+ private static LoadManager LOAD_MANAGER;
+
private static final ConcurrentHashMap<Long,
Procedure<ConfigNodeProcedureEnv>> procedureMap =
new ConcurrentHashMap<>();
@@ -117,6 +120,9 @@ public class ProcedureManagerTest {
RemoveDataNodeHandler removeDataNodeHandler =
ENV.getRemoveDataNodeHandler();
REMOVE_DATA_NODE_HANDLER = spy(removeDataNodeHandler);
+ LoadManager loadManager = CONFIG_MANAGER.getLoadManager();
+ LOAD_MANAGER = spy(loadManager);
+
when(PROCEDURE_MANAGER.getExecutor()).thenReturn(PROCEDURE_EXECUTOR);
when(PROCEDURE_EXECUTOR.getProcedures()).thenReturn(procedureMap);
when(PROCEDURE_MANAGER.getEnv()).thenReturn(ENV);
@@ -171,4 +177,22 @@ public class ProcedureManagerTest {
TSStatus status = PROCEDURE_MANAGER.checkRemoveDataNodes(removedDataNodes);
Assert.assertTrue(isFailed(status));
}
+
+ @Test
+ public void testCheckRemoveDataNodeWithAnotherUnknownDataNode() {
+ Set<TDataNodeLocation> relatedDataNodes = new HashSet<>();
+ relatedDataNodes.add(removeDataNodeLocationA);
+ relatedDataNodes.add(coordinatorDataNodeLocation);
+
+
when(REMOVE_DATA_NODE_HANDLER.getRelatedDataNodeLocations(removeDataNodeLocationA))
+ .thenReturn(relatedDataNodes);
+
+ when(LOAD_MANAGER.getNodeStatus(removeDataNodeLocationA.getDataNodeId()))
+ .thenReturn(NodeStatus.Running);
+
when(LOAD_MANAGER.getNodeStatus(coordinatorDataNodeLocation.getDataNodeId()))
+ .thenReturn(NodeStatus.Unknown);
+
+ TSStatus status = PROCEDURE_MANAGER.checkRemoveDataNodes(removedDataNodes);
+ Assert.assertTrue(isFailed(status));
+ }
}