This is an automated email from the ASF dual-hosted git repository.

caogaofei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new f776aa5958 [IOTDB-4361] Add a precheck in removing datanode (#7264)
f776aa5958 is described below

commit f776aa5958bbaa365641f0b44885c00c6966e7dc
Author: Itami Sho <[email protected]>
AuthorDate: Wed Sep 14 09:25:02 2022 +0800

    [IOTDB-4361] Add a precheck in removing datanode (#7264)
---
 .../procedure/env/DataNodeRemoveHandler.java       | 60 ++++++++++++++++++++--
 .../org/apache/iotdb/it/env/ConfigFactory.java     |  1 -
 .../java/org/apache/iotdb/rpc/TSStatusCode.java    |  3 +-
 3 files changed, 58 insertions(+), 6 deletions(-)

diff --git 
a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
 
b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
index e20f691d49..556498cf4d 100644
--- 
a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
+++ 
b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
@@ -29,13 +29,17 @@ import org.apache.iotdb.commons.cluster.NodeStatus;
 import org.apache.iotdb.confignode.client.DataNodeRequestType;
 import 
org.apache.iotdb.confignode.client.async.datanode.AsyncDataNodeClientPool;
 import org.apache.iotdb.confignode.client.sync.datanode.SyncDataNodeClientPool;
+import org.apache.iotdb.confignode.conf.ConfigNodeConfig;
+import org.apache.iotdb.confignode.conf.ConfigNodeDescriptor;
 import org.apache.iotdb.confignode.consensus.request.write.RemoveDataNodePlan;
 import 
org.apache.iotdb.confignode.consensus.request.write.UpdateRegionLocationPlan;
 import org.apache.iotdb.confignode.consensus.response.DataNodeToStatusResp;
 import org.apache.iotdb.confignode.manager.ConfigManager;
+import org.apache.iotdb.confignode.manager.load.heartbeat.BaseNodeCache;
 import org.apache.iotdb.confignode.persistence.NodeInfo;
 import org.apache.iotdb.confignode.procedure.exception.ProcedureException;
 import org.apache.iotdb.confignode.procedure.scheduler.LockQueue;
+import org.apache.iotdb.consensus.ConsensusFactory;
 import org.apache.iotdb.mpp.rpc.thrift.TCreatePeerReq;
 import org.apache.iotdb.mpp.rpc.thrift.TDisableDataNodeReq;
 import org.apache.iotdb.mpp.rpc.thrift.TMaintainPeerReq;
@@ -53,6 +57,8 @@ import java.util.stream.Collectors;
 public class DataNodeRemoveHandler {
   private static final Logger LOGGER = 
LoggerFactory.getLogger(DataNodeRemoveHandler.class);
 
+  private static final ConfigNodeConfig CONF = 
ConfigNodeDescriptor.getInstance().getConf();
+
   private final ConfigManager configManager;
 
   /** region migrate lock */
@@ -386,14 +392,20 @@ public class DataNodeRemoveHandler {
   public DataNodeToStatusResp checkRemoveDataNodeRequest(RemoveDataNodePlan 
removeDataNodePlan) {
     DataNodeToStatusResp dataSet = new DataNodeToStatusResp();
     dataSet.setStatus(new 
TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode()));
-    TSStatus status = checkRegionReplication(removeDataNodePlan);
-    if (status.getCode() != TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
+
+    TSStatus status = checkClusterProtocol();
+    if (isFailed(status)) {
+      dataSet.setStatus(status);
+      return dataSet;
+    }
+    status = checkRegionReplication(removeDataNodePlan);
+    if (isFailed(status)) {
       dataSet.setStatus(status);
       return dataSet;
     }
 
     status = checkDataNodeExist(removeDataNodePlan);
-    if (status.getCode() != TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
+    if (isFailed(status)) {
       dataSet.setStatus(status);
       return dataSet;
     }
@@ -433,8 +445,31 @@ public class DataNodeRemoveHandler {
    */
   private TSStatus checkRegionReplication(RemoveDataNodePlan 
removeDataNodePlan) {
     TSStatus status = new 
TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode());
-    int removedDataNodeSize = removeDataNodePlan.getDataNodeLocations().size();
+    List<TDataNodeLocation> removedDataNodes = 
removeDataNodePlan.getDataNodeLocations();
     int allDataNodeSize = 
configManager.getNodeManager().getRegisteredDataNodeCount();
+
+    // when the configuration is one replication, it will be failed if the 
data node is not in
+    // running state.
+    if (CONF.getSchemaReplicationFactor() == 1 || 
CONF.getDataReplicationFactor() == 1) {
+      for (TDataNodeLocation dataNodeLocation : removedDataNodes) {
+        // check whether removed data node is in running state
+        BaseNodeCache nodeCache =
+            
configManager.getNodeManager().getNodeCacheMap().get(dataNodeLocation.getDataNodeId());
+        if (!nodeCache.getNodeStatus().getStatus().equals("Running")) {
+          removedDataNodes.remove(dataNodeLocation);
+          LOGGER.error(
+              "Failed to remove data node {} because it is not in running and 
the configuration of cluster is one replication",
+              dataNodeLocation);
+        }
+        if (removedDataNodes.size() == 0) {
+          status.setCode(TSStatusCode.LACK_REPLICATION.getStatusCode());
+          status.setMessage("Failed to remove all requested data nodes");
+          return status;
+        }
+      }
+    }
+
+    int removedDataNodeSize = removeDataNodePlan.getDataNodeLocations().size();
     if (allDataNodeSize - removedDataNodeSize < NodeInfo.getMinimumDataNode()) 
{
       status.setCode(TSStatusCode.LACK_REPLICATION.getStatusCode());
       status.setMessage(
@@ -492,4 +527,21 @@ public class DataNodeRemoveHandler {
     // TODO replace findAny() by select the low load node.
     return regionReplicaNodes.stream().filter(e -> 
!e.equals(filterLocation)).findAny();
   }
+
+  /**
+   * Check the cluster's consensus protocols; standalone consensus cannot remove a data node yet.
+   *
+   * @return SUCCESS_STATUS unless the data region or schema region consensus protocol class is
+   *     StandAloneConsensus, in which case REMOVE_DATANODE_FAILED with an explanatory message
+   */
+  private TSStatus checkClusterProtocol() {
+    TSStatus status = new 
TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode());
+    if 
(CONF.getDataRegionConsensusProtocolClass().equals(ConsensusFactory.StandAloneConsensus)
+        || CONF.getSchemaRegionConsensusProtocolClass()
+            .equals(ConsensusFactory.StandAloneConsensus)) {
+      status.setCode(TSStatusCode.REMOVE_DATANODE_FAILED.getStatusCode());
+      status.setMessage("standalone protocol is not supported to remove data 
node");
+    }
+    return status;
+  }
 }
diff --git 
a/integration-test/src/main/java/org/apache/iotdb/it/env/ConfigFactory.java 
b/integration-test/src/main/java/org/apache/iotdb/it/env/ConfigFactory.java
index ec84b74510..22f48193b5 100644
--- a/integration-test/src/main/java/org/apache/iotdb/it/env/ConfigFactory.java
+++ b/integration-test/src/main/java/org/apache/iotdb/it/env/ConfigFactory.java
@@ -38,7 +38,6 @@ public class ConfigFactory {
             break;
           case "LocalStandaloneOnMpp":
           case "Cluster1":
-          case "Cluster2":
             config = new MppConfig();
             break;
           case "Remote":
diff --git a/service-rpc/src/main/java/org/apache/iotdb/rpc/TSStatusCode.java 
b/service-rpc/src/main/java/org/apache/iotdb/rpc/TSStatusCode.java
index edb1675f4d..d611b32bd8 100644
--- a/service-rpc/src/main/java/org/apache/iotdb/rpc/TSStatusCode.java
+++ b/service-rpc/src/main/java/org/apache/iotdb/rpc/TSStatusCode.java
@@ -152,7 +152,8 @@ public enum TSStatusCode {
   REGION_MIGRATE_FAILED(915),
   LACK_REPLICATION(916),
   DATANODE_STOP_ERROR(917),
-  REGION_LEADER_CHANGE_FAILED(918);
+  REGION_LEADER_CHANGE_FAILED(918),
+  REMOVE_DATANODE_FAILED(919);
 
   private int statusCode;
 

Reply via email to