This is an automated email from the ASF dual-hosted git repository.
caogaofei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new f776aa5958 [IOTDB-4361] Add a precheck in removing datanode (#7264)
f776aa5958 is described below
commit f776aa5958bbaa365641f0b44885c00c6966e7dc
Author: Itami Sho <[email protected]>
AuthorDate: Wed Sep 14 09:25:02 2022 +0800
[IOTDB-4361] Add a precheck in removing datanode (#7264)
---
.../procedure/env/DataNodeRemoveHandler.java | 60 ++++++++++++++++++++--
.../org/apache/iotdb/it/env/ConfigFactory.java | 1 -
.../java/org/apache/iotdb/rpc/TSStatusCode.java | 3 +-
3 files changed, 58 insertions(+), 6 deletions(-)
diff --git
a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
index e20f691d49..556498cf4d 100644
---
a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
+++
b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
@@ -29,13 +29,17 @@ import org.apache.iotdb.commons.cluster.NodeStatus;
import org.apache.iotdb.confignode.client.DataNodeRequestType;
import
org.apache.iotdb.confignode.client.async.datanode.AsyncDataNodeClientPool;
import org.apache.iotdb.confignode.client.sync.datanode.SyncDataNodeClientPool;
+import org.apache.iotdb.confignode.conf.ConfigNodeConfig;
+import org.apache.iotdb.confignode.conf.ConfigNodeDescriptor;
import org.apache.iotdb.confignode.consensus.request.write.RemoveDataNodePlan;
import
org.apache.iotdb.confignode.consensus.request.write.UpdateRegionLocationPlan;
import org.apache.iotdb.confignode.consensus.response.DataNodeToStatusResp;
import org.apache.iotdb.confignode.manager.ConfigManager;
+import org.apache.iotdb.confignode.manager.load.heartbeat.BaseNodeCache;
import org.apache.iotdb.confignode.persistence.NodeInfo;
import org.apache.iotdb.confignode.procedure.exception.ProcedureException;
import org.apache.iotdb.confignode.procedure.scheduler.LockQueue;
+import org.apache.iotdb.consensus.ConsensusFactory;
import org.apache.iotdb.mpp.rpc.thrift.TCreatePeerReq;
import org.apache.iotdb.mpp.rpc.thrift.TDisableDataNodeReq;
import org.apache.iotdb.mpp.rpc.thrift.TMaintainPeerReq;
@@ -53,6 +57,8 @@ import java.util.stream.Collectors;
public class DataNodeRemoveHandler {
private static final Logger LOGGER =
LoggerFactory.getLogger(DataNodeRemoveHandler.class);
+ private static final ConfigNodeConfig CONF =
ConfigNodeDescriptor.getInstance().getConf();
+
private final ConfigManager configManager;
/** region migrate lock */
@@ -386,14 +392,20 @@ public class DataNodeRemoveHandler {
public DataNodeToStatusResp checkRemoveDataNodeRequest(RemoveDataNodePlan
removeDataNodePlan) {
DataNodeToStatusResp dataSet = new DataNodeToStatusResp();
dataSet.setStatus(new
TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode()));
- TSStatus status = checkRegionReplication(removeDataNodePlan);
- if (status.getCode() != TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
+
+ TSStatus status = checkClusterProtocol();
+ if (isFailed(status)) {
+ dataSet.setStatus(status);
+ return dataSet;
+ }
+ status = checkRegionReplication(removeDataNodePlan);
+ if (isFailed(status)) {
dataSet.setStatus(status);
return dataSet;
}
status = checkDataNodeExist(removeDataNodePlan);
- if (status.getCode() != TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
+ if (isFailed(status)) {
dataSet.setStatus(status);
return dataSet;
}
@@ -433,8 +445,31 @@ public class DataNodeRemoveHandler {
*/
private TSStatus checkRegionReplication(RemoveDataNodePlan
removeDataNodePlan) {
TSStatus status = new
TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode());
- int removedDataNodeSize = removeDataNodePlan.getDataNodeLocations().size();
+ List<TDataNodeLocation> removedDataNodes =
removeDataNodePlan.getDataNodeLocations();
int allDataNodeSize =
configManager.getNodeManager().getRegisteredDataNodeCount();
+
+ // when the configuration is one replication, it will be failed if the
data node is not in
+ // running state.
+ if (CONF.getSchemaReplicationFactor() == 1 ||
CONF.getDataReplicationFactor() == 1) {
+ for (TDataNodeLocation dataNodeLocation : removedDataNodes) {
+ // check whether removed data node is in running state
+ BaseNodeCache nodeCache =
+
configManager.getNodeManager().getNodeCacheMap().get(dataNodeLocation.getDataNodeId());
+ if (!nodeCache.getNodeStatus().getStatus().equals("Running")) {
+ removedDataNodes.remove(dataNodeLocation);
+ LOGGER.error(
+ "Failed to remove data node {} because it is not in running and
the configuration of cluster is one replication",
+ dataNodeLocation);
+ }
+ if (removedDataNodes.size() == 0) {
+ status.setCode(TSStatusCode.LACK_REPLICATION.getStatusCode());
+ status.setMessage("Failed to remove all requested data nodes");
+ return status;
+ }
+ }
+ }
+
+ int removedDataNodeSize = removeDataNodePlan.getDataNodeLocations().size();
if (allDataNodeSize - removedDataNodeSize < NodeInfo.getMinimumDataNode())
{
status.setCode(TSStatusCode.LACK_REPLICATION.getStatusCode());
status.setMessage(
@@ -492,4 +527,21 @@ public class DataNodeRemoveHandler {
// TODO replace findAny() by select the low load node.
return regionReplicaNodes.stream().filter(e ->
!e.equals(filterLocation)).findAny();
}
+
+ /**
+ * Check the cluster consensus protocol; removing a data node is unsupported under standalone.
+ *
+ * @return a status with code SUCCESS_STATUS if neither the data region nor the schema region
+ * uses the standalone consensus protocol, otherwise one with code REMOVE_DATANODE_FAILED
+ */
+ private TSStatus checkClusterProtocol() {
+ TSStatus status = new
TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode());
+ if
(CONF.getDataRegionConsensusProtocolClass().equals(ConsensusFactory.StandAloneConsensus)
+ || CONF.getSchemaRegionConsensusProtocolClass()
+ .equals(ConsensusFactory.StandAloneConsensus)) {
+ status.setCode(TSStatusCode.REMOVE_DATANODE_FAILED.getStatusCode());
+ status.setMessage("standalone protocol is not supported to remove data
node");
+ }
+ return status;
+ }
}
diff --git
a/integration-test/src/main/java/org/apache/iotdb/it/env/ConfigFactory.java
b/integration-test/src/main/java/org/apache/iotdb/it/env/ConfigFactory.java
index ec84b74510..22f48193b5 100644
--- a/integration-test/src/main/java/org/apache/iotdb/it/env/ConfigFactory.java
+++ b/integration-test/src/main/java/org/apache/iotdb/it/env/ConfigFactory.java
@@ -38,7 +38,6 @@ public class ConfigFactory {
break;
case "LocalStandaloneOnMpp":
case "Cluster1":
- case "Cluster2":
config = new MppConfig();
break;
case "Remote":
diff --git a/service-rpc/src/main/java/org/apache/iotdb/rpc/TSStatusCode.java
b/service-rpc/src/main/java/org/apache/iotdb/rpc/TSStatusCode.java
index edb1675f4d..d611b32bd8 100644
--- a/service-rpc/src/main/java/org/apache/iotdb/rpc/TSStatusCode.java
+++ b/service-rpc/src/main/java/org/apache/iotdb/rpc/TSStatusCode.java
@@ -152,7 +152,8 @@ public enum TSStatusCode {
REGION_MIGRATE_FAILED(915),
LACK_REPLICATION(916),
DATANODE_STOP_ERROR(917),
- REGION_LEADER_CHANGE_FAILED(918);
+ REGION_LEADER_CHANGE_FAILED(918),
+ REMOVE_DATANODE_FAILED(919);
private int statusCode;