This is an automated email from the ASF dual-hosted git repository. tanxinyu pushed a commit to branch fix_removed_confignode_rejoin_without_delete_data_cp in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 6caedc8889c7929777b5ed1113d24fd0c2b50354 Author: Potato <[email protected]> AuthorDate: Tue Mar 18 16:41:19 2025 +0800 Make removed confignode can rejoin cluster without deleting data dir (#15118) * finish Signed-off-by: OneSizeFitQuorum <[email protected]> * fix IT Signed-off-by: OneSizeFitQuorum <[email protected]> * fix IT Signed-off-by: OneSizeFitQuorum <[email protected]> --------- Signed-off-by: OneSizeFitQuorum <[email protected]> --- .../confignode/it/cluster/IoTDBClusterNodeGetterIT.java | 2 +- .../iotdb/confignode/client/CnToCnNodeRequestType.java | 2 +- .../confignode/client/sync/SyncConfigNodeClientPool.java | 6 +++--- .../confignode/procedure/env/ConfigNodeProcedureEnv.java | 5 +++-- .../procedure/impl/node/RemoveConfigNodeProcedure.java | 8 ++++---- .../confignode/procedure/state/RemoveConfigNodeState.java | 2 +- .../service/thrift/ConfigNodeRPCServiceProcessor.java | 10 ++++++---- .../apache/iotdb/db/protocol/client/ConfigNodeClient.java | 4 ++-- .../thrift/impl/DataNodeInternalRPCServiceImpl.java | 15 ++++++++------- .../config/executor/ClusterConfigTaskExecutor.java | 4 +--- .../thrift-confignode/src/main/thrift/confignode.thrift | 4 ++-- 11 files changed, 32 insertions(+), 30 deletions(-) diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeGetterIT.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeGetterIT.java index 1c1afe917c0..2ca034497f0 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeGetterIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/cluster/IoTDBClusterNodeGetterIT.java @@ -239,7 +239,7 @@ public class IoTDBClusterNodeGetterIT { } // Test stop ConfigNode - status = client.stopConfigNode(removedConfigNodeLocation); + status = client.stopAndClearConfigNode(removedConfigNodeLocation); assertEquals(TSStatusCode.SUCCESS_STATUS.getStatusCode(), status.getCode()); } } diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/client/CnToCnNodeRequestType.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/client/CnToCnNodeRequestType.java index 3864f26cbc8..e422e45dff0 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/client/CnToCnNodeRequestType.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/client/CnToCnNodeRequestType.java @@ -27,7 +27,7 @@ public enum CnToCnNodeRequestType { REMOVE_CONFIG_NODE, DELETE_CONFIG_NODE_PEER, REPORT_CONFIG_NODE_SHUTDOWN, - STOP_CONFIG_NODE, + STOP_AND_CLEAR_CONFIG_NODE, SET_CONFIGURATION, SHOW_CONFIGURATION, SUBMIT_TEST_CONNECTION_TASK, diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/client/sync/SyncConfigNodeClientPool.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/client/sync/SyncConfigNodeClientPool.java index cc26be000a8..6282924d0a0 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/client/sync/SyncConfigNodeClientPool.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/client/sync/SyncConfigNodeClientPool.java @@ -87,9 +87,9 @@ public class SyncConfigNodeClientPool { return client.deleteConfigNodePeer((TConfigNodeLocation) req); case REPORT_CONFIG_NODE_SHUTDOWN: return client.reportConfigNodeShutdown((TConfigNodeLocation) req); - case STOP_CONFIG_NODE: - // Only use stopConfigNode when the ConfigNode is removed. - return client.stopConfigNode((TConfigNodeLocation) req); + case STOP_AND_CLEAR_CONFIG_NODE: + // Only use stopAndClearConfigNode when the ConfigNode is removed. + return client.stopAndClearConfigNode((TConfigNodeLocation) req); case SET_CONFIGURATION: return client.setConfiguration((TSetConfigurationReq) req); case SHOW_CONFIGURATION: diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/ConfigNodeProcedureEnv.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/ConfigNodeProcedureEnv.java index ceb7d3cfb38..5ae4a303037 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/ConfigNodeProcedureEnv.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/ConfigNodeProcedureEnv.java @@ -317,14 +317,15 @@ public class ConfigNodeProcedureEnv { * @param tConfigNodeLocation config node location * @throws ProcedureException if failed status */ - public void stopConfigNode(TConfigNodeLocation tConfigNodeLocation) throws ProcedureException { + public void stopAndClearConfigNode(TConfigNodeLocation tConfigNodeLocation) + throws ProcedureException { TSStatus tsStatus = (TSStatus) SyncConfigNodeClientPool.getInstance() .sendSyncRequestToConfigNodeWithRetry( tConfigNodeLocation.getInternalEndPoint(), tConfigNodeLocation, - CnToCnNodeRequestType.STOP_CONFIG_NODE); + CnToCnNodeRequestType.STOP_AND_CLEAR_CONFIG_NODE); if (tsStatus.getCode() != TSStatusCode.SUCCESS_STATUS.getStatusCode()) { throw new ProcedureException(tsStatus.getMessage()); diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveConfigNodeProcedure.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveConfigNodeProcedure.java index 0910f06605d..2780214c3a8 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveConfigNodeProcedure.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveConfigNodeProcedure.java @@ -65,12 +65,12 @@ public class RemoveConfigNodeProcedure extends AbstractNodeProcedure<RemoveConfi break; case DELETE_PEER: env.deleteConfigNodePeer(removedConfigNode); - setNextState(RemoveConfigNodeState.STOP_CONFIG_NODE); + setNextState(RemoveConfigNodeState.STOP_AND_CLEAR_CONFIG_NODE); LOG.info("Delete peer for ConfigNode: {}", removedConfigNode); break; - case STOP_CONFIG_NODE: - env.stopConfigNode(removedConfigNode); - LOG.info("Stop ConfigNode: {}", removedConfigNode); + case STOP_AND_CLEAR_CONFIG_NODE: + env.stopAndClearConfigNode(removedConfigNode); + LOG.info("Stop and clear ConfigNode: {}", removedConfigNode); return Flow.NO_MORE_STATE; } } catch (Exception e) { diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/state/RemoveConfigNodeState.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/state/RemoveConfigNodeState.java index 864ee97e0cc..312b3c3cb49 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/state/RemoveConfigNodeState.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/state/RemoveConfigNodeState.java @@ -22,5 +22,5 @@ package org.apache.iotdb.confignode.procedure.state; public enum RemoveConfigNodeState { REMOVE_PEER, DELETE_PEER, - STOP_CONFIG_NODE + STOP_AND_CLEAR_CONFIG_NODE } diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceProcessor.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceProcessor.java index 76bfe8d251c..08363bab296 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceProcessor.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceProcessor.java @@ -43,6 +43,7 @@ import org.apache.iotdb.commons.utils.TestOnly; import org.apache.iotdb.confignode.conf.ConfigNodeConfig; import org.apache.iotdb.confignode.conf.ConfigNodeConstant; import org.apache.iotdb.confignode.conf.ConfigNodeDescriptor; +import org.apache.iotdb.confignode.conf.ConfigNodeSystemPropertiesHandler; import org.apache.iotdb.confignode.conf.SystemPropertiesUtils; import org.apache.iotdb.confignode.consensus.request.ConfigPhysicalPlanType; import org.apache.iotdb.confignode.consensus.request.read.ainode.GetAINodeConfigurationPlan; @@ -792,7 +793,7 @@ public class ConfigNodeRPCServiceProcessor implements IConfigNodeRPCService.Ifac /** Stop ConfigNode */ @Override - public TSStatus stopConfigNode(TConfigNodeLocation configNodeLocation) { + public TSStatus stopAndClearConfigNode(TConfigNodeLocation configNodeLocation) { new Thread( // TODO: Perhaps we should find some other way of shutting down the config node, adding // a hard dependency @@ -801,18 +802,19 @@ public class ConfigNodeRPCServiceProcessor implements IConfigNodeRPCService.Ifac // instance is created feels cleaner. () -> { try { - // Sleep 1s before stop itself - TimeUnit.SECONDS.sleep(1); + // Sleep 5s before stop itself + TimeUnit.SECONDS.sleep(5); } catch (InterruptedException e) { Thread.currentThread().interrupt(); LOGGER.warn(e.getMessage()); } finally { + ConfigNodeSystemPropertiesHandler.getInstance().delete(); configNode.stop(); } }) .start(); return new TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode()) - .setMessage("Stop ConfigNode success."); + .setMessage("Stop And Clear ConfigNode Success."); } @Override diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/client/ConfigNodeClient.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/client/ConfigNodeClient.java index 6585c39cf6a..f809c92be5c 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/client/ConfigNodeClient.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/client/ConfigNodeClient.java @@ -708,8 +708,8 @@ public class ConfigNodeClient implements IConfigNodeRPCService.Iface, ThriftClie } @Override - public TSStatus stopConfigNode(TConfigNodeLocation configNodeLocation) throws TException { - throw new TException("DataNode to ConfigNode client doesn't support stopConfigNode."); + public TSStatus stopAndClearConfigNode(TConfigNodeLocation configNodeLocation) throws TException { + throw new TException("DataNode to ConfigNode client doesn't support stopAndClearConfigNode."); } @Override diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/thrift/impl/DataNodeInternalRPCServiceImpl.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/thrift/impl/DataNodeInternalRPCServiceImpl.java index e40e0996e23..c5e92c754ac 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/thrift/impl/DataNodeInternalRPCServiceImpl.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/thrift/impl/DataNodeInternalRPCServiceImpl.java @@ -2385,18 +2385,19 @@ public class DataNodeInternalRPCServiceImpl implements IDataNodeRPCService.Iface @Override public TSStatus stopAndClearDataNode() { TSStatus status = new TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode()); - LOGGER.info("Execute stopDataNode RPC method"); + LOGGER.info("Execute stopAndClearDataNode RPC method"); - // kill the datanode process 20 seconds later + // kill the datanode process 30 seconds later // because datanode process cannot exit normally for the reason of InterruptedException new Thread( () -> { try { - TimeUnit.SECONDS.sleep(20); + TimeUnit.SECONDS.sleep(30); } catch (InterruptedException e) { - LOGGER.warn("Meets InterruptedException in stopDataNode RPC method"); + LOGGER.warn("Meets InterruptedException in stopAndClearDataNode RPC method"); } finally { - LOGGER.info("Executing system.exit(0) in stopDataNode RPC method after 20 seconds"); + LOGGER.info( + "Executing system.exit(0) in stopAndClearDataNode RPC method after 30 seconds"); System.exit(0); } }) @@ -2404,10 +2405,10 @@ public class DataNodeInternalRPCServiceImpl implements IDataNodeRPCService.Iface try { DataNode.getInstance().stop(); - status.setMessage("stop datanode succeed"); + status.setMessage("Stop And Clear Data Node succeed"); DataNode.getInstance().deleteDataNodeSystemProperties(); } catch (Exception e) { - LOGGER.warn("Stop Data Node error", e); + LOGGER.warn("Stop And Clear Data Node error", e); status.setCode(TSStatusCode.DATANODE_STOP_ERROR.getStatusCode()); status.setMessage(e.getMessage()); } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/execution/config/executor/ClusterConfigTaskExecutor.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/execution/config/executor/ClusterConfigTaskExecutor.java index a26185b832c..ef909165c4a 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/execution/config/executor/ClusterConfigTaskExecutor.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/execution/config/executor/ClusterConfigTaskExecutor.java @@ -2766,9 +2766,7 @@ public class ClusterConfigTaskExecutor implements IConfigTaskExecutor { future.setException(new IOException("Remove ConfigNode failed: " + status.getMessage())); return future; } else { - LOGGER.info( - "ConfigNode: {} is removed. If the confignode data directory is no longer needed, you can delete it manually.", - removeConfigNodeId); + LOGGER.info("ConfigNode: {} is removed.", removeConfigNodeId); future.set(new ConfigTaskResult(TSStatusCode.SUCCESS_STATUS)); } diff --git a/iotdb-protocol/thrift-confignode/src/main/thrift/confignode.thrift b/iotdb-protocol/thrift-confignode/src/main/thrift/confignode.thrift index 0adf5ade231..f8551590964 100644 --- a/iotdb-protocol/thrift-confignode/src/main/thrift/confignode.thrift +++ b/iotdb-protocol/thrift-confignode/src/main/thrift/confignode.thrift @@ -1335,8 +1335,8 @@ service IConfigNodeRPCService { */ common.TSStatus reportConfigNodeShutdown(common.TConfigNodeLocation configNodeLocation) - /** Stop the specific ConfigNode */ - common.TSStatus stopConfigNode(common.TConfigNodeLocation configNodeLocation) + /** Stop the specific ConfigNode and clear data */ + common.TSStatus stopAndClearConfigNode(common.TConfigNodeLocation configNodeLocation) /** The ConfigNode-leader will ping other ConfigNodes periodically */ TConfigNodeHeartbeatResp getConfigNodeHeartBeat(TConfigNodeHeartbeatReq req)
