This is an automated email from the ASF dual-hosted git repository. jackietien pushed a commit to branch rc/1.3.5 in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 4b9df07a0586204823d4344f51f0c41e94b6c373 Author: Yongzao <[email protected]> AuthorDate: Tue Jun 17 16:02:40 2025 +0800 [To dev/1.3] Enhance "remove region" sql to handle the case of DataNode not exist (#15744) * Enhance "remove region" sql to handle the case of DataNode not exist (#15728) * finish --------- Co-authored-by: Li Yu Heng <[email protected]> --- .../iotdb/confignode/manager/ProcedureManager.java | 45 ++++++++++++++++------ .../partition/DatabasePartitionTable.java | 10 +++-- .../persistence/partition/PartitionInfo.java | 2 +- .../persistence/partition/RegionGroup.java | 6 ++- 4 files changed, 45 insertions(+), 18 deletions(-) diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/ProcedureManager.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/ProcedureManager.java index d94bef75488..460ed5276ca 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/ProcedureManager.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/ProcedureManager.java @@ -23,6 +23,7 @@ import org.apache.iotdb.common.rpc.thrift.TConsensusGroupId; import org.apache.iotdb.common.rpc.thrift.TConsensusGroupType; import org.apache.iotdb.common.rpc.thrift.TDataNodeConfiguration; import org.apache.iotdb.common.rpc.thrift.TDataNodeLocation; +import org.apache.iotdb.common.rpc.thrift.TEndPoint; import org.apache.iotdb.common.rpc.thrift.TSStatus; import org.apache.iotdb.commons.cluster.NodeStatus; import org.apache.iotdb.commons.conf.CommonConfig; @@ -136,6 +137,8 @@ import org.apache.tsfile.utils.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; + import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -778,15 +781,11 @@ public class ProcedureManager { private TSStatus checkRemoveRegion( TRemoveRegionReq req, TConsensusGroupId regionId, - TDataNodeLocation targetDataNode, + @Nullable TDataNodeLocation targetDataNode, TDataNodeLocation coordinator) { String failMessage = regionOperationCommonCheck( - regionId, - targetDataNode, - Arrays.asList( - new Pair<>("Target DataNode", targetDataNode), - new Pair<>("Coordinator", coordinator))); + regionId, targetDataNode, Arrays.asList(new Pair<>("Coordinator", coordinator))); ConfigNodeConfig conf = ConfigNodeDescriptor.getInstance().getConf(); if (configManager @@ -796,11 +795,12 @@ public class ProcedureManager { .getDataNodeLocationsSize() == 1) { failMessage = String.format("%s only has 1 replica, it cannot be removed", regionId); - } else if (configManager - .getPartitionManager() - .getAllReplicaSets(targetDataNode.getDataNodeId()) - .stream() - .noneMatch(replicaSet -> replicaSet.getRegionId().equals(regionId))) { + } else if (targetDataNode != null + && configManager + .getPartitionManager() + .getAllReplicaSets(targetDataNode.getDataNodeId()) + .stream() + .noneMatch(replicaSet -> replicaSet.getRegionId().equals(regionId))) { failMessage = String.format( "Target DataNode %s doesn't contain Region %s", req.getDataNodeId(), regionId); @@ -1131,6 +1131,23 @@ public class ProcedureManager { return status; } + // SPECIAL CASE + if (targetDataNode == null) { + // If targetDataNode is null, it means the target DataNode does not exist in the + // NodeManager. + // In this case, simply clean up the partition table once and do nothing else. + LOGGER.warn( + "Remove region: Target DataNode {} not found, will simply clean up the partition table of region {} and do nothing else.", + req.getDataNodeId(), + req.getRegionId()); + this.executor + .getEnvironment() + .getRegionMaintainHandler() + .removeRegionLocation( + regionId, buildFakeDataNodeLocation(req.getDataNodeId(), "FakeIpForRemoveRegion")); + return new TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode()); + } + // submit procedure RemoveRegionPeerProcedure procedure = new RemoveRegionPeerProcedure(regionId, coordinator, targetDataNode); @@ -1142,6 +1159,12 @@ public class ProcedureManager { } } + private static TDataNodeLocation buildFakeDataNodeLocation(int dataNodeId, String message) { + TEndPoint fakeEndPoint = new TEndPoint(message, -1); + return new TDataNodeLocation( + dataNodeId, fakeEndPoint, fakeEndPoint, fakeEndPoint, fakeEndPoint, fakeEndPoint); + } + // endregion /** diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/DatabasePartitionTable.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/DatabasePartitionTable.java index 112a89c47e6..b301b13b56e 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/DatabasePartitionTable.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/DatabasePartitionTable.java @@ -538,7 +538,7 @@ public class DatabasePartitionTable { regionGroup.addRegionLocation(node); } - void removeRegionLocation(TConsensusGroupId regionId, TDataNodeLocation node) { + void removeRegionLocation(TConsensusGroupId regionId, int nodeId) { RegionGroup regionGroup = regionGroupMap.get(regionId); if (regionGroup == null) { LOGGER.warn( @@ -547,16 +547,18 @@ public class DatabasePartitionTable { databaseName); return; } - if (!regionGroup.getReplicaSet().getDataNodeLocations().contains(node)) { + if (regionGroup.getReplicaSet().getDataNodeLocations().stream() + .map(TDataNodeLocation::getDataNodeId) + .noneMatch(id -> id == nodeId)) { LOGGER.info( "Node is not in region locations when removeRegionOldLocation in {}, " + "no need to remove it, node: {}, region: {}", databaseName, - node, + nodeId, regionId); return; } - regionGroup.removeRegionLocation(node); + regionGroup.removeRegionLocation(nodeId); } /** diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/PartitionInfo.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/PartitionInfo.java index 4711ce05f8a..8c91c99859e 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/PartitionInfo.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/PartitionInfo.java @@ -611,7 +611,7 @@ public class PartitionInfo implements SnapshotProcessor { .forEach( databasePartitionTable -> databasePartitionTable.removeRegionLocation( - req.getRegionId(), req.getDeprecatedLocation())); + req.getRegionId(), req.getDeprecatedLocation().getDataNodeId())); return new TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode()); } diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/RegionGroup.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/RegionGroup.java index 69143d8e287..6f9860b6bce 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/RegionGroup.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/partition/RegionGroup.java @@ -95,8 +95,10 @@ public class RegionGroup { replicaSet.getDataNodeLocations().sort(TDataNodeLocation::compareTo); } - public synchronized void removeRegionLocation(TDataNodeLocation node) { - replicaSet.getDataNodeLocations().remove(node); + public synchronized void removeRegionLocation(int nodeId) { + replicaSet + .getDataNodeLocations() + .removeIf(tDataNodeLocation -> nodeId == tDataNodeLocation.getDataNodeId()); replicaSet.getDataNodeLocations().sort(TDataNodeLocation::compareTo); }
