OneSizeFitsQuorum commented on code in PR #12376: URL: https://github.com/apache/iotdb/pull/12376#discussion_r1571861284
########## iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/RegionMaintainHandler.java: ########## @@ -661,50 +660,48 @@ public void removeDataNodePersistence(TDataNodeLocation dataNodeLocation) { * @param regionId The region to be migrated * @param originalDataNode The DataNode where the region locates */ - public void changeRegionLeader(TConsensusGroupId regionId, TDataNodeLocation originalDataNode) { + public void transferRegionLeader(TConsensusGroupId regionId, TDataNodeLocation originalDataNode) + throws ProcedureException { Optional<TDataNodeLocation> newLeaderNode = filterDataNodeWithOtherRegionReplica(regionId, originalDataNode); - - if (TConsensusGroupType.DataRegion.equals(regionId.getType()) - && IOT_CONSENSUS.equals(CONF.getDataRegionConsensusProtocolClass())) { - if (newLeaderNode.isPresent()) { - configManager - .getLoadManager() - .forceUpdateConsensusGroupCache( - Collections.singletonMap( - regionId, - new ConsensusGroupHeartbeatSample( - System.nanoTime(), newLeaderNode.get().getDataNodeId()))); - LOGGER.info( - "{}, Change region leader finished for IOT_CONSENSUS, regionId: {}, newLeaderNode: {}", - REGION_MIGRATE_PROCESS, - regionId, - newLeaderNode); + newLeaderNode.orElseThrow(() -> new ProcedureException("Cannot find the new leader")); + + // ratis needs DataNode to do election by itself + if (TConsensusGroupType.SchemaRegion.equals(regionId.getType()) + || TConsensusGroupType.DataRegion.equals(regionId.getType()) + && RATIS_CONSENSUS.equals(CONF.getDataRegionConsensusProtocolClass())) { + final int MAX_RETRY_TIME = 10; + int retryTime = 0; + while (true) { + TRegionLeaderChangeResp resp = + SyncDataNodeClientPool.getInstance() + .changeRegionLeader( + regionId, originalDataNode.getInternalEndPoint(), newLeaderNode.get()); + if (resp.getStatus().getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()) { + break; + } + if (retryTime++ > MAX_RETRY_TIME) { + throw new ProcedureException("Transfer leader fail"); + } 
+ LOGGER.warn("Call changeRegionLeader fail for the {} time", retryTime); } - - return; } - if (newLeaderNode.isPresent()) { - TRegionLeaderChangeResp resp = - SyncDataNodeClientPool.getInstance() - .changeRegionLeader( - regionId, originalDataNode.getInternalEndPoint(), newLeaderNode.get()); - if (resp.getStatus().getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()) { - configManager - .getLoadManager() - .forceUpdateConsensusGroupCache( - Collections.singletonMap( - regionId, - new ConsensusGroupHeartbeatSample( - resp.getConsensusLogicalTimestamp(), newLeaderNode.get().getDataNodeId()))); - } - LOGGER.info( - "{}, Change region leader finished for RATIS_CONSENSUS, regionId: {}, newLeaderNode: {}", - REGION_MIGRATE_PROCESS, - regionId, - newLeaderNode); - } + configManager + .getLoadManager() + .forceUpdateConsensusGroupCache( + Collections.singletonMap( + regionId, + new ConsensusGroupHeartbeatSample( Review Comment: Should this be TRegionLeaderChangeResp.getConsensusLogicalTimestamp() for RatisConsensus? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@iotdb.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org