This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit f007ff2839c9ca1dbf661a68175d79558bdeddcc Author: yujun <[email protected]> AuthorDate: Sun Aug 13 10:18:01 2023 +0800 [fix](tablet clone) fix tablet sched failed when tablet missing tag and version incomplete (#22861) --- .../main/java/org/apache/doris/catalog/Tablet.java | 46 +++++++++++----------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java index af83e1ba8a..8b3bbe9ae7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java @@ -476,7 +476,8 @@ public class Tablet extends MetaObject implements Writable { Map<Tag, Short> allocMap = replicaAlloc.getAllocMap(); - Map<Tag, Short> currentAllocMap = Maps.newHashMap(); + Map<Tag, Short> stableAllocMap = Maps.newHashMap(); + Map<Tag, Short> stableVersionCompleteAllocMap = Maps.newHashMap(); short replicationNum = replicaAlloc.getTotalReplicaNum(); int alive = 0; @@ -496,32 +497,30 @@ public class Tablet extends MetaObject implements Writable { // ATTN: Replicas on same host is a bug of previous Doris version, so we fix it by this way. continue; } + alive++; - // this replica is alive but version incomplete - if (replica.getLastFailedVersion() > 0 || replica.getVersion() < visibleVersion) { - if (replica.needFurtherRepair() && backend.isScheduleAvailable()) { + boolean versionCompleted = replica.getLastFailedVersion() < 0 && replica.getVersion() >= visibleVersion; + if (versionCompleted) { + aliveAndVersionComplete++; + } + + if (backend.isScheduleAvailable()) { + if (replica.needFurtherRepair() && (needFurtherRepairReplica == null || !versionCompleted)) { needFurtherRepairReplica = replica; } - continue; - } - aliveAndVersionComplete++; + short allocNum = stableAllocMap.getOrDefault(backend.getLocationTag(), (short) 0); + stableAllocMap.put(backend.getLocationTag(), (short) (allocNum + 1)); - if (!backend.isScheduleAvailable()) { - // this replica is alive, version complete, but backend is not available - continue; - } - stable++; + if (versionCompleted) { + stable++; + versions.add(replica.getVersionCount()); - if (replica.needFurtherRepair() && needFurtherRepairReplica == null) { - needFurtherRepairReplica = replica; + allocNum = stableVersionCompleteAllocMap.getOrDefault(backend.getLocationTag(), (short) 0); + stableVersionCompleteAllocMap.put(backend.getLocationTag(), (short) (allocNum + 1)); + } } - - versions.add(replica.getVersionCount()); - - short curNum = currentAllocMap.getOrDefault(backend.getLocationTag(), (short) 0); - currentAllocMap.put(backend.getLocationTag(), (short) (curNum + 1)); } // 0. We can not choose a good replica as src to repair this tablet. @@ -583,9 +582,12 @@ public class Tablet extends MetaObject implements Writable { // 4. got enough healthy replicas, check tag for (Map.Entry<Tag, Short> alloc : allocMap.entrySet()) { - if (!currentAllocMap.containsKey(alloc.getKey()) - || currentAllocMap.get(alloc.getKey()) < alloc.getValue()) { - return Pair.of(TabletStatus.REPLICA_MISSING_FOR_TAG, TabletSchedCtx.Priority.NORMAL); + if (stableVersionCompleteAllocMap.getOrDefault(alloc.getKey(), (short) 0) < alloc.getValue()) { + if (stableAllocMap.getOrDefault(alloc.getKey(), (short) 0) >= alloc.getValue()) { + return Pair.of(TabletStatus.VERSION_INCOMPLETE, TabletSchedCtx.Priority.NORMAL); + } else { + return Pair.of(TabletStatus.REPLICA_MISSING_FOR_TAG, TabletSchedCtx.Priority.NORMAL); + } } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
