This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 777bdce5a5 [minor](clone) add more debug log for tablet scheduler 
(#19892)
777bdce5a5 is described below

commit 777bdce5a563b40bfa805386c88fae39e605fd1c
Author: Mingyu Chen <[email protected]>
AuthorDate: Sat May 20 15:59:26 2023 +0800

    [minor](clone) add more debug log for tablet scheduler (#19892)
    
    Sometimes I find that the tablet scheduler cannot schedule a tablet, and 
there is no further information available for debugging.
    So I added some debug logs for this process.
    No logic is changed.
---
 .../apache/doris/clone/BackendLoadStatistic.java   |  2 ++
 .../org/apache/doris/clone/TabletSchedCtx.java     | 35 ++++++++++++++++++--
 .../org/apache/doris/clone/TabletScheduler.java    | 38 +++++++++++++++++++---
 3 files changed, 68 insertions(+), 7 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java 
b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
index 47befaaccb..d040b8053c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
@@ -303,6 +303,8 @@ public class BackendLoadStatistic {
             RootPathLoadStatistic pathStatistic = pathStatistics.get(i);
             // if this is a supplement task, ignore the storage medium
             if (!isSupplement && pathStatistic.getStorageMedium() != medium) {
+                LOG.debug("backend {} path {}'s storage medium {} is not {} 
storage medium, actual: {}",
+                        beId, pathStatistic.getPath(), 
pathStatistic.getStorageMedium(), medium);
                 continue;
             }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java 
b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 990153a00a..b889a70430 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -488,6 +488,7 @@ public class TabletSchedCtx implements 
Comparable<TabletSchedCtx> {
         if (backend == null) {
             // containsBE() is currently only used for choosing dest backend 
to do clone task.
             // return true so that it won't choose this backend.
+            LOG.debug("desc backend {} does not exist, skip. tablet: {}", 
beId, tabletId);
             return true;
         }
         String host = backend.getHost();
@@ -495,13 +496,18 @@ public class TabletSchedCtx implements 
Comparable<TabletSchedCtx> {
             Backend be = infoService.getBackend(replica.getBackendId());
             if (be == null) {
                 // BE has been dropped, skip it
+                LOG.debug("replica's backend {} does not exist, skip. tablet: 
{}", replica.getBackendId(), tabletId);
                 continue;
             }
             if (!Config.allow_replica_on_same_host && 
!FeConstants.runningUnitTest && host.equals(be.getHost())) {
+                LOG.debug("replica's backend {} is on same host {}, skip. 
tablet: {}",
+                        replica.getBackendId(), host, tabletId);
                 return true;
             }
 
             if (replica.getBackendId() == beId) {
+                LOG.debug("replica's backend {} is same as dest backend {}, 
skip. tablet: {}",
+                        replica.getBackendId(), beId, tabletId);
                 return true;
             }
         }
@@ -557,24 +563,34 @@ public class TabletSchedCtx implements 
Comparable<TabletSchedCtx> {
         List<Replica> candidates = Lists.newArrayList();
         for (Replica replica : tablet.getReplicas()) {
             if (exceptBeId != -1 && replica.getBackendId() == exceptBeId) {
+                LOG.debug("replica's backend {} is same as except backend {}, 
skip. tablet: {}",
+                        replica.getBackendId(), exceptBeId, tabletId);
                 continue;
             }
 
             if (replica.isBad() || replica.tooSlow()) {
+                LOG.debug("replica {} is bad({}) or too slow({}), skip. 
tablet: {}",
+                        replica.getId(), replica.isBad(), replica.tooSlow(), 
tabletId);
                 continue;
             }
 
             Backend be = infoService.getBackend(replica.getBackendId());
             if (be == null || !be.isAlive()) {
                 // backend which is in decommission can still be the source 
backend
+                LOG.debug("replica's backend {} does not exist or is not 
alive, skip. tablet: {}",
+                        replica.getBackendId(), tabletId);
                 continue;
             }
 
             if (replica.getLastFailedVersion() > 0) {
+                LOG.debug("replica {} has failed version {}, skip. tablet: {}",
+                        replica.getId(), replica.getLastFailedVersion(), 
tabletId);
                 continue;
             }
 
             if (!replica.checkVersionCatchUp(visibleVersion, false)) {
+                LOG.debug("replica {} version {} has not catch up to visible 
version {}, skip. tablet: {}",
+                        replica.getId(), replica.getVersion(), visibleVersion, 
tabletId);
                 continue;
             }
 
@@ -591,14 +607,19 @@ public class TabletSchedCtx implements 
Comparable<TabletSchedCtx> {
         for (Replica srcReplica : candidates) {
             PathSlot slot = 
backendsWorkingSlots.get(srcReplica.getBackendId());
             if (slot == null) {
+                LOG.debug("replica's backend {} does not have working slot, 
skip. tablet: {}",
+                        srcReplica.getBackendId(), tabletId);
                 continue;
             }
 
             long srcPathHash = slot.takeSlot(srcReplica.getPathHash());
-            if (srcPathHash != -1) {
-                setSrc(srcReplica);
-                return;
+            if (srcPathHash == -1) {
+                LOG.debug("replica's backend {} does not have available slot, 
skip. tablet: {}",
+                        srcReplica.getBackendId(), tabletId);
+                continue;
             }
+            setSrc(srcReplica);
+            return;
         }
         throw new SchedException(Status.SCHEDULE_FAILED, "unable to find 
source slot");
     }
@@ -629,11 +650,15 @@ public class TabletSchedCtx implements 
Comparable<TabletSchedCtx> {
         Replica chosenReplica = null;
         for (Replica replica : tablet.getReplicas()) {
             if (replica.isBad()) {
+                LOG.debug("replica {} is bad, skip. tablet: {}",
+                        replica.getId(), tabletId);
                 continue;
             }
 
             Backend be = infoService.getBackend(replica.getBackendId());
             if (be == null || !be.isScheduleAvailable()) {
+                LOG.debug("replica's backend {} does not exist or is not 
scheduler available, skip. tablet: {}",
+                        replica.getBackendId(), tabletId);
                 continue;
             }
 
@@ -644,10 +669,14 @@ public class TabletSchedCtx implements 
Comparable<TabletSchedCtx> {
                     && ((replica.getVersion() == visibleVersion)
                     || replica.getVersion() > visibleVersion) && 
replica.getState() != ReplicaState.DECOMMISSION) {
                 // skip healthy replica
+                LOG.debug("replica {} version {} is healthy, visible version 
{}, replica state {}, skip. tablet: {}",
+                        replica.getId(), replica.getVersion(), visibleVersion, 
replica.getState(), tabletId);
                 continue;
             }
 
             if (replica.needFurtherRepair()) {
+                LOG.debug("replica {} need further repair, choose it. tablet: 
{}",
+                        replica.getId(), tabletId);
                 chosenReplica = replica;
                 break;
             }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java 
b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index 7d2e7cc3b9..470cf0a669 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -1311,11 +1311,15 @@ public class TabletScheduler extends MasterDaemon {
         List<RootPathLoadStatistic> allFitPaths = Lists.newArrayList();
         for (BackendLoadStatistic bes : beStatistics) {
             if (!bes.isAvailable()) {
+                LOG.debug("backend {} is not available, skip. tablet: {}", 
bes.getBeId(), tabletCtx.getTabletId());
                 continue;
             }
 
             // exclude BE which already has replica of this tablet or another 
BE at same host has this replica
             if (tabletCtx.filterDestBE(bes.getBeId())) {
+                LOG.debug("backend {} already has replica of this tablet or 
another BE "
+                                + "at same host has this replica, skip. 
tablet: {}",
+                        bes.getBeId(), tabletCtx.getTabletId());
                 continue;
             }
 
@@ -1323,9 +1327,13 @@ public class TabletScheduler extends MasterDaemon {
             // Else, check the tag.
             if (forColocate) {
                 if 
(!tabletCtx.getColocateBackendsSet().contains(bes.getBeId())) {
+                    LOG.debug("backend {} is not in colocate backend set, 
skip. tablet: {}",
+                            bes.getBeId(), tabletCtx.getTabletId());
                     continue;
                 }
             } else if (!bes.getTag().equals(tag)) {
+                LOG.debug("backend {}'s tag {} is not equal to tablet's tag 
{}, skip. tablet: {}",
+                        bes.getBeId(), bes.getTag(), tag, 
tabletCtx.getTabletId());
                 continue;
             }
 
@@ -1334,6 +1342,7 @@ public class TabletScheduler extends MasterDaemon {
                     resultPaths, tabletCtx.getTabletStatus() != 
TabletStatus.REPLICA_RELOCATING
                     /* if REPLICA_RELOCATING, then it is not a supplement task 
*/);
             if (!st.ok()) {
+                LOG.debug("unable to find path for tablet: {}. {}", tabletCtx, 
st);
                 // This is to solve, when we decommission some BEs with SSD 
disks,
                 // if there are no SSD disks on the remaining BEs, it will be 
impossible to select a
                 // suitable destination path.
@@ -1361,31 +1370,50 @@ public class TabletScheduler extends MasterDaemon {
         // we try to find a path with specified media type, if not find, 
arbitrarily use one.
         for (RootPathLoadStatistic rootPathLoadStatistic : allFitPaths) {
             if (rootPathLoadStatistic.getStorageMedium() != 
tabletCtx.getStorageMedium()) {
+                LOG.debug("backend {}'s path {}'s storage medium {} "
+                                + "is not equal to tablet's storage medium {}, 
skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), 
rootPathLoadStatistic.getPathHash(),
+                        rootPathLoadStatistic.getStorageMedium(), 
tabletCtx.getStorageMedium(),
+                        tabletCtx.getTabletId());
                 continue;
             }
 
             PathSlot slot = 
backendsWorkingSlots.get(rootPathLoadStatistic.getBeId());
             if (slot == null) {
+                LOG.debug("backend {}'s path {}'s slot is null, skip. tablet: 
{}",
+                        rootPathLoadStatistic.getBeId(), 
rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
                 continue;
             }
 
             long pathHash = slot.takeSlot(rootPathLoadStatistic.getPathHash());
-            if (pathHash != -1) {
-                return rootPathLoadStatistic;
+            if (pathHash == -1) {
+                LOG.debug("backend {}'s path {}'s slot is full, skip. tablet: 
{}",
+                        rootPathLoadStatistic.getBeId(), 
rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
+                continue;
             }
+            return rootPathLoadStatistic;
         }
 
         // no root path with specified media type is found, get arbitrary one.
         for (RootPathLoadStatistic rootPathLoadStatistic : allFitPaths) {
             PathSlot slot = 
backendsWorkingSlots.get(rootPathLoadStatistic.getBeId());
             if (slot == null) {
+                LOG.debug("backend {}'s path {}'s slot is null, skip. tablet: 
{}",
+                        rootPathLoadStatistic.getBeId(), 
rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
                 continue;
             }
 
             long pathHash = slot.takeSlot(rootPathLoadStatistic.getPathHash());
-            if (pathHash != -1) {
-                return rootPathLoadStatistic;
+            if (pathHash == -1) {
+                LOG.debug("backend {}'s path {}'s slot is full, skip. tablet: 
{}",
+                        rootPathLoadStatistic.getBeId(), 
rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
+                continue;
             }
+            return rootPathLoadStatistic;
         }
 
         throw new SchedException(Status.SCHEDULE_FAILED, "unable to find dest 
path which can be fit in");
@@ -1712,10 +1740,12 @@ public class TabletScheduler extends MasterDaemon {
 
             Slot slot = pathSlots.get(pathHash);
             if (slot == null) {
+                LOG.debug("path {} is not exist", pathHash);
                 return -1;
             }
             slot.rectify();
             if (slot.available <= 0) {
+                LOG.debug("path {} has no available slot", pathHash);
                 return -1;
             }
             slot.available--;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to