This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 777bdce5a5 [minor](clone) add more debug log for tablet scheduler
(#19892)
777bdce5a5 is described below
commit 777bdce5a563b40bfa805386c88fae39e605fd1c
Author: Mingyu Chen <[email protected]>
AuthorDate: Sat May 20 15:59:26 2023 +0800
[minor](clone) add more debug log for tablet scheduler (#19892)
Sometimes I find that the tablet scheduler cannot schedule a tablet, and
there is no further info available for debugging.
So I added some debug logs for this process.
No logic is changed.
---
.../apache/doris/clone/BackendLoadStatistic.java | 2 ++
.../org/apache/doris/clone/TabletSchedCtx.java | 35 ++++++++++++++++++--
.../org/apache/doris/clone/TabletScheduler.java | 38 +++++++++++++++++++---
3 files changed, 68 insertions(+), 7 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
index 47befaaccb..d040b8053c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
@@ -303,6 +303,8 @@ public class BackendLoadStatistic {
RootPathLoadStatistic pathStatistic = pathStatistics.get(i);
// if this is a supplement task, ignore the storage medium
if (!isSupplement && pathStatistic.getStorageMedium() != medium) {
+ LOG.debug("backend {} path {}'s storage medium {} is not {}
storage medium, actual: {}",
+ beId, pathStatistic.getPath(),
pathStatistic.getStorageMedium(), medium);
continue;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 990153a00a..b889a70430 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -488,6 +488,7 @@ public class TabletSchedCtx implements
Comparable<TabletSchedCtx> {
if (backend == null) {
// containsBE() is currently only used for choosing dest backend
to do clone task.
// return true so that it won't choose this backend.
+ LOG.debug("desc backend {} does not exist, skip. tablet: {}",
beId, tabletId);
return true;
}
String host = backend.getHost();
@@ -495,13 +496,18 @@ public class TabletSchedCtx implements
Comparable<TabletSchedCtx> {
Backend be = infoService.getBackend(replica.getBackendId());
if (be == null) {
// BE has been dropped, skip it
+ LOG.debug("replica's backend {} does not exist, skip. tablet:
{}", replica.getBackendId(), tabletId);
continue;
}
if (!Config.allow_replica_on_same_host &&
!FeConstants.runningUnitTest && host.equals(be.getHost())) {
+ LOG.debug("replica's backend {} is on same host {}, skip.
tablet: {}",
+ replica.getBackendId(), host, tabletId);
return true;
}
if (replica.getBackendId() == beId) {
+ LOG.debug("replica's backend {} is same as dest backend {},
skip. tablet: {}",
+ replica.getBackendId(), beId, tabletId);
return true;
}
}
@@ -557,24 +563,34 @@ public class TabletSchedCtx implements
Comparable<TabletSchedCtx> {
List<Replica> candidates = Lists.newArrayList();
for (Replica replica : tablet.getReplicas()) {
if (exceptBeId != -1 && replica.getBackendId() == exceptBeId) {
+ LOG.debug("replica's backend {} is same as except backend {},
skip. tablet: {}",
+ replica.getBackendId(), exceptBeId, tabletId);
continue;
}
if (replica.isBad() || replica.tooSlow()) {
+ LOG.debug("replica {} is bad({}) or too slow({}), skip.
tablet: {}",
+ replica.getId(), replica.isBad(), replica.tooSlow(),
tabletId);
continue;
}
Backend be = infoService.getBackend(replica.getBackendId());
if (be == null || !be.isAlive()) {
// backend which is in decommission can still be the source
backend
+ LOG.debug("replica's backend {} does not exist or is not
alive, skip. tablet: {}",
+ replica.getBackendId(), tabletId);
continue;
}
if (replica.getLastFailedVersion() > 0) {
+ LOG.debug("replica {} has failed version {}, skip. tablet: {}",
+ replica.getId(), replica.getLastFailedVersion(),
tabletId);
continue;
}
if (!replica.checkVersionCatchUp(visibleVersion, false)) {
+ LOG.debug("replica {} version {} has not catch up to visible
version {}, skip. tablet: {}",
+ replica.getId(), replica.getVersion(), visibleVersion,
tabletId);
continue;
}
@@ -591,14 +607,19 @@ public class TabletSchedCtx implements
Comparable<TabletSchedCtx> {
for (Replica srcReplica : candidates) {
PathSlot slot =
backendsWorkingSlots.get(srcReplica.getBackendId());
if (slot == null) {
+ LOG.debug("replica's backend {} does not have working slot,
skip. tablet: {}",
+ srcReplica.getBackendId(), tabletId);
continue;
}
long srcPathHash = slot.takeSlot(srcReplica.getPathHash());
- if (srcPathHash != -1) {
- setSrc(srcReplica);
- return;
+ if (srcPathHash == -1) {
+ LOG.debug("replica's backend {} does not have available slot,
skip. tablet: {}",
+ srcReplica.getBackendId(), tabletId);
+ continue;
}
+ setSrc(srcReplica);
+ return;
}
throw new SchedException(Status.SCHEDULE_FAILED, "unable to find
source slot");
}
@@ -629,11 +650,15 @@ public class TabletSchedCtx implements
Comparable<TabletSchedCtx> {
Replica chosenReplica = null;
for (Replica replica : tablet.getReplicas()) {
if (replica.isBad()) {
+ LOG.debug("replica {} is bad, skip. tablet: {}",
+ replica.getId(), tabletId);
continue;
}
Backend be = infoService.getBackend(replica.getBackendId());
if (be == null || !be.isScheduleAvailable()) {
+ LOG.debug("replica's backend {} does not exist or is not
scheduler available, skip. tablet: {}",
+ replica.getBackendId(), tabletId);
continue;
}
@@ -644,10 +669,14 @@ public class TabletSchedCtx implements
Comparable<TabletSchedCtx> {
&& ((replica.getVersion() == visibleVersion)
|| replica.getVersion() > visibleVersion) &&
replica.getState() != ReplicaState.DECOMMISSION) {
// skip healthy replica
+ LOG.debug("replica {} version {} is healthy, visible version
{}, replica state {}, skip. tablet: {}",
+ replica.getId(), replica.getVersion(), visibleVersion,
replica.getState(), tabletId);
continue;
}
if (replica.needFurtherRepair()) {
+ LOG.debug("replica {} need further repair, choose it. tablet:
{}",
+ replica.getId(), tabletId);
chosenReplica = replica;
break;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index 7d2e7cc3b9..470cf0a669 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -1311,11 +1311,15 @@ public class TabletScheduler extends MasterDaemon {
List<RootPathLoadStatistic> allFitPaths = Lists.newArrayList();
for (BackendLoadStatistic bes : beStatistics) {
if (!bes.isAvailable()) {
+ LOG.debug("backend {} is not available, skip. tablet: {}",
bes.getBeId(), tabletCtx.getTabletId());
continue;
}
// exclude BE which already has replica of this tablet or another
BE at same host has this replica
if (tabletCtx.filterDestBE(bes.getBeId())) {
+ LOG.debug("backend {} already has replica of this tablet or
another BE "
+ + "at same host has this replica, skip.
tablet: {}",
+ bes.getBeId(), tabletCtx.getTabletId());
continue;
}
@@ -1323,9 +1327,13 @@ public class TabletScheduler extends MasterDaemon {
// Else, check the tag.
if (forColocate) {
if
(!tabletCtx.getColocateBackendsSet().contains(bes.getBeId())) {
+ LOG.debug("backend {} is not in colocate backend set,
skip. tablet: {}",
+ bes.getBeId(), tabletCtx.getTabletId());
continue;
}
} else if (!bes.getTag().equals(tag)) {
+ LOG.debug("backend {}'s tag {} is not equal to tablet's tag
{}, skip. tablet: {}",
+ bes.getBeId(), bes.getTag(), tag,
tabletCtx.getTabletId());
continue;
}
@@ -1334,6 +1342,7 @@ public class TabletScheduler extends MasterDaemon {
resultPaths, tabletCtx.getTabletStatus() !=
TabletStatus.REPLICA_RELOCATING
/* if REPLICA_RELOCATING, then it is not a supplement task
*/);
if (!st.ok()) {
+ LOG.debug("unable to find path for tablet: {}. {}", tabletCtx,
st);
// This is to solve, when we decommission some BEs with SSD
disks,
// if there are no SSD disks on the remaining BEs, it will be
impossible to select a
// suitable destination path.
@@ -1361,31 +1370,50 @@ public class TabletScheduler extends MasterDaemon {
// we try to find a path with specified media type, if not find,
arbitrarily use one.
for (RootPathLoadStatistic rootPathLoadStatistic : allFitPaths) {
if (rootPathLoadStatistic.getStorageMedium() !=
tabletCtx.getStorageMedium()) {
+ LOG.debug("backend {}'s path {}'s storage medium {} "
+ + "is not equal to tablet's storage medium {},
skip. tablet: {}",
+ rootPathLoadStatistic.getBeId(),
rootPathLoadStatistic.getPathHash(),
+ rootPathLoadStatistic.getStorageMedium(),
tabletCtx.getStorageMedium(),
+ tabletCtx.getTabletId());
continue;
}
PathSlot slot =
backendsWorkingSlots.get(rootPathLoadStatistic.getBeId());
if (slot == null) {
+ LOG.debug("backend {}'s path {}'s slot is null, skip. tablet:
{}",
+ rootPathLoadStatistic.getBeId(),
rootPathLoadStatistic.getPathHash(),
+ tabletCtx.getTabletId());
continue;
}
long pathHash = slot.takeSlot(rootPathLoadStatistic.getPathHash());
- if (pathHash != -1) {
- return rootPathLoadStatistic;
+ if (pathHash == -1) {
+ LOG.debug("backend {}'s path {}'s slot is full, skip. tablet:
{}",
+ rootPathLoadStatistic.getBeId(),
rootPathLoadStatistic.getPathHash(),
+ tabletCtx.getTabletId());
+ continue;
}
+ return rootPathLoadStatistic;
}
// no root path with specified media type is found, get arbitrary one.
for (RootPathLoadStatistic rootPathLoadStatistic : allFitPaths) {
PathSlot slot =
backendsWorkingSlots.get(rootPathLoadStatistic.getBeId());
if (slot == null) {
+ LOG.debug("backend {}'s path {}'s slot is null, skip. tablet:
{}",
+ rootPathLoadStatistic.getBeId(),
rootPathLoadStatistic.getPathHash(),
+ tabletCtx.getTabletId());
continue;
}
long pathHash = slot.takeSlot(rootPathLoadStatistic.getPathHash());
- if (pathHash != -1) {
- return rootPathLoadStatistic;
+ if (pathHash == -1) {
+ LOG.debug("backend {}'s path {}'s slot is full, skip. tablet:
{}",
+ rootPathLoadStatistic.getBeId(),
rootPathLoadStatistic.getPathHash(),
+ tabletCtx.getTabletId());
+ continue;
}
+ return rootPathLoadStatistic;
}
throw new SchedException(Status.SCHEDULE_FAILED, "unable to find dest
path which can be fit in");
@@ -1712,10 +1740,12 @@ public class TabletScheduler extends MasterDaemon {
Slot slot = pathSlots.get(pathHash);
if (slot == null) {
+ LOG.debug("path {} is not exist", pathHash);
return -1;
}
slot.rectify();
if (slot.available <= 0) {
+ LOG.debug("path {} has no available slot", pathHash);
return -1;
}
slot.available--;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]