This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c28fe0cd1ac [fix](cloud) modify CloudTabletRebalancer and
CloudTabletStatMgr to reduce memory (#61318)
c28fe0cd1ac is described below
commit c28fe0cd1ac7ef5d31feb1da30e7bc95753ae05c
Author: meiyi <[email protected]>
AuthorDate: Sat Mar 14 10:10:52 2026 +0800
[fix](cloud) modify CloudTabletRebalancer and CloudTabletStatMgr to reduce
memory (#61318)
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
Reduce FE memory by:
1. moving top-N table stats filtering from PrometheusMetricVisitor into
CloudTabletStatMgr, so it is computed once per stat cycle instead of on
every Prometheus scrape;
2. removing the unused beToTablets field from InfightTask, to avoid
retaining a reference to a large map;
3. changing InfightTablet.tabletId from Long to long, to avoid boxing
overhead.
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
.../apache/doris/catalog/CloudTabletStatMgr.java | 31 +++++++++++++++++++++-
.../doris/cloud/catalog/CloudTabletRebalancer.java | 13 ++++-----
.../doris/metric/PrometheusMetricVisitor.java | 26 +-----------------
3 files changed, 36 insertions(+), 34 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
index 5b14786c1e7..fd500fac1a0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
@@ -35,7 +35,9 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
+import java.util.Comparator;
import java.util.List;
+import java.util.PriorityQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -48,6 +50,8 @@ import java.util.concurrent.Future;
public class CloudTabletStatMgr extends MasterDaemon {
private static final Logger LOG =
LogManager.getLogger(CloudTabletStatMgr.class);
+ private volatile long totalTableSize = 0;
+ // keep Config.prom_output_table_metrics_limit tables with the largest
data size, used for prometheus output
private volatile List<OlapTable.Statistics> cloudTableStatsList = new
ArrayList<>();
private static final ExecutorService GET_TABLET_STATS_THREAD_POOL =
Executors.newFixedThreadPool(
@@ -290,7 +294,8 @@ public class CloudTabletStatMgr extends MasterDaemon {
newCloudTableStatsList.add(tableStats);
}
}
- this.cloudTableStatsList = newCloudTableStatsList;
+ filterTopTableStatsByDataSize(newCloudTableStatsList);
+ this.totalTableSize = totalTableSize;
if (MetricRepo.isInit) {
MetricRepo.GAUGE_MAX_TABLE_SIZE_BYTES.setValue(maxTableSize.second);
@@ -365,4 +370,28 @@ public class CloudTabletStatMgr extends MasterDaemon {
public List<OlapTable.Statistics> getCloudTableStats() {
return this.cloudTableStatsList;
}
+
+ public long getTotalTableSize() {
+ return this.totalTableSize;
+ }
+
+ private void filterTopTableStatsByDataSize(List<OlapTable.Statistics>
newCloudTableStatsList) {
+ int limit = Config.prom_output_table_metrics_limit;
+ if (limit <= 0 || newCloudTableStatsList.size() <= limit) {
+ this.cloudTableStatsList = newCloudTableStatsList;
+ return;
+ }
+ // only copy elements if number of tables >
prom_output_table_metrics_limit
+ PriorityQueue<OlapTable.Statistics> topStats = new
PriorityQueue<>(limit,
+ Comparator.comparingLong(OlapTable.Statistics::getDataSize));
+ for (OlapTable.Statistics stats : newCloudTableStatsList) {
+ if (topStats.size() < limit) {
+ topStats.offer(stats);
+ } else if (!topStats.isEmpty() && stats.getDataSize() >
topStats.peek().getDataSize()) {
+ topStats.poll();
+ topStats.offer(stats);
+ }
+ }
+ this.cloudTableStatsList = new ArrayList<>(topStats);
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
index e4a92b1ed9b..50cb79fb616 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
@@ -312,10 +312,10 @@ public class CloudTabletRebalancer extends MasterDaemon {
@Getter
private class InfightTablet {
- private final Long tabletId;
+ private final long tabletId;
private final String clusterId;
- public InfightTablet(Long tabletId, String clusterId) {
+ public InfightTablet(long tabletId, String clusterId) {
this.tabletId = tabletId;
this.clusterId = clusterId;
}
@@ -329,7 +329,7 @@ public class CloudTabletRebalancer extends MasterDaemon {
return false;
}
InfightTablet that = (InfightTablet) o;
- return tabletId.equals(that.tabletId) &&
clusterId.equals(that.clusterId);
+ return tabletId == that.tabletId &&
clusterId.equals(that.clusterId);
}
@Override
@@ -342,7 +342,6 @@ public class CloudTabletRebalancer extends MasterDaemon {
public long pickedTabletId;
public long srcBe;
public long destBe;
- public Map<Long, Set<Long>> beToTablets;
public long startTimestamp;
BalanceType balanceType;
}
@@ -1950,8 +1949,7 @@ public class CloudTabletRebalancer extends MasterDaemon {
futurePartitionToTablets, futureBeToTabletsInTable)) {
continue;
}
- boolean moved = preheatAndUpdateTablet(pickedTabletId, srcBe,
destBe,
- clusterId, balanceType, beToTablets);
+ boolean moved = preheatAndUpdateTablet(pickedTabletId, srcBe,
destBe, clusterId, balanceType);
if (moved) {
updateBalanceStatus(balanceType);
}
@@ -2065,7 +2063,7 @@ public class CloudTabletRebalancer extends MasterDaemon {
}
private boolean preheatAndUpdateTablet(long pickedTabletId, long srcBe,
long destBe, String clusterId,
- BalanceType balanceType, Map<Long,
Set<Long>> beToTablets) {
+ BalanceType balanceType) {
Backend srcBackend = cloudSystemInfoService.getBackend(srcBe);
Backend destBackend = cloudSystemInfoService.getBackend(destBe);
if (srcBackend == null || destBackend == null) {
@@ -2079,7 +2077,6 @@ public class CloudTabletRebalancer extends MasterDaemon {
task.srcBe = srcBe;
task.destBe = destBe;
task.balanceType = balanceType;
- task.beToTablets = beToTablets;
task.startTimestamp = System.currentTimeMillis() / 1000;
InfightTablet key = new InfightTablet(pickedTabletId, clusterId);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/metric/PrometheusMetricVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/metric/PrometheusMetricVisitor.java
index 764002aaed0..2103dcb6c21 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/metric/PrometheusMetricVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/metric/PrometheusMetricVisitor.java
@@ -35,12 +35,10 @@ import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.PriorityQueue;
import java.util.Set;
import java.util.stream.Collectors;
@@ -252,30 +250,8 @@ public class PrometheusMetricVisitor extends MetricVisitor
{
StringBuilder tableRowCountBuilder = new StringBuilder();
Collection<OlapTable.Statistics> values =
tabletStatMgr.getCloudTableStats();
- // calc totalTableSize
- long totalTableSize = 0;
+ long totalTableSize = tabletStatMgr.getTotalTableSize();
for (OlapTable.Statistics stats : values) {
- totalTableSize += stats.getDataSize();
- }
- // output top N metrics
- if (values.size() > Config.prom_output_table_metrics_limit) {
- // only copy elements if number of tables >
prom_output_table_metrics_limit
- PriorityQueue<OlapTable.Statistics> topStats = new PriorityQueue<>(
- Config.prom_output_table_metrics_limit,
-
Comparator.comparingLong(OlapTable.Statistics::getDataSize));
- for (OlapTable.Statistics stats : values) {
- if (topStats.size() < Config.prom_output_table_metrics_limit) {
- topStats.offer(stats);
- } else if (!topStats.isEmpty()
- && stats.getDataSize() >
topStats.peek().getDataSize()) {
- topStats.poll();
- topStats.offer(stats);
- }
- }
- values = topStats;
- }
- for (OlapTable.Statistics stats : values) {
-
dataSizeBuilder.append("doris_fe_table_data_size{db_name=\"");
dataSizeBuilder.append(stats.getDbName());
dataSizeBuilder.append("\", table_name=\"");
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]