This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch fix/partition-near-limit-gauge
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 4a81380ddef69c1021e85718d7110a2fa7445dc7
Author: Yongqiang YANG <[email protected]>
AuthorDate: Sat Mar 28 00:13:31 2026 -0700

    [fix](metric) Change partition near-limit metrics from counters to gauges
    
    The auto_partition_near_limit_count and dynamic_partition_near_limit_count
    metrics were LongCounterMetric instances (monotonically increasing), so
    they never decreased, even after the near-limit condition was resolved.
    They are now GaugeMetricImpl instances that TabletStatMgr recomputes on
    each periodic table scan, so they reflect the current number of tables
    whose partition count exceeds 80% of the corresponding limit
    (max_auto_partition_num or max_dynamic_partition_num).
    
    Co-Authored-By: Claude Opus 4.6 <[email protected]>
---
 .../org/apache/doris/catalog/TabletStatMgr.java     | 21 ++++++++++++++++++++-
 .../doris/common/util/DynamicPartitionUtil.java     |  4 ----
 .../java/org/apache/doris/metric/MetricRepo.java    | 21 +++++++++++----------
 .../apache/doris/service/FrontendServiceImpl.java   |  4 ----
 4 files changed, 31 insertions(+), 19 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
index 37b198652be..07e1cee5c7f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
@@ -128,6 +128,8 @@ public class TabletStatMgr extends MasterDaemon {
         long tabletCount = 0L;
         long partitionCount = 0L;
         long tableCount = 0L;
+        long autoPartitionNearLimitCount = 0L;
+        long dynamicPartitionNearLimitCount = 0L;
         List<Long> dbIds = Env.getCurrentInternalCatalog().getDbIds();
         for (Long dbId : dbIds) {
             Database db = Env.getCurrentInternalCatalog().getDbNullable(dbId);
@@ -162,7 +164,22 @@ public class TabletStatMgr extends MasterDaemon {
                 }
                 try {
                     List<Partition> allPartitions = 
olapTable.getAllPartitions();
-                    partitionCount += allPartitions.size();
+                    int tablePartitionNum = allPartitions.size();
+                    partitionCount += tablePartitionNum;
+                    // Check if this table's partition count is near the limit (>80%)
+                    if 
(olapTable.getPartitionInfo().enableAutomaticPartition()) {
+                        int limit = Config.max_auto_partition_num;
+                        if (tablePartitionNum > limit * 8L / 10) {
+                            autoPartitionNearLimitCount++;
+                        }
+                    }
+                    if (olapTable.dynamicPartitionExists()
+                            && 
olapTable.getTableProperty().getDynamicPartitionProperty().getEnable()) {
+                        int limit = Config.max_dynamic_partition_num;
+                        if (tablePartitionNum > limit * 8L / 10) {
+                            dynamicPartitionNearLimitCount++;
+                        }
+                    }
                     for (Partition partition : allPartitions) {
                         long partitionDataSize = 0L;
                         long version = partition.getVisibleVersion();
@@ -295,6 +312,8 @@ public class TabletStatMgr extends MasterDaemon {
             // avoid ArithmeticException: / by zero
             long avgTabletSize = totalTableSize / Math.max(1, tabletCount);
             MetricRepo.GAUGE_AVG_TABLET_SIZE_BYTES.setValue(avgTabletSize);
+            
MetricRepo.GAUGE_AUTO_PARTITION_NEAR_LIMIT.setValue(autoPartitionNearLimitCount);
+            
MetricRepo.GAUGE_DYNAMIC_PARTITION_NEAR_LIMIT.setValue(dynamicPartitionNearLimitCount);
 
             LOG.info("OlapTable num=" + tableCount
                     + ", partition num=" + partitionCount + ", tablet num=" + 
tabletCount
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
index 516d6942478..09b4a9f18e8 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
@@ -42,7 +42,6 @@ import org.apache.doris.common.ErrorReport;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.FeNameFormat;
 import org.apache.doris.common.UserException;
-import org.apache.doris.metric.MetricRepo;
 import org.apache.doris.policy.StoragePolicy;
 import org.apache.doris.resource.Tag;
 import org.apache.doris.thrift.TStorageMedium;
@@ -652,9 +651,6 @@ public class DynamicPartitionUtil {
                 LOG.warn("Dynamic partition count {} is approaching limit {} 
(>80%)."
                         + " Consider increasing max_dynamic_partition_num.",
                         expectCreatePartitionNum, dynamicPartitionLimit);
-                if (MetricRepo.isInit) {
-                    
MetricRepo.COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT.increase(1L);
-                }
             }
         }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java 
b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
index c28c2aeb99f..d05c9f45ff1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
@@ -256,9 +256,9 @@ public final class MetricRepo {
     public static GaugeMetricImpl<Long> GAUGE_AVG_PARTITION_SIZE_BYTES;
     public static GaugeMetricImpl<Long> GAUGE_AVG_TABLET_SIZE_BYTES;
 
-    // Partition near-limit warnings
-    public static LongCounterMetric COUNTER_AUTO_PARTITION_NEAR_LIMIT;
-    public static LongCounterMetric COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT;
+    // Partition near-limit warnings (gauges: current number of tables near the partition limit)
+    public static GaugeMetricImpl<Long> GAUGE_AUTO_PARTITION_NEAR_LIMIT;
+    public static GaugeMetricImpl<Long> GAUGE_DYNAMIC_PARTITION_NEAR_LIMIT;
 
     // Agent task
     public static LongCounterMetric COUNTER_AGENT_TASK_REQUEST_TOTAL;
@@ -1044,15 +1044,16 @@ public final class MetricRepo {
         GAUGE_AVG_TABLET_SIZE_BYTES = new 
GaugeMetricImpl<>("avg_tablet_size_bytes", MetricUnit.BYTES, "", 0L);
         DORIS_METRIC_REGISTER.addMetrics(GAUGE_AVG_TABLET_SIZE_BYTES);
 
-        // Partition near-limit warning counters
-        COUNTER_AUTO_PARTITION_NEAR_LIMIT = new 
LongCounterMetric("auto_partition_near_limit_count",
+        // Partition near-limit warning gauges (updated by TabletStatMgr periodic scan)
+        GAUGE_AUTO_PARTITION_NEAR_LIMIT = new 
GaugeMetricImpl<>("auto_partition_near_limit_count",
                 MetricUnit.NOUNIT,
-                "number of times auto partition count exceeded 80% of 
max_auto_partition_num");
-        DORIS_METRIC_REGISTER.addMetrics(COUNTER_AUTO_PARTITION_NEAR_LIMIT);
-        COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT = new 
LongCounterMetric("dynamic_partition_near_limit_count",
+                "number of auto partition tables where partition count 
exceeded 80% of max_auto_partition_num", 0L);
+        DORIS_METRIC_REGISTER.addMetrics(GAUGE_AUTO_PARTITION_NEAR_LIMIT);
+        GAUGE_DYNAMIC_PARTITION_NEAR_LIMIT = new 
GaugeMetricImpl<>("dynamic_partition_near_limit_count",
                 MetricUnit.NOUNIT,
-                "number of times dynamic partition count exceeded 80% of 
max_dynamic_partition_num");
-        DORIS_METRIC_REGISTER.addMetrics(COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT);
+                "number of dynamic partition tables where partition count 
exceeded 80% of max_dynamic_partition_num",
+                0L);
+        DORIS_METRIC_REGISTER.addMetrics(GAUGE_DYNAMIC_PARTITION_NEAR_LIMIT);
 
         COUNTER_AGENT_TASK_REQUEST_TOTAL = new 
LongCounterMetric("agent_task_request_total", MetricUnit.NOUNIT,
                 "total agent batch task request send to BE");
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java 
b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
index 44410af0163..6d5cbaab063 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
@@ -96,7 +96,6 @@ import 
org.apache.doris.load.routineload.RoutineLoadJob.JobState;
 import org.apache.doris.load.routineload.RoutineLoadManager;
 import org.apache.doris.master.MasterImpl;
 import org.apache.doris.meta.MetaContext;
-import org.apache.doris.metric.MetricRepo;
 import org.apache.doris.mysql.privilege.AccessControllerManager;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.nereids.trees.plans.PlanNodeAndHash;
@@ -4407,9 +4406,6 @@ public class FrontendServiceImpl implements 
FrontendService.Iface {
             LOG.warn("Table {}.{} auto partition count {} is approaching limit 
{} (>80%)."
                         + " Consider increasing max_auto_partition_num.",
                     db.getFullName(), olapTable.getName(), partitionNum, 
autoPartitionLimit);
-            if (MetricRepo.isInit) {
-                MetricRepo.COUNTER_AUTO_PARTITION_NEAR_LIMIT.increase(1L);
-            }
         }
 
         // build partition & tablets


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to