This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch improve/partition-limit-defaults-and-metrics
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b3f35907bc29ae21d77e0be709385677b6513267
Author: Yongqiang YANG <[email protected]>
AuthorDate: Wed Mar 18 23:38:28 2026 -0700

    [improve](partition) Increase max_dynamic/auto_partition_num defaults to 20000 and add near-limit metrics
    
    Raise the default limits to 20000 for both max_dynamic_partition_num
    (previously 500) and max_auto_partition_num (previously 2000) to better
    match modern production workloads.
    
    Add warning logs and Prometheus counter metrics
    (auto_partition_near_limit_count, dynamic_partition_near_limit_count)
    that fire when a partition count exceeds 80% of its configured limit
    while still being within that limit, enabling proactive monitoring
    before hard failures.
    
    Co-Authored-By: Claude Opus 4.6 <[email protected]>
---
 .../src/main/java/org/apache/doris/common/Config.java    |  6 +++---
 .../apache/doris/common/util/DynamicPartitionUtil.java   | 16 +++++++++++++---
 .../main/java/org/apache/doris/metric/MetricRepo.java    | 14 ++++++++++++++
 .../org/apache/doris/service/FrontendServiceImpl.java    | 14 ++++++++++++--
 4 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index f9be5d87559..514e28b722c 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1521,7 +1521,7 @@ public class Config extends ConfigBase {
      * The number is determined by "start" and "end" in the dynamic partition 
parameters.
      */
     @ConfField(mutable = true, masterOnly = true)
-    public static int max_dynamic_partition_num = 500;
+    public static int max_dynamic_partition_num = 20000;
 
     /**
      * Used to limit the maximum number of partitions that can be created when 
creating multi partition,
@@ -2690,8 +2690,8 @@ public class Config extends ConfigBase {
 
     @ConfField(mutable = true, masterOnly = true, description = {
             "For auto-partitioned tables to prevent users from accidentally 
creating a large number of partitions, "
-                    + "the number of partitions allowed per OLAP table is 
`max_auto_partition_num`. Default 2000."})
-    public static int max_auto_partition_num = 2000;
+                    + "the number of partitions allowed per OLAP table is 
`max_auto_partition_num`. Default 20000."})
+    public static int max_auto_partition_num = 20000;
 
     @ConfField(mutable = true, masterOnly = true, description = {
             "The maximum difference in the number of tablets of each BE in 
partition rebalance mode. "
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
index db12f6266ea..733e8a38f4d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java
@@ -42,6 +42,7 @@ import org.apache.doris.common.ErrorReport;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.FeNameFormat;
 import org.apache.doris.common.UserException;
+import org.apache.doris.metric.MetricRepo;
 import org.apache.doris.policy.StoragePolicy;
 import org.apache.doris.resource.Tag;
 import org.apache.doris.thrift.TStorageMedium;
@@ -641,10 +642,19 @@ public class DynamicPartitionUtil {
         }
         expectCreatePartitionNum = (long) end - start;
 
-        if (!isReplay && hasEnd && (expectCreatePartitionNum > 
Config.max_dynamic_partition_num)
+        if (!isReplay && hasEnd
                 && 
Boolean.parseBoolean(analyzedProperties.getOrDefault(DynamicPartitionProperty.ENABLE,
 "true"))) {
-            throw new DdlException("Too many dynamic partitions: "
-                    + expectCreatePartitionNum + ". Limit: " + 
Config.max_dynamic_partition_num);
+            if (expectCreatePartitionNum > Config.max_dynamic_partition_num) {
+                throw new DdlException("Too many dynamic partitions: "
+                        + expectCreatePartitionNum + ". Limit: " + 
Config.max_dynamic_partition_num);
+            } else if (expectCreatePartitionNum > 
Config.max_dynamic_partition_num * 0.8) {
+                LOG.warn("Dynamic partition count {} is approaching limit {} 
(>80%)."
+                                + " Consider increasing 
max_dynamic_partition_num.",
+                        expectCreatePartitionNum, 
Config.max_dynamic_partition_num);
+                if (MetricRepo.isInit) {
+                    
MetricRepo.COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT.increase(1L);
+                }
+            }
         }
 
         if 
(properties.containsKey(DynamicPartitionProperty.START_DAY_OF_MONTH)) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java 
b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
index cd0755fd9da..c28c2aeb99f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
@@ -256,6 +256,10 @@ public final class MetricRepo {
     public static GaugeMetricImpl<Long> GAUGE_AVG_PARTITION_SIZE_BYTES;
     public static GaugeMetricImpl<Long> GAUGE_AVG_TABLET_SIZE_BYTES;
 
+    // Partition near-limit warnings
+    public static LongCounterMetric COUNTER_AUTO_PARTITION_NEAR_LIMIT;
+    public static LongCounterMetric COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT;
+
     // Agent task
     public static LongCounterMetric COUNTER_AGENT_TASK_REQUEST_TOTAL;
     public static AutoMappedMetric<LongCounterMetric> COUNTER_AGENT_TASK_TOTAL;
@@ -1040,6 +1044,16 @@ public final class MetricRepo {
         GAUGE_AVG_TABLET_SIZE_BYTES = new 
GaugeMetricImpl<>("avg_tablet_size_bytes", MetricUnit.BYTES, "", 0L);
         DORIS_METRIC_REGISTER.addMetrics(GAUGE_AVG_TABLET_SIZE_BYTES);
 
+        // Partition near-limit warning counters
+        COUNTER_AUTO_PARTITION_NEAR_LIMIT = new 
LongCounterMetric("auto_partition_near_limit_count",
+                MetricUnit.NOUNIT,
+                "number of times auto partition count exceeded 80% of 
max_auto_partition_num");
+        DORIS_METRIC_REGISTER.addMetrics(COUNTER_AUTO_PARTITION_NEAR_LIMIT);
+        COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT = new 
LongCounterMetric("dynamic_partition_near_limit_count",
+                MetricUnit.NOUNIT,
+                "number of times dynamic partition count exceeded 80% of 
max_dynamic_partition_num");
+        DORIS_METRIC_REGISTER.addMetrics(COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT);
+
         COUNTER_AGENT_TASK_REQUEST_TOTAL = new 
LongCounterMetric("agent_task_request_total", MetricUnit.NOUNIT,
                 "total agent batch task request send to BE");
         DORIS_METRIC_REGISTER.addMetrics(COUNTER_AGENT_TASK_REQUEST_TOTAL);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java 
b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
index f074711854c..cc0f64afd4e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java
@@ -95,6 +95,7 @@ import org.apache.doris.load.routineload.RoutineLoadJob;
 import org.apache.doris.load.routineload.RoutineLoadJob.JobState;
 import org.apache.doris.load.routineload.RoutineLoadManager;
 import org.apache.doris.master.MasterImpl;
+import org.apache.doris.metric.MetricRepo;
 import org.apache.doris.meta.MetaContext;
 import org.apache.doris.mysql.privilege.AccessControllerManager;
 import org.apache.doris.mysql.privilege.PrivPredicate;
@@ -4391,15 +4392,24 @@ public class FrontendServiceImpl implements 
FrontendService.Iface {
         // check partition's number limit. because partitions in 
addPartitionClauseMap may be duplicated with existing
         // partitions, which would lead to false positive. so we should check 
the partition number AFTER adding new
         // partitions using its ACTUAL NUMBER, rather than the sum of existing 
and requested partitions.
-        if (olapTable.getPartitionNum() > Config.max_auto_partition_num) {
+        int partitionNum = olapTable.getPartitionNum();
+        int autoPartitionLimit = Config.max_auto_partition_num;
+        if (partitionNum > autoPartitionLimit) {
             String errorMessage = String.format(
                     "partition numbers %d exceeded limit of variable 
max_auto_partition_num %d",
-                    olapTable.getPartitionNum(), 
Config.max_auto_partition_num);
+                    partitionNum, autoPartitionLimit);
             LOG.warn(errorMessage);
             errorStatus.setErrorMsgs(Lists.newArrayList(errorMessage));
             result.setStatus(errorStatus);
             LOG.warn("send create partition error status: {}", result);
             return result;
+        } else if (partitionNum > autoPartitionLimit * 0.8) {
+            LOG.warn("Table {}.{} auto partition count {} is approaching limit 
{} (>80%)."
+                            + " Consider increasing max_auto_partition_num.",
+                    db.getFullName(), olapTable.getName(), partitionNum, 
autoPartitionLimit);
+            if (MetricRepo.isInit) {
+                MetricRepo.COUNTER_AUTO_PARTITION_NEAR_LIMIT.increase(1L);
+            }
         }
 
         // build partition & tablets


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to