This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch improve/partition-limit-defaults-and-metrics in repository https://gitbox.apache.org/repos/asf/doris.git
commit b3f35907bc29ae21d77e0be709385677b6513267 Author: Yongqiang YANG <[email protected]> AuthorDate: Wed Mar 18 23:38:28 2026 -0700 [improve](partition) Increase max_dynamic/auto_partition_num defaults to 20000 and add near-limit metrics Raise the default limits from 500/2000 to 20000 for both max_dynamic_partition_num and max_auto_partition_num to better match modern production workloads. Add warning logs and Prometheus counter metrics (auto_partition_near_limit_count, dynamic_partition_near_limit_count) that fire when partition counts exceed 80% of their configured limits, enabling proactive monitoring before hard failures. Co-Authored-By: Claude Opus 4.6 <[email protected]> --- .../src/main/java/org/apache/doris/common/Config.java | 6 +++--- .../apache/doris/common/util/DynamicPartitionUtil.java | 16 +++++++++++++--- .../main/java/org/apache/doris/metric/MetricRepo.java | 14 ++++++++++++++ .../org/apache/doris/service/FrontendServiceImpl.java | 14 ++++++++++++-- 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index f9be5d87559..514e28b722c 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1521,7 +1521,7 @@ public class Config extends ConfigBase { * The number is determined by "start" and "end" in the dynamic partition parameters. */ @ConfField(mutable = true, masterOnly = true) - public static int max_dynamic_partition_num = 500; + public static int max_dynamic_partition_num = 20000; /** * Used to limit the maximum number of partitions that can be created when creating multi partition, @@ -2690,8 +2690,8 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true, description = { "For auto-partitioned tables to prevent users from accidentally creating a large number of partitions, " - + "the number of partitions allowed per OLAP table is `max_auto_partition_num`. Default 2000."}) - public static int max_auto_partition_num = 2000; + + "the number of partitions allowed per OLAP table is `max_auto_partition_num`. Default 20000."}) + public static int max_auto_partition_num = 20000; @ConfField(mutable = true, masterOnly = true, description = { "The maximum difference in the number of tablets of each BE in partition rebalance mode. " diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java index db12f6266ea..733e8a38f4d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java @@ -42,6 +42,7 @@ import org.apache.doris.common.ErrorReport; import org.apache.doris.common.FeConstants; import org.apache.doris.common.FeNameFormat; import org.apache.doris.common.UserException; +import org.apache.doris.metric.MetricRepo; import org.apache.doris.policy.StoragePolicy; import org.apache.doris.resource.Tag; import org.apache.doris.thrift.TStorageMedium; @@ -641,10 +642,19 @@ public class DynamicPartitionUtil { } expectCreatePartitionNum = (long) end - start; - if (!isReplay && hasEnd && (expectCreatePartitionNum > Config.max_dynamic_partition_num) + if (!isReplay && hasEnd && Boolean.parseBoolean(analyzedProperties.getOrDefault(DynamicPartitionProperty.ENABLE, "true"))) { - throw new DdlException("Too many dynamic partitions: " - + expectCreatePartitionNum + ". Limit: " + Config.max_dynamic_partition_num); + if (expectCreatePartitionNum > Config.max_dynamic_partition_num) { + throw new DdlException("Too many dynamic partitions: " + + expectCreatePartitionNum + ". Limit: " + Config.max_dynamic_partition_num); + } else if (expectCreatePartitionNum > Config.max_dynamic_partition_num * 0.8) { + LOG.warn("Dynamic partition count {} is approaching limit {} (>80%)." + + " Consider increasing max_dynamic_partition_num.", + expectCreatePartitionNum, Config.max_dynamic_partition_num); + if (MetricRepo.isInit) { + MetricRepo.COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT.increase(1L); + } + } } if (properties.containsKey(DynamicPartitionProperty.START_DAY_OF_MONTH)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java index cd0755fd9da..c28c2aeb99f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java @@ -256,6 +256,10 @@ public final class MetricRepo { public static GaugeMetricImpl<Long> GAUGE_AVG_PARTITION_SIZE_BYTES; public static GaugeMetricImpl<Long> GAUGE_AVG_TABLET_SIZE_BYTES; + // Partition near-limit warnings + public static LongCounterMetric COUNTER_AUTO_PARTITION_NEAR_LIMIT; + public static LongCounterMetric COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT; + // Agent task public static LongCounterMetric COUNTER_AGENT_TASK_REQUEST_TOTAL; public static AutoMappedMetric<LongCounterMetric> COUNTER_AGENT_TASK_TOTAL; @@ -1040,6 +1044,16 @@ public final class MetricRepo { GAUGE_AVG_TABLET_SIZE_BYTES = new GaugeMetricImpl<>("avg_tablet_size_bytes", MetricUnit.BYTES, "", 0L); DORIS_METRIC_REGISTER.addMetrics(GAUGE_AVG_TABLET_SIZE_BYTES); + // Partition near-limit warning counters + COUNTER_AUTO_PARTITION_NEAR_LIMIT = new LongCounterMetric("auto_partition_near_limit_count", + MetricUnit.NOUNIT, + "number of times auto partition count exceeded 80% of max_auto_partition_num"); + DORIS_METRIC_REGISTER.addMetrics(COUNTER_AUTO_PARTITION_NEAR_LIMIT); + COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT = new LongCounterMetric("dynamic_partition_near_limit_count", + MetricUnit.NOUNIT, + "number of times dynamic partition count exceeded 80% of max_dynamic_partition_num"); + DORIS_METRIC_REGISTER.addMetrics(COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT); + COUNTER_AGENT_TASK_REQUEST_TOTAL = new LongCounterMetric("agent_task_request_total", MetricUnit.NOUNIT, "total agent batch task request send to BE"); DORIS_METRIC_REGISTER.addMetrics(COUNTER_AGENT_TASK_REQUEST_TOTAL); diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index f074711854c..cc0f64afd4e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -95,6 +95,7 @@ import org.apache.doris.load.routineload.RoutineLoadJob; import org.apache.doris.load.routineload.RoutineLoadJob.JobState; import org.apache.doris.load.routineload.RoutineLoadManager; import org.apache.doris.master.MasterImpl; +import org.apache.doris.metric.MetricRepo; import org.apache.doris.meta.MetaContext; import org.apache.doris.mysql.privilege.AccessControllerManager; import org.apache.doris.mysql.privilege.PrivPredicate; @@ -4391,15 +4392,24 @@ public class FrontendServiceImpl implements FrontendService.Iface { // check partition's number limit. because partitions in addPartitionClauseMap may be duplicated with existing // partitions, which would lead to false positive. so we should check the partition number AFTER adding new // partitions using its ACTUAL NUMBER, rather than the sum of existing and requested partitions. - if (olapTable.getPartitionNum() > Config.max_auto_partition_num) { + int partitionNum = olapTable.getPartitionNum(); + int autoPartitionLimit = Config.max_auto_partition_num; + if (partitionNum > autoPartitionLimit) { String errorMessage = String.format( "partition numbers %d exceeded limit of variable max_auto_partition_num %d", - olapTable.getPartitionNum(), Config.max_auto_partition_num); + partitionNum, autoPartitionLimit); LOG.warn(errorMessage); errorStatus.setErrorMsgs(Lists.newArrayList(errorMessage)); result.setStatus(errorStatus); LOG.warn("send create partition error status: {}", result); return result; + } else if (partitionNum > autoPartitionLimit * 0.8) { + LOG.warn("Table {}.{} auto partition count {} is approaching limit {} (>80%)." + + " Consider increasing max_auto_partition_num.", + db.getFullName(), olapTable.getName(), partitionNum, autoPartitionLimit); + if (MetricRepo.isInit) { + MetricRepo.COUNTER_AUTO_PARTITION_NEAR_LIMIT.increase(1L); + } } // build partition & tablets --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
