This is an automated email from the ASF dual-hosted git repository.
yunhong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluss.git
The following commit(s) were added to refs/heads/main by this push:
new 2012f0b69 [metrics] Add partition count metrics for tables and cluster
monitoring (#1662)
2012f0b69 is described below
commit 2012f0b69fe44e927c04a09604bfc9bf668cde83
Author: buvb <[email protected]>
AuthorDate: Fri Sep 26 14:21:35 2025 +0800
[metrics] Add partition count metrics for tables and cluster monitoring
(#1662)
* [FLUSS-1571] Add partition count metric for cluster monitoring
- Add PARTITION_COUNT metric name constant
- Implement getTotalPartitionCount() in CoordinatorContext
- Add partition count metric registration and updates in
CoordinatorEventManager
- Follow existing TABLE_COUNT metric implementation pattern
- Add basic tests for partition count functionality
This provides a simple way to monitor the total partition count across the
cluster, helping users identify when too many partitions might cause cluster
instability.
* [FLUSS-1571] [docs] Add partition count metrics documentation
- Add partitionCount metric documentation for cluster-level monitoring
- Document the new metric that tracks the total number of partitions in the
cluster
- This metric helps users monitor partition distribution and identify
potential cluster instability issues
* [FLUSS-1571] [docs] Fix table rowspan for partition count metrics
---
.../src/main/java/org/apache/fluss/metrics/MetricNames.java | 1 +
.../org/apache/fluss/server/coordinator/CoordinatorContext.java | 4 ++++
.../fluss/server/coordinator/event/CoordinatorEventManager.java | 8 ++++++++
website/docs/maintenance/observability/monitor-metrics.md | 9 +++++++--
4 files changed, 20 insertions(+), 2 deletions(-)
diff --git
a/fluss-common/src/main/java/org/apache/fluss/metrics/MetricNames.java
b/fluss-common/src/main/java/org/apache/fluss/metrics/MetricNames.java
index 9242b4daa..6cb4e72e1 100644
--- a/fluss-common/src/main/java/org/apache/fluss/metrics/MetricNames.java
+++ b/fluss-common/src/main/java/org/apache/fluss/metrics/MetricNames.java
@@ -40,6 +40,7 @@ public class MetricNames {
public static final String OFFLINE_BUCKET_COUNT = "offlineBucketCount";
public static final String TABLE_COUNT = "tableCount";
public static final String BUCKET_COUNT = "bucketCount";
+ public static final String PARTITION_COUNT = "partitionCount";
public static final String REPLICAS_TO_DELETE_COUNT =
"replicasToDeleteCount";
// for coordinator event processor
diff --git
a/fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorContext.java
b/fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorContext.java
index 4cb988967..a5f34aa17 100644
---
a/fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorContext.java
+++
b/fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorContext.java
@@ -657,4 +657,8 @@ public class CoordinatorContext {
liveTabletServers.clear();
shuttingDownTabletServers.clear();
}
+
+ public int getTotalPartitionCount() {
+ return partitionAssignments.size();
+ }
}
diff --git
a/fluss-server/src/main/java/org/apache/fluss/server/coordinator/event/CoordinatorEventManager.java
b/fluss-server/src/main/java/org/apache/fluss/server/coordinator/event/CoordinatorEventManager.java
index 36d82df53..c32b71359 100644
---
a/fluss-server/src/main/java/org/apache/fluss/server/coordinator/event/CoordinatorEventManager.java
+++
b/fluss-server/src/main/java/org/apache/fluss/server/coordinator/event/CoordinatorEventManager.java
@@ -66,6 +66,7 @@ public final class CoordinatorEventManager implements
EventManager {
private volatile int offlineBucketCount;
private volatile int tableCount;
private volatile int bucketCount;
+ private volatile int partitionCount;
private volatile int replicasToDeleteCount;
private static final int WINDOW_SIZE = 100;
@@ -91,6 +92,7 @@ public final class CoordinatorEventManager implements
EventManager {
coordinatorMetricGroup.gauge(MetricNames.OFFLINE_BUCKET_COUNT, () ->
offlineBucketCount);
coordinatorMetricGroup.gauge(MetricNames.BUCKET_COUNT, () ->
bucketCount);
coordinatorMetricGroup.gauge(MetricNames.TABLE_COUNT, () ->
tableCount);
+ coordinatorMetricGroup.gauge(MetricNames.PARTITION_COUNT, () ->
partitionCount);
coordinatorMetricGroup.gauge(
MetricNames.REPLICAS_TO_DELETE_COUNT, () ->
replicasToDeleteCount);
}
@@ -104,6 +106,7 @@ public final class CoordinatorEventManager implements
EventManager {
int tabletServerCount =
context.getLiveTabletServers().size();
int tableCount = context.allTables().size();
int bucketCount =
context.bucketLeaderAndIsr().size();
+ int partitionCount =
context.getTotalPartitionCount();
int offlineBucketCount =
context.getOfflineBucketCount();
int replicasToDeletes = 0;
@@ -135,6 +138,7 @@ public final class CoordinatorEventManager implements
EventManager {
tabletServerCount,
tableCount,
bucketCount,
+ partitionCount,
offlineBucketCount,
replicasToDeletes);
});
@@ -147,6 +151,7 @@ public final class CoordinatorEventManager implements
EventManager {
this.tabletServerCount = metricsData.tabletServerCount;
this.tableCount = metricsData.tableCount;
this.bucketCount = metricsData.bucketCount;
+ this.partitionCount = metricsData.partitionCount;
this.offlineBucketCount = metricsData.offlineBucketCount;
this.replicasToDeleteCount = metricsData.replicasToDeleteCount;
} catch (Exception e) {
@@ -268,6 +273,7 @@ public final class CoordinatorEventManager implements
EventManager {
private final int tabletServerCount;
private final int tableCount;
private final int bucketCount;
+ private final int partitionCount;
private final int offlineBucketCount;
private final int replicasToDeleteCount;
@@ -275,11 +281,13 @@ public final class CoordinatorEventManager implements
EventManager {
int tabletServerCount,
int tableCount,
int bucketCount,
+ int partitionCount,
int offlineBucketCount,
int replicasToDeleteCount) {
this.tabletServerCount = tabletServerCount;
this.tableCount = tableCount;
this.bucketCount = bucketCount;
+ this.partitionCount = partitionCount;
this.offlineBucketCount = offlineBucketCount;
this.replicasToDeleteCount = replicasToDeleteCount;
}
diff --git a/website/docs/maintenance/observability/monitor-metrics.md
b/website/docs/maintenance/observability/monitor-metrics.md
index 713ed4163..95e1fec4a 100644
--- a/website/docs/maintenance/observability/monitor-metrics.md
+++ b/website/docs/maintenance/observability/monitor-metrics.md
@@ -294,8 +294,8 @@ Some metrics might not be exposed when using other JVM
implementations (e.g. IBM
</thead>
<tbody>
<tr>
- <th rowspan="12"><strong>coordinator</strong></th>
- <td style={{textAlign: 'center', verticalAlign: 'middle' }}
rowspan="7">-</td>
+ <th rowspan="13"><strong>coordinator</strong></th>
+ <td style={{textAlign: 'center', verticalAlign: 'middle' }}
rowspan="8">-</td>
<td>activeCoordinatorCount</td>
<td>The number of active CoordinatorServer in this cluster.</td>
<td>Gauge</td>
@@ -320,6 +320,11 @@ Some metrics might not be exposed when using other JVM
implementations (e.g. IBM
<td>The total number of buckets in this cluster.</td>
<td>Gauge</td>
</tr>
+ <tr>
+ <td>partitionCount</td>
+ <td>The total number of partitions in this cluster.</td>
+ <td>Gauge</td>
+ </tr>
<tr>
<td>replicasToDeleteCount</td>
<td>The total number of replicas in the progress to be deleted in this
cluster.</td>