This is an automated email from the ASF dual-hosted git repository.

yunhong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluss.git


The following commit(s) were added to refs/heads/main by this push:
     new 2012f0b69 [metrics] Add partition count metrics for tables and cluster monitoring (#1662)
2012f0b69 is described below

commit 2012f0b69fe44e927c04a09604bfc9bf668cde83
Author: buvb <[email protected]>
AuthorDate: Fri Sep 26 14:21:35 2025 +0800

    [metrics] Add partition count metrics for tables and cluster monitoring (#1662)
    
    * [FLUSS-1571] Add partition count metric for cluster monitoring
    
    - Add PARTITION_COUNT metric name constant
    - Implement getTotalPartitionCount() in CoordinatorContext
    - Add partition count metric registration and updates in CoordinatorEventManager
    - Follow existing TABLE_COUNT metric implementation pattern
    - Add basic tests for partition count functionality
    
    This provides a simple way to monitor total partition count across the cluster,
    helping users identify when too many partitions might cause cluster instability.
    
    * [FLUSS-1571] [docs] Add partition count metrics documentation
    
    - Add partitionCount metric documentation for cluster-level monitoring
    - Document the new metric that tracks total number of partitions in the cluster
    - This metric helps users monitor partition distribution and identify potential cluster instability issues
    
    * [FLUSS-1571] [docs] Fix table rowspan for partition count metrics
---
 .../src/main/java/org/apache/fluss/metrics/MetricNames.java      | 1 +
 .../org/apache/fluss/server/coordinator/CoordinatorContext.java  | 4 ++++
 .../fluss/server/coordinator/event/CoordinatorEventManager.java  | 8 ++++++++
 website/docs/maintenance/observability/monitor-metrics.md        | 9 +++++++--
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git 
a/fluss-common/src/main/java/org/apache/fluss/metrics/MetricNames.java 
b/fluss-common/src/main/java/org/apache/fluss/metrics/MetricNames.java
index 9242b4daa..6cb4e72e1 100644
--- a/fluss-common/src/main/java/org/apache/fluss/metrics/MetricNames.java
+++ b/fluss-common/src/main/java/org/apache/fluss/metrics/MetricNames.java
@@ -40,6 +40,7 @@ public class MetricNames {
     public static final String OFFLINE_BUCKET_COUNT = "offlineBucketCount";
     public static final String TABLE_COUNT = "tableCount";
     public static final String BUCKET_COUNT = "bucketCount";
+    public static final String PARTITION_COUNT = "partitionCount";
     public static final String REPLICAS_TO_DELETE_COUNT = 
"replicasToDeleteCount";
 
     // for coordinator event processor
diff --git 
a/fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorContext.java
 
b/fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorContext.java
index 4cb988967..a5f34aa17 100644
--- 
a/fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorContext.java
+++ 
b/fluss-server/src/main/java/org/apache/fluss/server/coordinator/CoordinatorContext.java
@@ -657,4 +657,8 @@ public class CoordinatorContext {
         liveTabletServers.clear();
         shuttingDownTabletServers.clear();
     }
+
+    public int getTotalPartitionCount() {
+        return partitionAssignments.size();
+    }
 }
diff --git 
a/fluss-server/src/main/java/org/apache/fluss/server/coordinator/event/CoordinatorEventManager.java
 
b/fluss-server/src/main/java/org/apache/fluss/server/coordinator/event/CoordinatorEventManager.java
index 36d82df53..c32b71359 100644
--- 
a/fluss-server/src/main/java/org/apache/fluss/server/coordinator/event/CoordinatorEventManager.java
+++ 
b/fluss-server/src/main/java/org/apache/fluss/server/coordinator/event/CoordinatorEventManager.java
@@ -66,6 +66,7 @@ public final class CoordinatorEventManager implements 
EventManager {
     private volatile int offlineBucketCount;
     private volatile int tableCount;
     private volatile int bucketCount;
+    private volatile int partitionCount;
     private volatile int replicasToDeleteCount;
 
     private static final int WINDOW_SIZE = 100;
@@ -91,6 +92,7 @@ public final class CoordinatorEventManager implements 
EventManager {
         coordinatorMetricGroup.gauge(MetricNames.OFFLINE_BUCKET_COUNT, () -> 
offlineBucketCount);
         coordinatorMetricGroup.gauge(MetricNames.BUCKET_COUNT, () -> 
bucketCount);
         coordinatorMetricGroup.gauge(MetricNames.TABLE_COUNT, () -> 
tableCount);
+        coordinatorMetricGroup.gauge(MetricNames.PARTITION_COUNT, () -> 
partitionCount);
         coordinatorMetricGroup.gauge(
                 MetricNames.REPLICAS_TO_DELETE_COUNT, () -> 
replicasToDeleteCount);
     }
@@ -104,6 +106,7 @@ public final class CoordinatorEventManager implements 
EventManager {
                             int tabletServerCount = 
context.getLiveTabletServers().size();
                             int tableCount = context.allTables().size();
                             int bucketCount = 
context.bucketLeaderAndIsr().size();
+                            int partitionCount = 
context.getTotalPartitionCount();
                             int offlineBucketCount = 
context.getOfflineBucketCount();
 
                             int replicasToDeletes = 0;
@@ -135,6 +138,7 @@ public final class CoordinatorEventManager implements 
EventManager {
                                     tabletServerCount,
                                     tableCount,
                                     bucketCount,
+                                    partitionCount,
                                     offlineBucketCount,
                                     replicasToDeletes);
                         });
@@ -147,6 +151,7 @@ public final class CoordinatorEventManager implements 
EventManager {
             this.tabletServerCount = metricsData.tabletServerCount;
             this.tableCount = metricsData.tableCount;
             this.bucketCount = metricsData.bucketCount;
+            this.partitionCount = metricsData.partitionCount;
             this.offlineBucketCount = metricsData.offlineBucketCount;
             this.replicasToDeleteCount = metricsData.replicasToDeleteCount;
         } catch (Exception e) {
@@ -268,6 +273,7 @@ public final class CoordinatorEventManager implements 
EventManager {
         private final int tabletServerCount;
         private final int tableCount;
         private final int bucketCount;
+        private final int partitionCount;
         private final int offlineBucketCount;
         private final int replicasToDeleteCount;
 
@@ -275,11 +281,13 @@ public final class CoordinatorEventManager implements 
EventManager {
                 int tabletServerCount,
                 int tableCount,
                 int bucketCount,
+                int partitionCount,
                 int offlineBucketCount,
                 int replicasToDeleteCount) {
             this.tabletServerCount = tabletServerCount;
             this.tableCount = tableCount;
             this.bucketCount = bucketCount;
+            this.partitionCount = partitionCount;
             this.offlineBucketCount = offlineBucketCount;
             this.replicasToDeleteCount = replicasToDeleteCount;
         }
diff --git a/website/docs/maintenance/observability/monitor-metrics.md 
b/website/docs/maintenance/observability/monitor-metrics.md
index 713ed4163..95e1fec4a 100644
--- a/website/docs/maintenance/observability/monitor-metrics.md
+++ b/website/docs/maintenance/observability/monitor-metrics.md
@@ -294,8 +294,8 @@ Some metrics might not be exposed when using other JVM 
implementations (e.g. IBM
   </thead>
   <tbody>
     <tr>
-      <th rowspan="12"><strong>coordinator</strong></th>
-      <td style={{textAlign: 'center', verticalAlign: 'middle' }} 
rowspan="7">-</td>
+      <th rowspan="13"><strong>coordinator</strong></th>
+      <td style={{textAlign: 'center', verticalAlign: 'middle' }} 
rowspan="8">-</td>
       <td>activeCoordinatorCount</td>
       <td>The number of active CoordinatorServer in this cluster.</td>
       <td>Gauge</td>
@@ -320,6 +320,11 @@ Some metrics might not be exposed when using other JVM 
implementations (e.g. IBM
       <td>The total number of buckets in this cluster.</td>
       <td>Gauge</td>
     </tr>
+    <tr>
+      <td>partitionCount</td>
+      <td>The total number of partitions in this cluster.</td>
+      <td>Gauge</td>
+    </tr>
     <tr>
       <td>replicasToDeleteCount</td>
       <td>The total number of replicas in the progress to be deleted in this 
cluster.</td>

Reply via email to