This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new cad5a392f3 [KYUUBI #7072] Expose metrics of engine startup permit state
cad5a392f3 is described below

commit cad5a392f3195d8f3b16c293a11ac54c2f090871
Author: Lennon Chin <[email protected]>
AuthorDate: Thu May 29 13:27:42 2025 +0800

    [KYUUBI #7072] Expose metrics of engine startup permit state
    
    ### Why are the changes needed?
    
    The metrics `kyuubi_operation_state_LaunchEngine_*` do not reflect the 
state of the semaphore once a maximum engine startup limit is configured 
through `kyuubi.server.limit.engine.startup`. This change adds metrics that 
expose the relevant permit state.
    
    ### How was this patch tested?
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Closes #7072 from LennonChin/engine_startup_metrics.
    
    Closes #7072
    
    d6bf3696a [Lennon Chin] Expose metrics of engine startup permit status
    
    Authored-by: Lennon Chin <[email protected]>
    Signed-off-by: Cheng Pan <[email protected]>
---
 docs/monitor/metrics.md                            |   3 +
 grafana/dashboard-template.json                    | 165 ++++++++++++++++++---
 .../apache/kyuubi/metrics/MetricsConstants.scala   |   5 +
 .../kyuubi/session/KyuubiSessionManager.scala      |   8 +
 4 files changed, 157 insertions(+), 24 deletions(-)

diff --git a/docs/monitor/metrics.md b/docs/monitor/metrics.md
index 58a078e470..6c13008874 100644
--- a/docs/monitor/metrics.md
+++ b/docs/monitor/metrics.md
@@ -65,6 +65,9 @@ These metrics include:
 | `kyuubi.engine.timeout`                          |                           
             | counter   | 1.2.0  | <div style='width: 150pt;word-wrap: 
break-word;white-space: normal'>  cumulative timeout engines</div>              
                                                                                
                                                                                
       |
 | `kyuubi.engine.failed`                           | `${user}`                 
             | counter   | 1.2.0  | <div style='width: 150pt;word-wrap: 
break-word;white-space: normal'>  cumulative explicitly failed engine count for 
a `${user}`</div>                                                               
                                                                                
       |
 | `kyuubi.engine.failed`                           | `${errorType}`            
             | counter   | 1.2.0  | <div style='width: 150pt;word-wrap: 
break-word;white-space: normal'> cumulative explicitly failed engine count for 
a particular `${errorType}`, e.g. `ClassNotFoundException`</div>                
                                                                                
        |
+| `kyuubi.engine.startup.permit.limit.total`       |                           
             | meter     | 1.11.0 | <div style='width: 150pt;word-wrap: 
break-word;white-space: normal'> maximum number of permits for concurrent engine startups 
</div>                                                                          
                                                                                
            |
+| `kyuubi.engine.startup.permit.available`         |                           
             | gauge     | 1.11.0 | <div style='width: 150pt;word-wrap: 
break-word;white-space: normal'> available permits for concurrently starting 
engines </div>                                                                  
                                                                                
            |
+| `kyuubi.engine.startup.permit.waiting`           |                           
             | gauge     | 1.11.0 | <div style='width: 150pt;word-wrap: 
break-word;white-space: normal'> engines that are waiting to acquire a startup permit 
</div>                                                                          
                                                                                
       |
 | `kyuubi.backend_service.open_session`            |                           
             | timer     | 1.5.0  | <div style='width: 150pt;word-wrap: 
break-word;white-space: normal'> kyuubi backend service `openSession` method 
execution time and rate </div>                                                  
                                                                                
          |
 | `kyuubi.backend_service.close_session`           |                           
             | timer     | 1.5.0  | <div style='width: 150pt;word-wrap: 
break-word;white-space: normal'> kyuubi backend service `closeSession` method 
execution time and rate </div>                                                  
                                                                                
         |
 | `kyuubi.backend_service.get_info`                |                           
             | timer     | 1.5.0  | <div style='width: 150pt;word-wrap: 
break-word;white-space: normal'> kyuubi backend service `getInfo` method 
execution time and rate </div>                                                  
                                                                                
              |
diff --git a/grafana/dashboard-template.json b/grafana/dashboard-template.json
index e2978b617a..3c3c598d25 100644
--- a/grafana/dashboard-template.json
+++ b/grafana/dashboard-template.json
@@ -823,6 +823,123 @@
         "x": 16,
         "y": 7
       },
+      "id": 104,
+      "maxPerRow": 2,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "repeatDirection": "h",
+      "targets": [
+        {
+          "datasource": "${DS_PROMETHEUS}",
+          "editorMode": "code",
+          "expr": " 
kyuubi_engine_startup_permit_limit_total{$baseFilter,instance=~\"$instance\"}",
+          "hide": false,
+          "legendFormat": "${baseLegend}-limit",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": "${DS_PROMETHEUS}",
+          "editorMode": "code",
+          "expr": " 
kyuubi_engine_startup_permit_waiting{$baseFilter,instance=~\"$instance\"}",
+          "hide": false,
+          "legendFormat": "${baseLegend}-waiting",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": "${DS_PROMETHEUS}",
+          "editorMode": "code",
+          "expr": " 
kyuubi_engine_startup_permit_available{$baseFilter,instance=~\"$instance\"}",
+          "hide": false,
+          "legendFormat": "${baseLegend}-available",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "Engine startup permit",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "description": "",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "links": [],
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 8,
+        "x": 0,
+        "y": 13
+      },
       "id": 75,
       "maxPerRow": 2,
       "options": {
@@ -918,7 +1035,7 @@
       "gridPos": {
         "h": 7,
         "w": 8,
-        "x": 0,
+        "x": 8,
         "y": 13
       },
       "id": 77,
@@ -1025,7 +1142,7 @@
       "gridPos": {
         "h": 7,
         "w": 8,
-        "x": 8,
+        "x": 16,
         "y": 13
       },
       "id": 79,
@@ -1130,10 +1247,10 @@
         "overrides": []
       },
       "gridPos": {
-        "h": 7,
+        "h": 6,
         "w": 8,
-        "x": 16,
-        "y": 13
+        "x": 0,
+        "y": 20
       },
       "id": 80,
       "maxPerRow": 2,
@@ -1236,7 +1353,7 @@
       "gridPos": {
         "h": 6,
         "w": 8,
-        "x": 0,
+        "x": 8,
         "y": 20
       },
       "id": 34,
@@ -1335,7 +1452,7 @@
       "gridPos": {
         "h": 6,
         "w": 8,
-        "x": 8,
+        "x": 16,
         "y": 20
       },
       "id": 71,
@@ -1430,8 +1547,8 @@
       "gridPos": {
         "h": 6,
         "w": 8,
-        "x": 16,
-        "y": 20
+        "x": 0,
+        "y": 26
       },
       "id": 76,
       "maxPerRow": 2,
@@ -1478,7 +1595,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 26
+        "y": 32
       },
       "id": 88,
       "panels": [],
@@ -1549,7 +1666,7 @@
         "h": 8,
         "w": 12,
         "x": 0,
-        "y": 27
+        "y": 33
       },
       "id": 89,
       "maxPerRow": 2,
@@ -1646,7 +1763,7 @@
         "h": 8,
         "w": 12,
         "x": 12,
-        "y": 27
+        "y": 33
       },
       "id": 92,
       "maxPerRow": 2,
@@ -1743,7 +1860,7 @@
         "h": 7,
         "w": 12,
         "x": 0,
-        "y": 35
+        "y": 41
       },
       "id": 90,
       "maxPerRow": 2,
@@ -1849,7 +1966,7 @@
         "h": 7,
         "w": 12,
         "x": 12,
-        "y": 35
+        "y": 41
       },
       "id": 91,
       "maxPerRow": 2,
@@ -1899,7 +2016,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 42
+        "y": 48
       },
       "id": 93,
       "panels": [],
@@ -1970,7 +2087,7 @@
         "h": 8,
         "w": 12,
         "x": 0,
-        "y": 43
+        "y": 49
       },
       "id": 94,
       "maxPerRow": 2,
@@ -2076,7 +2193,7 @@
         "h": 8,
         "w": 12,
         "x": 12,
-        "y": 43
+        "y": 49
       },
       "id": 99,
       "maxPerRow": 2,
@@ -2173,7 +2290,7 @@
         "h": 8,
         "w": 12,
         "x": 0,
-        "y": 51
+        "y": 57
       },
       "id": 98,
       "maxPerRow": 2,
@@ -2271,7 +2388,7 @@
         "h": 8,
         "w": 12,
         "x": 12,
-        "y": 51
+        "y": 57
       },
       "id": 97,
       "maxPerRow": 2,
@@ -2321,7 +2438,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 59
+        "y": 65
       },
       "id": 68,
       "panels": [],
@@ -2392,7 +2509,7 @@
         "h": 8,
         "w": 12,
         "x": 0,
-        "y": 60
+        "y": 66
       },
       "id": 100,
       "options": {
@@ -2511,7 +2628,7 @@
         "h": 8,
         "w": 12,
         "x": 12,
-        "y": 60
+        "y": 66
       },
       "id": 101,
       "options": {
@@ -2601,7 +2718,7 @@
         "h": 8,
         "w": 12,
         "x": 0,
-        "y": 68
+        "y": 74
       },
       "id": 49,
       "options": {
@@ -2653,7 +2770,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 68
+        "y": 82
       },
       "id": 60,
       "panels": [
diff --git 
a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConstants.scala
 
b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConstants.scala
index 4afd7246e6..61a5d8d31d 100644
--- 
a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConstants.scala
+++ 
b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConstants.scala
@@ -60,6 +60,11 @@ object MetricsConstants {
   final val ENGINE_TIMEOUT: String = ENGINE + "timeout"
   final val ENGINE_TOTAL: String = ENGINE + "total"
 
+  final private val ENGINE_STARTUP_PERMIT: String = ENGINE + "startup.permit."
+  final val ENGINE_STARTUP_PERMIT_LIMIT: String = ENGINE_STARTUP_PERMIT + 
"limit"
+  final val ENGINE_STARTUP_PERMIT_AVAILABLE: String = ENGINE_STARTUP_PERMIT + 
"available"
+  final val ENGINE_STARTUP_PERMIT_WAITING: String = ENGINE_STARTUP_PERMIT + 
"waiting"
+
   final private val OPERATION = KYUUBI + "operation."
   final val OPERATION_OPEN: String = OPERATION + "opened"
   final val OPERATION_FAIL: String = OPERATION + "failed"
diff --git 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala
 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala
index caae9a7e8b..9423521f6b 100644
--- 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala
+++ 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala
@@ -300,6 +300,14 @@ class KyuubiSessionManager private (name: String) extends 
SessionManager(name) {
       ms.registerGauge(EXEC_POOL_ALIVE, getExecPoolSize, 0)
       ms.registerGauge(EXEC_POOL_ACTIVE, getActiveCount, 0)
       ms.registerGauge(EXEC_POOL_WORK_QUEUE_SIZE, getWorkQueueSize, 0)
+      this.engineStartupProcessSemaphore.foreach { semaphore =>
+        ms.markMeter(ENGINE_STARTUP_PERMIT_LIMIT, semaphore.availablePermits)
+        ms.registerGauge(
+          ENGINE_STARTUP_PERMIT_AVAILABLE,
+          semaphore.availablePermits,
+          semaphore.availablePermits)
+        ms.registerGauge(ENGINE_STARTUP_PERMIT_WAITING, 
semaphore.getQueueLength, 0)
+      }
     }
     super.start()
     startEngineAliveChecker()

Reply via email to