wankai123 commented on code in PR #11339: URL: https://github.com/apache/skywalking/pull/11339#discussion_r1375455276
########## docs/en/setup/backend/backend-bookkeeper-monitoring.md: ########## @@ -0,0 +1,61 @@ +# BookKeeper monitoring + +SkyWalking leverages Prometheus to collect metrics data from the BookKeeper and leverages OpenTelemetry Collector to transfer the metrics to +[OpenTelemetry receiver](opentelemetry-receiver.md) and into the [Meter System](./../../concepts-and-designs/meter.md). +Kafka entity as a `Service` in OAP and on the `Layer: BOOKKEEPER. + +## Data flow + +1. BookKeeper exposes metrics in Prometheus format. +2. OpenTelemetry Collector fetches metrics from BookKeeper cluster via Prometheus Receiver and pushes metrics to SkyWalking OAP Server via OpenTelemetry gRPC exporter. +3. The SkyWalking OAP Server parses the expression with [MAL](../../concepts-and-designs/mal.md) to + filter/calculate/aggregate and store the results.` + +## Setup + +1. Set up [BookKeeper Cluster](https://bookkeeper.apache.org/docs/deployment/manual). +2. Set up [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/getting-started/#kubernetes). The example + for OpenTelemetry Collector configuration, refer + to [here](../../../../test/e2e-v2/cases/pulsar/otel-collector-config.yaml). +3. Config SkyWalking [OpenTelemetry receiver](opentelemetry-receiver.md). + +## BookKeeper Monitoring + +Bookkeeper monitoring provides multidimensional metrics monitoring of BookKeeper cluster as `Layer: BOOKKEEPER` `Service` in +the OAP. In each cluster, the nodes are represented as `Instance`. + +### BookKeeper Cluster Supported Metrics + +| Monitoring Panel | Metric Name | Description | Data Source | +|------------------------------|------------------------------------------------------------------|------------------------------------------------------------|---------------------| +| Bookie Ledgers Count | meter_bookkeeper_bookie_ledgers_count | The number of the bookie ledgers. 
| Bookkeeper Cluster | +| Bookie Ledger Writable Dirs | meter_bookkeeper_bookie_ledger_writable_dirs | The number of writable directories in the bookie. | Bookkeeper Cluster | +| Bookie Ledger Dir Usage | meter_bookkeeper_bookie_ledger_dir_data_bookkeeper_ledgers_usage | The number of successfully created connections. | Bookkeeper Cluster | +| Bookie Entries Count | meter_bookkeeper_bookie_entries_count | The number of failed connections. | Bookkeeper Cluster | +| Bookie Write Cache Size | meter_bookkeeper_bookie_write_cache_size | The total number of closed connections. | Bookkeeper Cluster | +| bookie_write_cache_count | meter_bookkeeper_bookie_write_cache_count | The usage of jvm buffer pool. | Bookkeeper Cluster | +| Bookie Read Cache Size | meter_bookkeeper_bookie_read_cache_size | Time spent in a given JVM garbage collector in seconds. | Bookkeeper Cluster | +| Bookie Write Cache Count | meter_bookkeeper_bookie_read_cache_count | Time spent in a given JVM garbage collector in 30 seconds. | Bookkeeper Cluster | +| Bookie Read Rate | meter_bookkeeper_bookie_read_rate | Time spent in a given JVM garbage collector in 30 seconds. | Bookkeeper Cluster | +| Bookie Write Rate | meter_bookkeeper_bookie_write_rate | Time spent in a given JVM garbage collector in 30 seconds. | Bookkeeper Cluster | Review Comment: I see that some of the descriptions say `in 30 seconds`. 
But according to your MAL expressions they are `rate`, such as: `bookie_WRITE_BYTES.sum(['cluster', 'node']).rate('PT1M')` please check ########## oap-server/server-starter/src/main/resources/ui-initialized-templates/bookkeeper/bookkeeper-cluster.json: ########## @@ -0,0 +1,393 @@ +[ + { + "id": "BookKeeper-Cluster", + "configuration": { + "children": [ + { + "x": 0, + "y": 0, + "w": 24, + "h": 54, + "i": "16", + "type": "Tab", + "children": [ + { + "name": "Overview", + "children": [ + { + "x": 0, + "y": 0, + "w": 12, + "h": 11, + "i": "7", + "type": "Widget", + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "widget": { + "title": "Bookie Ledgers Count", + "tips": "", + "name": "bookie_ledgers_count" + }, + "expressions": [ + "meter_bookkeeper_bookie_ledgers_count" + ], + "typesOfMQE": [ + "TIME_SERIES_VALUES" + ], + "metricMode": "Expression", + "id": "16-0-7", + "moved": false, + "metrics": [], + "metricTypes": [], + "metricConfig": [] + }, + { + "x": 0, + "y": 11, + "w": 12, + "h": 11, + "i": "8", + "type": "Widget", + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "widget": { + "title": "Bookie Ledger Writable Dirs", + "tips": "", + "name": "bookie_ledger_writable_dirs" + }, + "expressions": [ + "meter_bookkeeper_bookie_ledger_writable_dirs" + ], + "typesOfMQE": [ + "TIME_SERIES_VALUES" + ], + "metricMode": "Expression", + "id": "16-0-8", + "moved": false, + "metricConfig": [], + "metricTypes": [], + "metrics": [] + }, + { + "x": 12, + "y": 11, + "w": 12, + "h": 11, + "i": "9", + "type": "Widget", + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "widget": { + "title": "Bookie Ledger Dir Usage", + "tips": "", + "name": "bookie_ledger_dir_data_bookkeeper_ledgers_usage" + }, + "expressions": [ + 
"meter_bookkeeper_bookie_ledger_dir_data_bookkeeper_ledgers_usage" + ], + "typesOfMQE": [ + "TIME_SERIES_VALUES" + ], + "metricMode": "Expression", + "id": "16-0-9", + "moved": false, + "metricConfig": [], + "metricTypes": [], + "metrics": [] + }, + { + "x": 12, + "y": 0, + "w": 12, + "h": 11, + "i": "10", + "type": "Widget", + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "widget": { + "title": "Bookie Entries Count", + "tips": "", + "name": "bookie_entries_count" + }, + "expressions": [ + "meter_bookkeeper_bookie_entries_count" + ], + "typesOfMQE": [ + "TIME_SERIES_VALUES" + ], + "metricMode": "Expression", + "id": "16-0-10", + "moved": false, + "metricConfig": [], + "metricTypes": [], + "metrics": [] + }, + { + "x": 0, + "y": 22, + "w": 6, + "h": 11, + "i": "11", + "type": "Widget", + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "widget": { + "title": "Bookie Write Cache Size", + "tips": "", + "name": "bookie_write_cache_size" + }, + "expressions": [ + "meter_bookkeeper_bookie_write_cache_size/1024/1024" + ], + "typesOfMQE": [ + "TIME_SERIES_VALUES" + ], + "metricMode": "Expression", + "id": "16-0-11", + "moved": false, + "metricConfig": [ + { + "label": "read cache size" + } + ], + "metricTypes": [], + "metrics": [] + }, + { + "x": 12, + "y": 22, + "w": 6, + "h": 11, + "i": "12", + "type": "Widget", + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "widget": { + "title": "Bookie Write Cache Count", + "tips": "", + "name": "bookie_write_cache_count" + }, + "expressions": [ + "meter_bookkeeper_bookie_write_cache_count" + ], + "typesOfMQE": [ + "TIME_SERIES_VALUES" + ], + "metricMode": "Expression", + "id": "16-0-12", + "moved": false, + "metricConfig": [ + { + "label": "read cache count" + } + ], + 
"metricTypes": [], + "metrics": [] + }, + { + "x": 6, + "y": 22, + "w": 6, + "h": 11, + "i": "13", + "type": "Widget", + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "widget": { + "title": "Bookie Read Cache Size", + "tips": "", + "name": "_bookie_read_cache_size" + }, + "expressions": [ + "meter_bookkeeper_bookie_read_cache_size/1024/1024" + ], + "typesOfMQE": [ + "TIME_SERIES_VALUES" + ], + "metricMode": "Expression", + "id": "16-0-13", + "moved": false, + "metricConfig": [], + "metricTypes": [], + "metrics": [] + }, + { + "x": 18, + "y": 22, + "w": 6, + "h": 11, + "i": "14", + "type": "Widget", + "graph": { + "type": "Line", + "step": false, + "smooth": false, + "showSymbol": true, + "showXAxis": true, + "showYAxis": true + }, + "widget": { + "title": "Bookie Write Cache Count", Review Comment: `Bookie Read Cache Count`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@skywalking.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org