This is an automated email from the ASF dual-hosted git repository.
rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new e62db612c9 Metrics for Table Disabled and Consumption Paused (#12000)
e62db612c9 is described below
commit e62db612c91b4b540bcdfeecb412442eac35ced2
Author: Prashant Pandey <[email protected]>
AuthorDate: Thu Dec 7 21:21:50 2023 +0530
Metrics for Table Disabled and Consumption Paused (#12000)
---
.../configs/controller.yml | 12 ++++++++
.../pinot/common/metrics/ControllerGauge.java | 6 +++-
.../controller/helix/SegmentStatusChecker.java | 35 ++++++++++++++++++++--
.../realtime/PinotLLCRealtimeSegmentManager.java | 6 ++--
4 files changed, 52 insertions(+), 7 deletions(-)
diff --git
a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
index c4071887ed..e86243dfc1 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
@@ -204,6 +204,18 @@ rules:
cache: true
labels:
version: "$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\",
name=\"pinot.controller.tableConsumptionPaused.([^\\.]*?)_(OFFLINE|REALTIME)\"><>(\\w+)"
+ name: "pinot_controller_tableConsumptionPaused_$3"
+ cache: true
+ labels:
+ tableName: "$1"
+ tableType: "$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\",
name=\"pinot.controller.tableDisabled.([^\\.]*?)_(OFFLINE|REALTIME)\"><>(\\w+)"
+ name: "pinot_controller_tableDisabled_$3"
+ cache: true
+ labels:
+ tableName: "$1"
+ tableType: "$2"
## Metrics that fit the catch-all patterns above should not be added to this
file.
## In case a metric does not fit the catch-all patterns, add them before
this comment
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
index 4006ca45b0..3444ffae5f 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
@@ -149,7 +149,11 @@ public enum ControllerGauge implements
AbstractMetrics.Gauge {
// Number of tables that we want to fix but failed to update table config
FAILED_TO_UPDATE_TABLE_CONFIG_COUNT("failedToUpdateTableConfigCount", true),
-
LLC_SEGMENTS_DEEP_STORE_UPLOAD_RETRY_QUEUE_SIZE("LLCSegmentDeepStoreUploadRetryQueueSize",
false);
+
LLC_SEGMENTS_DEEP_STORE_UPLOAD_RETRY_QUEUE_SIZE("LLCSegmentDeepStoreUploadRetryQueueSize",
false),
+
+ TABLE_CONSUMPTION_PAUSED("tableConsumptionPaused", false),
+
+ TABLE_DISABLED("tableDisabled", false);
private final String _gaugeName;
private final String _unit;
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
index f64e6c3e75..617564757e 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
@@ -44,6 +44,7 @@ import org.apache.pinot.controller.LeadControllerManager;
import org.apache.pinot.controller.helix.core.PinotHelixResourceManager;
import
org.apache.pinot.controller.helix.core.periodictask.ControllerPeriodicTask;
import
org.apache.pinot.controller.helix.core.realtime.MissingConsumingSegmentFinder;
+import
org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager;
import org.apache.pinot.controller.util.TableSizeReader;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
@@ -133,7 +134,24 @@ public class SegmentStatusChecker extends
ControllerPeriodicTask<SegmentStatusCh
protected void postprocess(Context context) {
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.REALTIME_TABLE_COUNT,
context._realTimeTableCount);
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.OFFLINE_TABLE_COUNT,
context._offlineTableCount);
-
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.DISABLED_TABLE_COUNT,
context._disabledTableCount);
+
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.DISABLED_TABLE_COUNT,
context._disabledTables.size());
+
+ //emit a 0 for tables that are not paused/disabled. This makes alert
expressions simpler as we don't have to deal
+ // with missing metrics
+ context._processedTables.forEach(tableNameWithType -> {
+ if (context._pausedTables.contains(tableNameWithType)) {
+ _controllerMetrics.setValueOfTableGauge(tableNameWithType,
ControllerGauge.TABLE_CONSUMPTION_PAUSED, 1);
+ } else {
+ _controllerMetrics.setValueOfTableGauge(tableNameWithType,
ControllerGauge.TABLE_CONSUMPTION_PAUSED, 0);
+ }
+ });
+ context._processedTables.forEach(tableNameWithType -> {
+ if (context._disabledTables.contains(tableNameWithType)) {
+ _controllerMetrics.setValueOfTableGauge(tableNameWithType,
ControllerGauge.TABLE_DISABLED, 1);
+ } else {
+ _controllerMetrics.setValueOfTableGauge(tableNameWithType,
ControllerGauge.TABLE_DISABLED, 0);
+ }
+ });
// Remove metrics for tables that are no longer in the cluster
_cachedTableNamesWithType.removeAll(context._processedTables);
@@ -186,10 +204,18 @@ public class SegmentStatusChecker extends
ControllerPeriodicTask<SegmentStatusCh
LOGGER.warn("Table {} is disabled. Skipping segment status checks",
tableNameWithType);
}
resetTableMetrics(tableNameWithType);
- context._disabledTableCount++;
+ context._disabledTables.add(tableNameWithType);
return;
}
+ //check if table consumption is paused
+ boolean isTablePaused =
+
Boolean.parseBoolean(idealState.getRecord().getSimpleField(PinotLLCRealtimeSegmentManager.IS_TABLE_PAUSED));
+
+ if (isTablePaused) {
+ context._pausedTables.add(tableNameWithType);
+ }
+
if (idealState.getPartitionSet().isEmpty()) {
int nReplicasFromIdealState = 1;
try {
@@ -335,6 +361,8 @@ public class SegmentStatusChecker extends
ControllerPeriodicTask<SegmentStatusCh
_controllerMetrics.removeTableGauge(tableNameWithType,
ControllerGauge.SEGMENTS_IN_ERROR_STATE);
_controllerMetrics.removeTableGauge(tableNameWithType,
ControllerGauge.PERCENT_SEGMENTS_AVAILABLE);
+ _controllerMetrics.removeTableGauge(tableNameWithType,
ControllerGauge.TABLE_DISABLED);
+ _controllerMetrics.removeTableGauge(tableNameWithType,
ControllerGauge.TABLE_CONSUMPTION_PAUSED);
}
private void setStatusToDefault() {
@@ -367,7 +395,8 @@ public class SegmentStatusChecker extends
ControllerPeriodicTask<SegmentStatusCh
private boolean _logDisabledTables;
private int _realTimeTableCount;
private int _offlineTableCount;
- private int _disabledTableCount;
private Set<String> _processedTables = new HashSet<>();
+ private Set<String> _disabledTables = new HashSet<>();
+ private Set<String> _pausedTables = new HashSet<>();
}
}
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 74eb758b26..298b16d605 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -129,14 +129,14 @@ import org.slf4j.LoggerFactory;
* TODO: migrate code in this class to other places for better readability
*/
public class PinotLLCRealtimeSegmentManager {
+
+ // simple field in Ideal State representing pause status for the table
+ public static final String IS_TABLE_PAUSED = "isTablePaused";
private static final Logger LOGGER =
LoggerFactory.getLogger(PinotLLCRealtimeSegmentManager.class);
private static final int STARTING_SEQUENCE_NUMBER = 0; // Initial sequence
number for new table segments
private static final String METADATA_EVENT_NOTIFIER_PREFIX =
"metadata.event.notifier";
- // simple field in Ideal State representing pause status for the table
- private static final String IS_TABLE_PAUSED = "isTablePaused";
-
// Max time to wait for all LLC segments to complete committing their
metadata while stopping the controller.
private static final long MAX_LLC_SEGMENT_METADATA_COMMIT_TIME_MILLIS =
30_000L;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org