[
https://issues.apache.org/jira/browse/YARN-11326?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17638005#comment-17638005
]
ASF GitHub Bot commented on YARN-11326:
---------------------------------------
slfan1989 commented on code in PR #4963:
URL: https://github.com/apache/hadoop/pull/4963#discussion_r1030953006
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreServiceMetrics.java:
##########
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.federation;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.metrics2.MetricsInfo;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
+import org.apache.hadoop.metrics2.lib.MutableRate;
+import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.reflect.Method;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.apache.hadoop.metrics2.lib.Interns.info;
+
+@Metrics(about = "Metrics for FederationStateStoreService", context = "fedr")
+public final class FederationStateStoreServiceMetrics {
+
+ public static final Logger LOG =
+ LoggerFactory.getLogger(FederationStateStoreServiceMetrics.class);
+
+ private static final MetricsInfo RECORD_INFO =
+ info("FederationStateStoreServiceMetrics", "Metrics for the RM
FederationStateStoreService");
+
+ private static volatile FederationStateStoreServiceMetrics instance = null;
+ private MetricsRegistry registry;
+
+ private final static Method[] STATESTORE_API_METHODS =
FederationStateStore.class.getMethods();
+
+ // Map method names to counter objects
+ private static final Map<String, MutableCounterLong> FAILED_CALLS = new
HashMap<>();
+ private static final Map<String, MutableRate> SUCCESSFUL_CALLS = new
HashMap<>();
+ // Provide quantile latency for each api call.
+ private static final Map<String, MutableQuantiles> QUANTILE_METRICS = new
HashMap<>();
+
+ // Error string templates for logging calls from methods not in
+ // FederationStateStore API
+ private static final String UNKOWN_FAIL_ERROR_MSG =
+ "Not recording failed call for unknown FederationStateStore method {}";
+ private static final String UNKNOWN_SUCCESS_ERROR_MSG =
+ "Not recording successful call for unknown FederationStateStore method
{}";
+
+ /**
+ * Initialize the singleton instance.
+ *
+ * @return the singleton
+ */
+ public static FederationStateStoreServiceMetrics getMetrics() {
+ synchronized (FederationStateStoreServiceMetrics.class) {
+ if (instance == null) {
+ instance = DefaultMetricsSystem.instance()
+ .register(new FederationStateStoreServiceMetrics());
+ }
+ }
+ return instance;
+ }
+
+ private FederationStateStoreServiceMetrics() {
+ registry = new MetricsRegistry(RECORD_INFO);
+ registry.tag(RECORD_INFO, "FederationStateStoreServiceMetrics");
+
+ // Create the metrics for each method and put them into the map
+ for (Method m : STATESTORE_API_METHODS) {
+ String methodName = m.getName();
+ LOG.debug("Registering Federation StateStore Service metrics for {}",
methodName);
+
+ // This metric only records the number of failed calls; it does not
+ // capture latency information
+ FAILED_CALLS.put(methodName, registry.newCounter(methodName +
"NumFailedCalls",
+ "# failed calls to " + methodName, 0L));
+
+ // This metric records both the number and average latency of successful
+ // calls.
+ SUCCESSFUL_CALLS.put(methodName, registry.newRate(methodName +
"SuccessfulCalls",
+ "# successful calls and latency(ms) for" + methodName));
+
+ // This metric records the quantile-based latency of each successful
call,
+ // re-sampled every 10 seconds.
+ QUANTILE_METRICS.put(methodName, registry.newQuantiles(methodName +
"Latency",
+ "Quantile latency (ms) for " + methodName, "ops", "latency", 10));
+ }
+ }
+
+ // Aggregate metrics are shared, and don't have to be looked up per call
+ @Metric("Total number of successful calls and latency(ms)")
+ private static MutableRate totalSucceededCalls;
+
+ @Metric("Total number of failed StateStore calls")
+ private static MutableCounterLong totalFailedCalls;
+
+ public static void failedStateStoreServiceCall() {
+ String methodName =
Thread.currentThread().getStackTrace()[2].getMethodName();
+ MutableCounterLong methodMetric = FAILED_CALLS.get(methodName);
+
+ if (methodMetric == null) {
+ LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName);
+ return;
+ }
+
+ totalFailedCalls.incr();
+ methodMetric.incr();
+ }
+
+ public static void failedStateStoreServiceCall(String methodName) {
+ MutableCounterLong methodMetric = FAILED_CALLS.get(methodName);
+ if (methodMetric == null) {
+ LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName);
+ return;
+ }
+ totalFailedCalls.incr();
+ methodMetric.incr();
+ }
+
+ public static void succeededStateStoreServiceCall(long duration) {
+ String methodName =
Thread.currentThread().getStackTrace()[2].getMethodName();
+ MutableRate methodMetric = SUCCESSFUL_CALLS.get(methodName);
+ MutableQuantiles methodQuantileMetric = QUANTILE_METRICS.get(methodName);
+
+ if (methodMetric == null || methodQuantileMetric == null) {
+ LOG.error(UNKNOWN_SUCCESS_ERROR_MSG, methodName);
+ return;
+ }
+
+ totalSucceededCalls.add(duration);
Review Comment:
I will fix it.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreServiceMetrics.java:
##########
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.federation;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.metrics2.MetricsInfo;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
+import org.apache.hadoop.metrics2.lib.MutableRate;
+import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.reflect.Method;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.apache.hadoop.metrics2.lib.Interns.info;
+
+@Metrics(about = "Metrics for FederationStateStoreService", context = "fedr")
+public final class FederationStateStoreServiceMetrics {
+
+ public static final Logger LOG =
+ LoggerFactory.getLogger(FederationStateStoreServiceMetrics.class);
+
+ private static final MetricsInfo RECORD_INFO =
+ info("FederationStateStoreServiceMetrics", "Metrics for the RM
FederationStateStoreService");
+
+ private static volatile FederationStateStoreServiceMetrics instance = null;
+ private MetricsRegistry registry;
+
+ private final static Method[] STATESTORE_API_METHODS =
FederationStateStore.class.getMethods();
+
+ // Map method names to counter objects
+ private static final Map<String, MutableCounterLong> FAILED_CALLS = new
HashMap<>();
+ private static final Map<String, MutableRate> SUCCESSFUL_CALLS = new
HashMap<>();
+ // Provide quantile latency for each api call.
+ private static final Map<String, MutableQuantiles> QUANTILE_METRICS = new
HashMap<>();
+
+ // Error string templates for logging calls from methods not in
+ // FederationStateStore API
+ private static final String UNKOWN_FAIL_ERROR_MSG =
+ "Not recording failed call for unknown FederationStateStore method {}";
+ private static final String UNKNOWN_SUCCESS_ERROR_MSG =
+ "Not recording successful call for unknown FederationStateStore method
{}";
+
+ /**
+ * Initialize the singleton instance.
+ *
+ * @return the singleton
+ */
+ public static FederationStateStoreServiceMetrics getMetrics() {
+ synchronized (FederationStateStoreServiceMetrics.class) {
+ if (instance == null) {
+ instance = DefaultMetricsSystem.instance()
+ .register(new FederationStateStoreServiceMetrics());
+ }
+ }
+ return instance;
+ }
+
+ private FederationStateStoreServiceMetrics() {
+ registry = new MetricsRegistry(RECORD_INFO);
+ registry.tag(RECORD_INFO, "FederationStateStoreServiceMetrics");
+
+ // Create the metrics for each method and put them into the map
+ for (Method m : STATESTORE_API_METHODS) {
+ String methodName = m.getName();
+ LOG.debug("Registering Federation StateStore Service metrics for {}",
methodName);
+
+ // This metric only records the number of failed calls; it does not
+ // capture latency information
+ FAILED_CALLS.put(methodName, registry.newCounter(methodName +
"NumFailedCalls",
+ "# failed calls to " + methodName, 0L));
+
+ // This metric records both the number and average latency of successful
+ // calls.
+ SUCCESSFUL_CALLS.put(methodName, registry.newRate(methodName +
"SuccessfulCalls",
+ "# successful calls and latency(ms) for" + methodName));
+
+ // This metric records the quantile-based latency of each successful
call,
+ // re-sampled every 10 seconds.
+ QUANTILE_METRICS.put(methodName, registry.newQuantiles(methodName +
"Latency",
+ "Quantile latency (ms) for " + methodName, "ops", "latency", 10));
+ }
+ }
+
+ // Aggregate metrics are shared, and don't have to be looked up per call
+ @Metric("Total number of successful calls and latency(ms)")
+ private static MutableRate totalSucceededCalls;
+
+ @Metric("Total number of failed StateStore calls")
+ private static MutableCounterLong totalFailedCalls;
+
+ public static void failedStateStoreServiceCall() {
+ String methodName =
Thread.currentThread().getStackTrace()[2].getMethodName();
+ MutableCounterLong methodMetric = FAILED_CALLS.get(methodName);
+
+ if (methodMetric == null) {
+ LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName);
+ return;
+ }
+
+ totalFailedCalls.incr();
+ methodMetric.incr();
+ }
+
+ public static void failedStateStoreServiceCall(String methodName) {
+ MutableCounterLong methodMetric = FAILED_CALLS.get(methodName);
+ if (methodMetric == null) {
+ LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName);
+ return;
+ }
+ totalFailedCalls.incr();
+ methodMetric.incr();
+ }
+
+ public static void succeededStateStoreServiceCall(long duration) {
+ String methodName =
Thread.currentThread().getStackTrace()[2].getMethodName();
+ MutableRate methodMetric = SUCCESSFUL_CALLS.get(methodName);
+ MutableQuantiles methodQuantileMetric = QUANTILE_METRICS.get(methodName);
+
+ if (methodMetric == null || methodQuantileMetric == null) {
+ LOG.error(UNKNOWN_SUCCESS_ERROR_MSG, methodName);
Review Comment:
I will fix it.
> [Federation] Add RM FederationStateStoreService Metrics
> -------------------------------------------------------
>
> Key: YARN-11326
> URL: https://issues.apache.org/jira/browse/YARN-11326
> Project: Hadoop YARN
> Issue Type: Bug
> Components: federation, resourcemanager
> Affects Versions: 3.4.0
> Reporter: Shilun Fan
> Assignee: Shilun Fan
> Priority: Major
> Labels: pull-request-available
>
> RM FederationStateStoreService lacks Metric information, increase Metric
> information statistics.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]