[
https://issues.apache.org/jira/browse/YARN-11326?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17637929#comment-17637929
]
ASF GitHub Bot commented on YARN-11326:
---------------------------------------
goiri commented on code in PR #4963:
URL: https://github.com/apache/hadoop/pull/4963#discussion_r1030807228
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreServiceMetrics.java:
##########
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.federation;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.metrics2.MetricsInfo;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
+import org.apache.hadoop.metrics2.lib.MutableRate;
+import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.reflect.Method;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.apache.hadoop.metrics2.lib.Interns.info;
+
+@Metrics(about = "Metrics for FederationStateStoreService", context = "fedr")
+public final class FederationStateStoreServiceMetrics {
+
+ public static final Logger LOG =
+ LoggerFactory.getLogger(FederationStateStoreServiceMetrics.class);
+
+ private static final MetricsInfo RECORD_INFO =
+ info("FederationStateStoreServiceMetrics", "Metrics for the RM
FederationStateStoreService");
+
+ private static volatile FederationStateStoreServiceMetrics instance = null;
+ private MetricsRegistry registry;
+
+ private final static Method[] STATESTORE_API_METHODS =
FederationStateStore.class.getMethods();
+
+ // Map method names to counter objects
+ private static final Map<String, MutableCounterLong> FAILED_CALLS = new
HashMap<>();
+ private static final Map<String, MutableRate> SUCCESSFUL_CALLS = new
HashMap<>();
+ // Provide quantile latency for each api call.
+ private static final Map<String, MutableQuantiles> QUANTILE_METRICS = new
HashMap<>();
+
+ // Error string templates for logging calls from methods not in
+ // FederationStateStore API
+ private static final String UNKOWN_FAIL_ERROR_MSG =
+ "Not recording failed call for unknown FederationStateStore method {}";
+ private static final String UNKNOWN_SUCCESS_ERROR_MSG =
+ "Not recording successful call for unknown FederationStateStore method
{}";
+
+ /**
+ * Initialize the singleton instance.
+ *
+ * @return the singleton
+ */
+ public static FederationStateStoreServiceMetrics getMetrics() {
+ synchronized (FederationStateStoreServiceMetrics.class) {
+ if (instance == null) {
+ instance = DefaultMetricsSystem.instance()
+ .register(new FederationStateStoreServiceMetrics());
+ }
+ }
+ return instance;
+ }
+
+ private FederationStateStoreServiceMetrics() {
+ registry = new MetricsRegistry(RECORD_INFO);
+ registry.tag(RECORD_INFO, "FederationStateStoreServiceMetrics");
+
+ // Create the metrics for each method and put them into the map
+ for (Method m : STATESTORE_API_METHODS) {
+ String methodName = m.getName();
+ LOG.debug("Registering Federation StateStore Service metrics for {}",
methodName);
+
+ // This metric only records the number of failed calls; it does not
+ // capture latency information
+ FAILED_CALLS.put(methodName, registry.newCounter(methodName +
"NumFailedCalls",
+ "# failed calls to " + methodName, 0L));
+
+ // This metric records both the number and average latency of successful
+ // calls.
+ SUCCESSFUL_CALLS.put(methodName, registry.newRate(methodName +
"SuccessfulCalls",
+ "# successful calls and latency(ms) for" + methodName));
+
+ // This metric records the quantile-based latency of each successful
call,
+ // re-sampled every 10 seconds.
+ QUANTILE_METRICS.put(methodName, registry.newQuantiles(methodName +
"Latency",
+ "Quantile latency (ms) for " + methodName, "ops", "latency", 10));
+ }
+ }
+
+ // Aggregate metrics are shared, and don't have to be looked up per call
+ @Metric("Total number of successful calls and latency(ms)")
+ private static MutableRate totalSucceededCalls;
+
+ @Metric("Total number of failed StateStore calls")
+ private static MutableCounterLong totalFailedCalls;
+
+ public static void failedStateStoreServiceCall() {
+ String methodName =
Thread.currentThread().getStackTrace()[2].getMethodName();
+ MutableCounterLong methodMetric = FAILED_CALLS.get(methodName);
+
+ if (methodMetric == null) {
+ LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName);
+ return;
+ }
+
+ totalFailedCalls.incr();
+ methodMetric.incr();
+ }
+
+ public static void failedStateStoreServiceCall(String methodName) {
+ MutableCounterLong methodMetric = FAILED_CALLS.get(methodName);
+ if (methodMetric == null) {
+ LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName);
+ return;
+ }
+ totalFailedCalls.incr();
+ methodMetric.incr();
+ }
+
+ public static void succeededStateStoreServiceCall(long duration) {
+ String methodName =
Thread.currentThread().getStackTrace()[2].getMethodName();
Review Comment:
This is a little dangerous in case somebody decides to add a nested query.
Im fine going with this bu want to make sure we don't mess up.
We should probably extract this method and add checks for size being 2 for
example.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationRMStateStoreService.java:
##########
@@ -482,4 +507,156 @@ private void addApplication2RMAppManager(MockRM rm,
ApplicationId appId) {
rmAppMaps.putIfAbsent(application.getApplicationId(), application);
}
+
+ @Test
+ public void testPolicyConfigurationMethod() throws YarnException {
+
+ /**
Review Comment:
no javadoc. just group comment
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreServiceMetrics.java:
##########
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.federation;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.metrics2.MetricsInfo;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
+import org.apache.hadoop.metrics2.lib.MutableRate;
+import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.reflect.Method;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.apache.hadoop.metrics2.lib.Interns.info;
+
+@Metrics(about = "Metrics for FederationStateStoreService", context = "fedr")
+public final class FederationStateStoreServiceMetrics {
+
+ public static final Logger LOG =
+ LoggerFactory.getLogger(FederationStateStoreServiceMetrics.class);
+
+ private static final MetricsInfo RECORD_INFO =
+ info("FederationStateStoreServiceMetrics", "Metrics for the RM
FederationStateStoreService");
+
+ private static volatile FederationStateStoreServiceMetrics instance = null;
+ private MetricsRegistry registry;
+
+ private final static Method[] STATESTORE_API_METHODS =
FederationStateStore.class.getMethods();
+
+ // Map method names to counter objects
+ private static final Map<String, MutableCounterLong> FAILED_CALLS = new
HashMap<>();
+ private static final Map<String, MutableRate> SUCCESSFUL_CALLS = new
HashMap<>();
+ // Provide quantile latency for each api call.
+ private static final Map<String, MutableQuantiles> QUANTILE_METRICS = new
HashMap<>();
+
+ // Error string templates for logging calls from methods not in
+ // FederationStateStore API
+ private static final String UNKOWN_FAIL_ERROR_MSG =
+ "Not recording failed call for unknown FederationStateStore method {}";
+ private static final String UNKNOWN_SUCCESS_ERROR_MSG =
+ "Not recording successful call for unknown FederationStateStore method
{}";
+
+ /**
+ * Initialize the singleton instance.
+ *
+ * @return the singleton
+ */
+ public static FederationStateStoreServiceMetrics getMetrics() {
+ synchronized (FederationStateStoreServiceMetrics.class) {
+ if (instance == null) {
+ instance = DefaultMetricsSystem.instance()
+ .register(new FederationStateStoreServiceMetrics());
+ }
+ }
+ return instance;
+ }
+
+ private FederationStateStoreServiceMetrics() {
+ registry = new MetricsRegistry(RECORD_INFO);
+ registry.tag(RECORD_INFO, "FederationStateStoreServiceMetrics");
+
+ // Create the metrics for each method and put them into the map
+ for (Method m : STATESTORE_API_METHODS) {
+ String methodName = m.getName();
+ LOG.debug("Registering Federation StateStore Service metrics for {}",
methodName);
+
+ // This metric only records the number of failed calls; it does not
+ // capture latency information
+ FAILED_CALLS.put(methodName, registry.newCounter(methodName +
"NumFailedCalls",
+ "# failed calls to " + methodName, 0L));
+
+ // This metric records both the number and average latency of successful
+ // calls.
+ SUCCESSFUL_CALLS.put(methodName, registry.newRate(methodName +
"SuccessfulCalls",
+ "# successful calls and latency(ms) for" + methodName));
+
+ // This metric records the quantile-based latency of each successful
call,
+ // re-sampled every 10 seconds.
+ QUANTILE_METRICS.put(methodName, registry.newQuantiles(methodName +
"Latency",
+ "Quantile latency (ms) for " + methodName, "ops", "latency", 10));
+ }
+ }
+
+ // Aggregate metrics are shared, and don't have to be looked up per call
+ @Metric("Total number of successful calls and latency(ms)")
+ private static MutableRate totalSucceededCalls;
+
+ @Metric("Total number of failed StateStore calls")
+ private static MutableCounterLong totalFailedCalls;
+
+ public static void failedStateStoreServiceCall() {
+ String methodName =
Thread.currentThread().getStackTrace()[2].getMethodName();
+ MutableCounterLong methodMetric = FAILED_CALLS.get(methodName);
+
+ if (methodMetric == null) {
+ LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName);
+ return;
+ }
+
+ totalFailedCalls.incr();
+ methodMetric.incr();
+ }
+
+ public static void failedStateStoreServiceCall(String methodName) {
+ MutableCounterLong methodMetric = FAILED_CALLS.get(methodName);
+ if (methodMetric == null) {
+ LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName);
+ return;
+ }
+ totalFailedCalls.incr();
+ methodMetric.incr();
+ }
+
+ public static void succeededStateStoreServiceCall(long duration) {
+ String methodName =
Thread.currentThread().getStackTrace()[2].getMethodName();
+ MutableRate methodMetric = SUCCESSFUL_CALLS.get(methodName);
+ MutableQuantiles methodQuantileMetric = QUANTILE_METRICS.get(methodName);
+
+ if (methodMetric == null || methodQuantileMetric == null) {
+ LOG.error(UNKNOWN_SUCCESS_ERROR_MSG, methodName);
+ return;
+ }
+
+ totalSucceededCalls.add(duration);
Review Comment:
Shouldn't we increase this even if unknown?
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/federation/FederationStateStoreServiceMetrics.java:
##########
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.federation;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.metrics2.MetricsInfo;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
+import org.apache.hadoop.metrics2.lib.MutableRate;
+import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.reflect.Method;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.apache.hadoop.metrics2.lib.Interns.info;
+
+@Metrics(about = "Metrics for FederationStateStoreService", context = "fedr")
+public final class FederationStateStoreServiceMetrics {
+
+ public static final Logger LOG =
+ LoggerFactory.getLogger(FederationStateStoreServiceMetrics.class);
+
+ private static final MetricsInfo RECORD_INFO =
+ info("FederationStateStoreServiceMetrics", "Metrics for the RM
FederationStateStoreService");
+
+ private static volatile FederationStateStoreServiceMetrics instance = null;
+ private MetricsRegistry registry;
+
+ private final static Method[] STATESTORE_API_METHODS =
FederationStateStore.class.getMethods();
+
+ // Map method names to counter objects
+ private static final Map<String, MutableCounterLong> FAILED_CALLS = new
HashMap<>();
+ private static final Map<String, MutableRate> SUCCESSFUL_CALLS = new
HashMap<>();
+ // Provide quantile latency for each api call.
+ private static final Map<String, MutableQuantiles> QUANTILE_METRICS = new
HashMap<>();
+
+ // Error string templates for logging calls from methods not in
+ // FederationStateStore API
+ private static final String UNKOWN_FAIL_ERROR_MSG =
+ "Not recording failed call for unknown FederationStateStore method {}";
+ private static final String UNKNOWN_SUCCESS_ERROR_MSG =
+ "Not recording successful call for unknown FederationStateStore method
{}";
+
+ /**
+ * Initialize the singleton instance.
+ *
+ * @return the singleton
+ */
+ public static FederationStateStoreServiceMetrics getMetrics() {
+ synchronized (FederationStateStoreServiceMetrics.class) {
+ if (instance == null) {
+ instance = DefaultMetricsSystem.instance()
+ .register(new FederationStateStoreServiceMetrics());
+ }
+ }
+ return instance;
+ }
+
+ private FederationStateStoreServiceMetrics() {
+ registry = new MetricsRegistry(RECORD_INFO);
+ registry.tag(RECORD_INFO, "FederationStateStoreServiceMetrics");
+
+ // Create the metrics for each method and put them into the map
+ for (Method m : STATESTORE_API_METHODS) {
+ String methodName = m.getName();
+ LOG.debug("Registering Federation StateStore Service metrics for {}",
methodName);
+
+ // This metric only records the number of failed calls; it does not
+ // capture latency information
+ FAILED_CALLS.put(methodName, registry.newCounter(methodName +
"NumFailedCalls",
+ "# failed calls to " + methodName, 0L));
+
+ // This metric records both the number and average latency of successful
+ // calls.
+ SUCCESSFUL_CALLS.put(methodName, registry.newRate(methodName +
"SuccessfulCalls",
+ "# successful calls and latency(ms) for" + methodName));
+
+ // This metric records the quantile-based latency of each successful
call,
+ // re-sampled every 10 seconds.
+ QUANTILE_METRICS.put(methodName, registry.newQuantiles(methodName +
"Latency",
+ "Quantile latency (ms) for " + methodName, "ops", "latency", 10));
+ }
+ }
+
+ // Aggregate metrics are shared, and don't have to be looked up per call
+ @Metric("Total number of successful calls and latency(ms)")
+ private static MutableRate totalSucceededCalls;
+
+ @Metric("Total number of failed StateStore calls")
+ private static MutableCounterLong totalFailedCalls;
+
+ public static void failedStateStoreServiceCall() {
+ String methodName =
Thread.currentThread().getStackTrace()[2].getMethodName();
+ MutableCounterLong methodMetric = FAILED_CALLS.get(methodName);
+
+ if (methodMetric == null) {
+ LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName);
+ return;
+ }
+
+ totalFailedCalls.incr();
+ methodMetric.incr();
+ }
+
+ public static void failedStateStoreServiceCall(String methodName) {
+ MutableCounterLong methodMetric = FAILED_CALLS.get(methodName);
+ if (methodMetric == null) {
+ LOG.error(UNKOWN_FAIL_ERROR_MSG, methodName);
+ return;
+ }
+ totalFailedCalls.incr();
+ methodMetric.incr();
+ }
+
+ public static void succeededStateStoreServiceCall(long duration) {
+ String methodName =
Thread.currentThread().getStackTrace()[2].getMethodName();
+ MutableRate methodMetric = SUCCESSFUL_CALLS.get(methodName);
+ MutableQuantiles methodQuantileMetric = QUANTILE_METRICS.get(methodName);
+
+ if (methodMetric == null || methodQuantileMetric == null) {
+ LOG.error(UNKNOWN_SUCCESS_ERROR_MSG, methodName);
Review Comment:
Log unknown queries?
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationRMStateStoreService.java:
##########
@@ -482,4 +507,156 @@ private void addApplication2RMAppManager(MockRM rm,
ApplicationId appId) {
rmAppMaps.putIfAbsent(application.getApplicationId(), application);
}
+
+ @Test
+ public void testPolicyConfigurationMethod() throws YarnException {
+
+ /**
+ * This test case tests 3 methods.
+ *
+ * 1.setPolicyConfiguration
+ * 2.getPolicyConfiguration
+ * 3.getPolicyConfigurations
+ */
+
+ FederationStateStoreService stateStoreService =
Review Comment:
single line?
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/federation/TestFederationStateStoreServiceMetrics.java:
##########
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
under
+ * the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.federation;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Unit tests for TestFederationStateStoreServiceMetrics.
+ */
+public class TestFederationStateStoreServiceMetrics {
+
+ public static final Logger LOG =
+ LoggerFactory.getLogger(TestFederationStateStoreServiceMetrics.class);
+
+ private static FederationStateStoreServiceMetrics metrics =
+ FederationStateStoreServiceMetrics.getMetrics();;
Review Comment:
double semi-colon
> [Federation] Add RM FederationStateStoreService Metrics
> -------------------------------------------------------
>
> Key: YARN-11326
> URL: https://issues.apache.org/jira/browse/YARN-11326
> Project: Hadoop YARN
> Issue Type: Bug
> Components: federation, resourcemanager
> Affects Versions: 3.4.0
> Reporter: Shilun Fan
> Assignee: Shilun Fan
> Priority: Major
> Labels: pull-request-available
>
> RM FederationStateStoreService lacks Metric information, increase Metric
> information statistics.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]