This is an automated email from the ASF dual-hosted git repository.
slfan1989 pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 478c4ced5a50 YARN-11620. [Federation] Improve
FederationClientInterceptor To Return Partial Results of subClusters. (#6289)
Contributed by Shilun Fan.
478c4ced5a50 is described below
commit 478c4ced5a50ee05577bfe36c1e3f77991a7b065
Author: slfan1989 <[email protected]>
AuthorDate: Wed Nov 29 07:11:35 2023 +0800
YARN-11620. [Federation] Improve FederationClientInterceptor To Return
Partial Results of subClusters. (#6289) Contributed by Shilun Fan.
Reviewed-by: Inigo Goiri <[email protected]>
Signed-off-by: Shilun Fan <[email protected]>
---
.../router/clientrm/FederationClientInterceptor.java | 16 ++++++++++++++--
.../clientrm/TestFederationClientInterceptorRetry.java | 18 ++++++++++++++++++
2 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java
index 71e265be1b9c..ab0e1b345e9f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java
@@ -208,6 +208,7 @@ public class FederationClientInterceptor
private final Clock clock = new MonotonicClock();
private boolean returnPartialReport;
private long submitIntervalTime;
+ private boolean allowPartialResult;
@Override
public void init(String userName) {
@@ -263,6 +264,10 @@ public class FederationClientInterceptor
returnPartialReport = conf.getBoolean(
YarnConfiguration.ROUTER_CLIENTRM_PARTIAL_RESULTS_ENABLED,
YarnConfiguration.DEFAULT_ROUTER_CLIENTRM_PARTIAL_RESULTS_ENABLED);
+
+ allowPartialResult = conf.getBoolean(
+ YarnConfiguration.ROUTER_INTERCEPTOR_ALLOW_PARTIAL_RESULT_ENABLED,
+ YarnConfiguration.DEFAULT_ROUTER_INTERCEPTOR_ALLOW_PARTIAL_RESULT_ENABLED);
}
@Override
@@ -895,8 +900,10 @@ public class FederationClientInterceptor
// All sub-clusters return results to be considered successful,
// otherwise an exception will be thrown.
if (exceptions != null && !exceptions.isEmpty()) {
- throw new YarnException("invokeConcurrent Failed = " +
- StringUtils.join(exceptions.values(), ","));
+ if (!allowPartialResult || exceptions.keySet().size() == subClusterIds.size()) {
+ throw new YarnException("invokeConcurrent Failed = " +
+ StringUtils.join(exceptions.values(), ","));
+ }
}
// return result
@@ -2350,4 +2357,9 @@ public class FederationClientInterceptor
public void setNumSubmitRetries(int numSubmitRetries) {
this.numSubmitRetries = numSubmitRetries;
}
+
+ @VisibleForTesting
+ public void setAllowPartialResult(boolean allowPartialResult) {
+ this.allowPartialResult = allowPartialResult;
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestFederationClientInterceptorRetry.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestFederationClientInterceptorRetry.java
index bf7ef7d17913..f0ecf8367cc8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestFederationClientInterceptorRetry.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestFederationClientInterceptorRetry.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
@@ -410,4 +411,21 @@ public class TestFederationClientInterceptorRetry
"subClusterId 1 exec getClusterMetrics error RM is stopped.",
() -> interceptor.getClusterMetrics(request));
}
+
+ @Test
+ public void testGetClusterMetricsOneBadOneGoodNodeWithRealError() throws Exception {
+ LOG.info("Test getClusterMetrics with one bad and one good SubCluster.");
+ setupCluster(Arrays.asList(bad1, good));
+ GetClusterMetricsRequest request = GetClusterMetricsRequest.newInstance();
+
+ GetClusterMetricsResponse clusterMetrics = interceptor.getClusterMetrics(request);
+ Assert.assertNotNull(clusterMetrics);
+
+ // If partial results are not allowed to be returned, an exception will be thrown.
+ interceptor.setAllowPartialResult(false);
+ LambdaTestUtils.intercept(YarnException.class,
+ "subClusterId 1 exec getClusterMetrics error RM is stopped.",
+ () -> interceptor.getClusterMetrics(request));
+ interceptor.setAllowPartialResult(true);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]