This is an automated email from the ASF dual-hosted git repository. slfan1989 pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push: new 478c4ced5a50 YARN-11620. [Federation] Improve FederationClientInterceptor To Return Partial Results of subClusters. (#6289) Contributed by Shilun Fan. 478c4ced5a50 is described below commit 478c4ced5a50ee05577bfe36c1e3f77991a7b065 Author: slfan1989 <55643692+slfan1...@users.noreply.github.com> AuthorDate: Wed Nov 29 07:11:35 2023 +0800 YARN-11620. [Federation] Improve FederationClientInterceptor To Return Partial Results of subClusters. (#6289) Contributed by Shilun Fan. Reviewed-by: Inigo Goiri <inigo...@apache.org> Signed-off-by: Shilun Fan <slfan1...@apache.org> --- .../router/clientrm/FederationClientInterceptor.java | 16 ++++++++++++++-- .../clientrm/TestFederationClientInterceptorRetry.java | 18 ++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java index 71e265be1b9c..ab0e1b345e9f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java @@ -208,6 +208,7 @@ public class FederationClientInterceptor private final Clock clock = new MonotonicClock(); private boolean returnPartialReport; private long submitIntervalTime; + private boolean allowPartialResult; @Override public void init(String userName) { @@ -263,6 +264,10 @@ public class FederationClientInterceptor returnPartialReport = conf.getBoolean( YarnConfiguration.ROUTER_CLIENTRM_PARTIAL_RESULTS_ENABLED, YarnConfiguration.DEFAULT_ROUTER_CLIENTRM_PARTIAL_RESULTS_ENABLED); + + allowPartialResult = conf.getBoolean( + YarnConfiguration.ROUTER_INTERCEPTOR_ALLOW_PARTIAL_RESULT_ENABLED, + YarnConfiguration.DEFAULT_ROUTER_INTERCEPTOR_ALLOW_PARTIAL_RESULT_ENABLED); } @Override @@ -895,8 +900,10 @@ public class FederationClientInterceptor // All sub-clusters return results to be considered successful, // otherwise an exception will be thrown. if (exceptions != null && !exceptions.isEmpty()) { - throw new YarnException("invokeConcurrent Failed = " + - StringUtils.join(exceptions.values(), ",")); + if (!allowPartialResult || exceptions.keySet().size() == subClusterIds.size()) { + throw new YarnException("invokeConcurrent Failed = " + + StringUtils.join(exceptions.values(), ",")); + } } // return result @@ -2350,4 +2357,9 @@ public class FederationClientInterceptor public void setNumSubmitRetries(int numSubmitRetries) { this.numSubmitRetries = numSubmitRetries; } + + @VisibleForTesting + public void setAllowPartialResult(boolean allowPartialResult) { + this.allowPartialResult = allowPartialResult; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestFederationClientInterceptorRetry.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestFederationClientInterceptorRetry.java index bf7ef7d17913..f0ecf8367cc8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestFederationClientInterceptorRetry.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/TestFederationClientInterceptorRetry.java @@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; @@ -410,4 +411,21 @@ public class TestFederationClientInterceptorRetry "subClusterId 1 exec getClusterMetrics error RM is stopped.", () -> interceptor.getClusterMetrics(request)); } + + @Test + public void testGetClusterMetricsOneBadOneGoodNodeWithRealError() throws Exception { + LOG.info("Test getClusterMetrics with one bad and one good SubCluster."); + setupCluster(Arrays.asList(bad1, good)); + GetClusterMetricsRequest request = GetClusterMetricsRequest.newInstance(); + + GetClusterMetricsResponse clusterMetrics = interceptor.getClusterMetrics(request); + Assert.assertNotNull(clusterMetrics); + + // If partial results are not allowed to be returned, an exception will be thrown. + interceptor.setAllowPartialResult(false); + LambdaTestUtils.intercept(YarnException.class, + "subClusterId 1 exec getClusterMetrics error RM is stopped.", + () -> interceptor.getClusterMetrics(request)); + interceptor.setAllowPartialResult(true); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org