tillrohrmann commented on a change in pull request #15134: URL: https://github.com/apache/flink/pull/15134#discussion_r593182205
########## File path: flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java ########## @@ -327,6 +328,36 @@ public void close() throws Exception { } } + private static List<ApplicationReport> getApplicationReportWithRetryOnNPE( + final YarnClient yarnClient) throws IOException, YarnException { + return getApplicationReportWithRetryOnNPE(yarnClient, null); + } + + private static List<ApplicationReport> getApplicationReportWithRetryOnNPE( + final YarnClient yarnClient, EnumSet<YarnApplicationState> states) Review comment: ```suggestion final YarnClient yarnClient, @Nullable EnumSet<YarnApplicationState> states) ``` ########## File path: flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java ########## @@ -327,6 +328,36 @@ public void close() throws Exception { } } + private static List<ApplicationReport> getApplicationReportWithRetryOnNPE( + final YarnClient yarnClient) throws IOException, YarnException { + return getApplicationReportWithRetryOnNPE(yarnClient, null); + } + + private static List<ApplicationReport> getApplicationReportWithRetryOnNPE( + final YarnClient yarnClient, EnumSet<YarnApplicationState> states) + throws IOException, YarnException { + final int maxRetryCount = 10; + for (int i = 0; i < maxRetryCount; i++) { + try { + return yarnClient.getApplications(states); + } catch (NullPointerException e) { + String npeStr = ExceptionUtils.stringifyException(e); + if (!npeStr.contains("RMAppAttemptMetrics.getAggregateAppResourceUsage")) { + // unrelated NullPointerExceptions should be forwarded to the calling method + throw e; + } + + LOG.warn( + "NullPointerException was caught most likely being related to YARN-7007. The related discussion is happening in FLINK-15534. The exception is going to be ignored."); + } + } + + throw new IllegalStateException( + "YarnClient.getApplications command failed " + + maxRetryCount + + " times to gather the application report. Check FLINK-15534 for further details."); Review comment: Let's add `e` as the cause for the sake of completeness. ########## File path: flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java ########## @@ -327,6 +328,36 @@ public void close() throws Exception { } } + private static List<ApplicationReport> getApplicationReportWithRetryOnNPE( + final YarnClient yarnClient) throws IOException, YarnException { + return getApplicationReportWithRetryOnNPE(yarnClient, null); + } + + private static List<ApplicationReport> getApplicationReportWithRetryOnNPE( + final YarnClient yarnClient, EnumSet<YarnApplicationState> states) + throws IOException, YarnException { + final int maxRetryCount = 10; + for (int i = 0; i < maxRetryCount; i++) { + try { + return yarnClient.getApplications(states); + } catch (NullPointerException e) { + String npeStr = ExceptionUtils.stringifyException(e); + if (!npeStr.contains("RMAppAttemptMetrics.getAggregateAppResourceUsage")) { + // unrelated NullPointerExceptions should be forwarded to the calling method + throw e; + } + + LOG.warn( + "NullPointerException was caught most likely being related to YARN-7007. The related discussion is happening in FLINK-15534. The exception is going to be ignored."); Review comment: Maybe we could log `e` on debug if debug is enabled. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org