afchung commented on a change in pull request #3642: URL: https://github.com/apache/hadoop/pull/3642#discussion_r746917343
########## File path: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestOpportunisticContainerAllocatorAMService.java ########## @@ -817,6 +825,56 @@ public void testOpportunisticSchedulerMetrics() throws Exception { metrics.getAggregatedReleasedContainers()); } + /** + * Tests that, if a node has running opportunistic containers when the RM + * is down, RM is able to reflect the opportunistic containers + * in its metrics upon RM recovery. + */ + @Test + public void testMetricsRetainsAllocatedOpportunisticAfterRMRestart() + throws Exception { + HashMap<NodeId, MockNM> nodes = new HashMap<>(); + MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService()); + nodes.put(nm1.getNodeId(), nm1); + final RMApp app = MockRMAppSubmitter.submit(rm, + MockRMAppSubmissionData.Builder.createWithMemory(GB, rm) + .withAppName("app") + .withUser("user") + .withAcls(null) + .withQueue("default") + .build()); + + final ApplicationAttemptId appAttemptId = + app.getCurrentAppAttempt().getAppAttemptId(); + + final ContainerId recoverContainerId = ContainerId.newContainerId( + appAttemptId, 2); + + final Resource fakeResource = Resource.newInstance(1024, 1); + final String fakeDiagnostics = "recover container"; + final Priority fakePriority = Priority.newInstance(0); + + final NMContainerStatus recoverContainerReport = + NMContainerStatus.newInstance( + recoverContainerId, 0, ContainerState.RUNNING, + fakeResource, fakeDiagnostics, 0, + fakePriority, 0, null, + ExecutionType.OPPORTUNISTIC, -1); + + rm.registerNode( + "h1:1234", 4096, 1, + Collections.singletonList( + appAttemptId.getApplicationId()), + Collections.singletonList(recoverContainerReport)); + + OpportunisticSchedulerMetrics metrics = + OpportunisticSchedulerMetrics.getMetrics(); + Assert.assertEquals(1, Review comment: Makes sense. 
This helped me find that, while the test passes when run on its own, it fails when run with the rest of the tests in the class, since the metrics were not re-initialized between tests. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org