szilard-nemeth commented on code in PR #5317: URL: https://github.com/apache/hadoop/pull/5317#discussion_r1122893607
########## hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java: ########## @@ -125,576 +117,362 @@ public void postTransition( org.apache.hadoop.yarn.server.nodemanager.containermanager.container .ContainerState afterState, ContainerEvent processedEvent) { - synchronized (TRANSITION_COUNTER) { - if (beforeState != afterState) { - ContainerId id = op.getContainerId(); - TRANSITION_COUNTER - .putIfAbsent(id, new HashMap<>()); - long sum = TRANSITION_COUNTER.get(id) - .compute(afterState, - (state, count) -> count == null ? 1 : count + 1); - LOG.info("***** " + id + - " Transition from " + beforeState + - " to " + afterState + - "sum:" + sum); - } + if (beforeState != afterState && + afterState == org.apache.hadoop.yarn.server.nodemanager.containermanager.container + .ContainerState.RUNNING) { + RUNNING_TRANSITIONS.compute(op.getContainerId(), + (containerId, counter) -> counter == null ? 1 : ++counter); } } - - /** - * Get the current number of state transitions. - * This is useful to check, if an event has occurred in unit tests. - * @param id Container id to check - * @param state Return the overall number of transitions to this state - * @return Number of transitions to the state specified - */ - static long getTransitionCounter(ContainerId id, - org.apache.hadoop.yarn.server.nodemanager - .containermanager.container - .ContainerState state) { - Long ret = TRANSITION_COUNTER.getOrDefault(id, new HashMap<>()) - .get(state); - return ret != null ? ret : 0; - } } - @Before - public void setup() throws YarnException, IOException { - // start minicluster + public void setup() throws YarnException, IOException, InterruptedException, TimeoutException { conf = new YarnConfiguration(); - // Turn on state tracking conf.set(YarnConfiguration.NM_CONTAINER_STATE_TRANSITION_LISTENERS, DebugSumContainerStateListener.class.getName()); - yarnCluster = - new MiniYARNCluster(TestAMRMClient.class.getName(), nodeCount, 1, 1); + startYarnCluster(); + startYarnClient(); + UserGroupInformation.setLoginUser(UserGroupInformation + .createRemoteUser(UserGroupInformation.getCurrentUser().getUserName())); + UserGroupInformation.getCurrentUser().addToken(appAttempt.getAMRMToken()); + nmTokenCache = new NMTokenCache(); + startRMClient(); + startNMClient(); + } + + + private void startYarnCluster() { + yarnCluster = new MiniYARNCluster(TestNMClient.class.getName(), 3, 1, 1); yarnCluster.init(conf); yarnCluster.start(); - assertNotNull(yarnCluster); assertEquals(STATE.STARTED, yarnCluster.getServiceState()); + } - // start rm client + private void startYarnClient() + throws IOException, YarnException, InterruptedException, TimeoutException { yarnClient = (YarnClientImpl) YarnClient.createYarnClient(); yarnClient.init(conf); yarnClient.start(); - assertNotNull(yarnClient); assertEquals(STATE.STARTED, yarnClient.getServiceState()); - - // get node info nodeReports = yarnClient.getNodeReports(NodeState.RUNNING); - - // submit new app - ApplicationSubmissionContext appContext = + ApplicationSubmissionContext appContext = yarnClient.createApplication().getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); - // set the application name appContext.setApplicationName("Test"); - // Set the priority for the application master Priority pri = Priority.newInstance(0); appContext.setPriority(pri); - // Set the queue to which this application is to be submitted in the RM appContext.setQueue("default"); - // Set up the container launch context for the application master - ContainerLaunchContext amContainer = Records - .newRecord(ContainerLaunchContext.class); + ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); appContext.setAMContainerSpec(amContainer); - // unmanaged AM appContext.setUnmanagedAM(true); - // Create the request to send to the applications manager - SubmitApplicationRequest appRequest = Records - .newRecord(SubmitApplicationRequest.class); + SubmitApplicationRequest appRequest = Records.newRecord(SubmitApplicationRequest.class); appRequest.setApplicationSubmissionContext(appContext); - // Submit the application to the applications manager yarnClient.submitApplication(appContext); + GenericTestUtils.waitFor(() -> yarnCluster.getResourceManager().getRMContext().getRMApps() + .get(appId).getCurrentAppAttempt().getAppAttemptState() == RMAppAttemptState.LAUNCHED, + 100, 30_000, "Failed to start app"); + appAttempt = yarnCluster.getResourceManager().getRMContext().getRMApps() + .get(appId).getCurrentAppAttempt(); + } - // wait for app to start - int iterationsLeft = 30; - RMAppAttempt appAttempt = null; - while (iterationsLeft > 0) { - ApplicationReport appReport = yarnClient.getApplicationReport(appId); - if (appReport.getYarnApplicationState() == - YarnApplicationState.ACCEPTED) { - attemptId = appReport.getCurrentApplicationAttemptId(); - appAttempt = - yarnCluster.getResourceManager().getRMContext().getRMApps() - .get(attemptId.getApplicationId()).getCurrentAppAttempt(); - while (true) { - if (appAttempt.getAppAttemptState() == RMAppAttemptState.LAUNCHED) { - break; - } - } - break; - } - sleep(1000); - --iterationsLeft; - } - if (iterationsLeft == 0) { - fail("Application hasn't bee started"); - } - - // Just dig into the ResourceManager and get the AMRMToken just for the sake - // of testing. - UserGroupInformation.setLoginUser(UserGroupInformation - .createRemoteUser(UserGroupInformation.getCurrentUser().getUserName())); - UserGroupInformation.getCurrentUser().addToken(appAttempt.getAMRMToken()); - - //creating an instance NMTokenCase - nmTokenCache = new NMTokenCache(); - - // start am rm client - rmClient = - (AMRMClientImpl<ContainerRequest>) AMRMClient - .<ContainerRequest> createAMRMClient(); - - //setting an instance NMTokenCase + private void startRMClient() { + rmClient = (AMRMClientImpl<ContainerRequest>) AMRMClient.createAMRMClient(); rmClient.setNMTokenCache(nmTokenCache); rmClient.init(conf); rmClient.start(); - assertNotNull(rmClient); assertEquals(STATE.STARTED, rmClient.getServiceState()); + } - // start am nm client + private void startNMClient() { nmClient = (NMClientImpl) NMClient.createNMClient(); - - //propagating the AMRMClient NMTokenCache instance nmClient.setNMTokenCache(rmClient.getNMTokenCache()); nmClient.init(conf); nmClient.start(); - assertNotNull(nmClient); assertEquals(STATE.STARTED, nmClient.getServiceState()); } - @After - public void tearDown() { + public void tearDown() throws InterruptedException { rmClient.stop(); yarnClient.stop(); - yarnCluster.stop(); + yarnCluster.asyncStop(this); } - private void stopNmClient(boolean stopContainers) { - assertNotNull("Null nmClient", nmClient); - // leave one unclosed - assertEquals(1, nmClient.startedContainers.size()); - // default true - assertTrue(nmClient.getCleanupRunningContainers().get()); - nmClient.cleanupRunningContainersOnStop(stopContainers); - assertEquals(stopContainers, nmClient.getCleanupRunningContainers().get()); - nmClient.stop(); - } - - @Test (timeout = 180000) + @Test (timeout = 180_000 * MAX_EARLY_FINISH) public void testNMClientNoCleanupOnStop() - throws YarnException, IOException { - - rmClient.registerApplicationMaster("Host", 10000, ""); - - testContainerManagement(nmClient, allocateContainers(rmClient, 5)); - - rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, - null, null); - // don't stop the running containers - stopNmClient(false); - assertFalse(nmClient.startedContainers.isEmpty()); - //now cleanup - nmClient.cleanupRunningContainers(); - assertEquals(0, nmClient.startedContainers.size()); + throws YarnException, IOException, InterruptedException, TimeoutException { + int earlyFinishCounter = MAX_EARLY_FINISH; Review Comment: > but during the test we interested some some hack what happening in RUNNING state Sorry I don't really get this part. Can you please elaborate of what the test is doing? thanks. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org