YARN-7458. TestContainerManagerSecurity is still flakey (Contributed by Robert Kanter via Daniel Templeton)
Change-Id: Ibb1975ad086c3a33f8af0b4f8b9a13c3cdca3f7d

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/49b4c0b3
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/49b4c0b3
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/49b4c0b3

Branch: refs/heads/HDFS-7240
Commit: 49b4c0b334e5472dbbf71b042a6a6b1d4b2ce3b7
Parents: 0de1068
Author: Daniel Templeton <templ...@apache.org>
Authored: Wed Nov 8 17:31:14 2017 -0800
Committer: Daniel Templeton <templ...@apache.org>
Committed: Wed Nov 8 17:31:14 2017 -0800

----------------------------------------------------------------------
 .../server/TestContainerManagerSecurity.java | 38 ++++++++++++--------
 1 file changed, 24 insertions(+), 14 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/49b4c0b3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
index 1cbad70..ad2f68a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
@@ -28,7 +28,9 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.concurrent.TimeoutException;
 
+import com.google.common.base.Supplier;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.io.DataInputBuffer;
@@ -36,6 +38,7 @@ import org.apache.hadoop.minikdc.KerberosSecurityTestcase;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
 import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
@@ -49,6 +52,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
@@ -404,27 +408,33 @@ public class TestContainerManagerSecurity extends KerberosSecurityTestcase {
         newContainerToken, attempt1NMToken, false).isEmpty());
   }
 
-  private void waitForContainerToFinishOnNM(ContainerId containerId) {
+  private void waitForContainerToFinishOnNM(ContainerId containerId)
+      throws TimeoutException, InterruptedException {
     Context nmContext = yarnCluster.getNodeManager(0).getNMContext();
     int interval = 4 * 60; // Max time for container token to expire.
 
-    Assert.assertNotNull(nmContext.getContainers().containsKey(containerId));
-
-    // Get the container first, as it may be removed from the Context
-    // by asynchronous calls.
-    // This was leading to a flakey test as otherwise the container could
-    // be removed and end up null.
+    // If the container is null, then it has already completed and been removed
+    // from the Context by asynchronous calls.
     Container waitContainer = nmContext.getContainers().get(containerId);
-
-    while ((interval-- > 0)
-        && !waitContainer.cloneAndGetContainerStatus()
-        .getState().equals(ContainerState.COMPLETE)) {
+    if (waitContainer != null) {
       try {
-        LOG.info("Waiting for " + containerId + " to complete.");
-        Thread.sleep(1000);
-      } catch (InterruptedException e) {
+        LOG.info("Waiting for " + containerId + " to get to state " +
+            ContainerState.COMPLETE);
+        GenericTestUtils.waitFor(new Supplier<Boolean>() {
+          @Override
+          public Boolean get() {
+            return ContainerState.COMPLETE.equals(
+                waitContainer.cloneAndGetContainerStatus().getState());
+          }
+        }, 10, interval);
+      } catch (TimeoutException te) {
+        fail("Was waiting for " + containerId + " to get to state " +
+            ContainerState.COMPLETE + " but was in state " +
+            waitContainer.cloneAndGetContainerStatus().getState() +
+            " after the timeout");
       }
     }
+
     // Normally, Containers will be removed from NM context after they are
     // explicitly acked by RM. Now, manually remove it for testing.
     yarnCluster.getNodeManager(0).getNodeStatusUpdater()

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org