YARN-7458. TestContainerManagerSecurity is still flakey
(Contributed by Robert Kanter via Daniel Templeton)

Change-Id: Ibb1975ad086c3a33f8af0b4f8b9a13c3cdca3f7d


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/49b4c0b3
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/49b4c0b3
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/49b4c0b3

Branch: refs/heads/HDFS-7240
Commit: 49b4c0b334e5472dbbf71b042a6a6b1d4b2ce3b7
Parents: 0de1068
Author: Daniel Templeton <templ...@apache.org>
Authored: Wed Nov 8 17:31:14 2017 -0800
Committer: Daniel Templeton <templ...@apache.org>
Committed: Wed Nov 8 17:31:14 2017 -0800

----------------------------------------------------------------------
 .../server/TestContainerManagerSecurity.java    | 38 ++++++++++++--------
 1 file changed, 24 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/49b4c0b3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
index 1cbad70..ad2f68a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
@@ -28,7 +28,9 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.concurrent.TimeoutException;
 
+import com.google.common.base.Supplier;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.io.DataInputBuffer;
@@ -36,6 +38,7 @@ import org.apache.hadoop.minikdc.KerberosSecurityTestcase;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
 import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
@@ -49,6 +52,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
@@ -404,27 +408,33 @@ public class TestContainerManagerSecurity extends KerberosSecurityTestcase {
       newContainerToken, attempt1NMToken, false).isEmpty());
   }
 
-  private void waitForContainerToFinishOnNM(ContainerId containerId) {
+  private void waitForContainerToFinishOnNM(ContainerId containerId)
+      throws TimeoutException, InterruptedException {
     Context nmContext = yarnCluster.getNodeManager(0).getNMContext();
     int interval = 4 * 60; // Max time for container token to expire.
 
-    Assert.assertNotNull(nmContext.getContainers().containsKey(containerId));
-
-    // Get the container first, as it may be removed from the Context
-    // by asynchronous calls.
-    // This was leading to a flakey test as otherwise the container could
-    // be removed and end up null.
+    // If the container is null, then it has already completed and been removed
+    // from the Context by asynchronous calls.
     Container waitContainer = nmContext.getContainers().get(containerId);
-
-    while ((interval-- > 0)
-        && !waitContainer.cloneAndGetContainerStatus()
-        .getState().equals(ContainerState.COMPLETE)) {
+    if (waitContainer != null) {
       try {
-        LOG.info("Waiting for " + containerId + " to complete.");
-        Thread.sleep(1000);
-      } catch (InterruptedException e) {
+        LOG.info("Waiting for " + containerId + " to get to state " +
+            ContainerState.COMPLETE);
+        GenericTestUtils.waitFor(new Supplier<Boolean>() {
+          @Override
+          public Boolean get() {
+            return ContainerState.COMPLETE.equals(
+                waitContainer.cloneAndGetContainerStatus().getState());
+          }
+        }, 10, interval);
+      } catch (TimeoutException te) {
+        fail("Was waiting for " + containerId + " to get to state " +
+            ContainerState.COMPLETE + " but was in state " +
+            waitContainer.cloneAndGetContainerStatus().getState() +
+            " after the timeout");
       }
     }
+
     // Normally, Containers will be removed from NM context after they are
     // explicitly acked by RM. Now, manually remove it for testing.
     yarnCluster.getNodeManager(0).getNodeStatusUpdater()


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to