[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16193970#comment-16193970 ] Miklos Szegedi commented on YARN-7009: -- Thank you, [~haibochen]. I opened YARN-7293 for the backport. > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Fix For: 3.0.0 > > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch, YARN-7009.003.patch, YARN-7009.004.patch, > YARN-7009.005.patch, YARN-7009.006.patch, YARN-7009.007.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16193887#comment-16193887 ] Hudson commented on YARN-7009: -- SUCCESS: Integrated in Jenkins build Hadoop-trunk-Commit #13037 (See [https://builds.apache.org/job/Hadoop-trunk-Commit/13037/]) YARN-7009. TestNMClient.testNMClientNoCleanupOnStop is flaky by design. (haibochen: rev c071aad5da6f6601978e73d38ce5813958848bc4) * (edit) hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java * (edit) hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java * (edit) hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json * (edit) hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml * (edit) hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java * (edit) hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Fix For: 3.0.0 > > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch, YARN-7009.003.patch, YARN-7009.004.patch, > YARN-7009.005.patch, YARN-7009.006.patch, YARN-7009.007.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING,
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16193572#comment-16193572 ] Haibo Chen commented on YARN-7009: -- Unit test failures in the report are tracked and fixed in YARN-7211. LGTM +1 Will commit it to trunk and branch-3 at end of the day. The race condition that you fixed in ContainerLaunch is also applicable to branch 2, maybe create another jira to fix it for branch-2 specifically? > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch, YARN-7009.003.patch, YARN-7009.004.patch, > YARN-7009.005.patch, YARN-7009.006.patch, YARN-7009.007.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at >
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16184850#comment-16184850 ] Hadoop QA commented on YARN-7009: - | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 16s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 1m 26s{color} | {color:blue} Maven dependency ordering for branch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 12m 40s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 13m 41s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 1m 58s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 2m 55s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 16m 30s{color} | {color:green} branch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 4m 36s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 2m 35s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 19s{color} | {color:blue} Maven dependency ordering for patch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 2m 7s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 10m 16s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 10m 16s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 1m 49s{color} | {color:green} root: The patch generated 0 new + 278 unchanged - 2 fixed = 278 total (was 280) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 2m 31s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 0s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:green}+1{color} | {color:green} xml {color} | {color:green} 0m 1s{color} | {color:green} The patch has no ill-formed XML file. {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 9m 4s{color} | {color:green} patch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 4m 47s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 2m 19s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:green}+1{color} | {color:green} unit {color} | {color:green} 0m 44s{color} | {color:green} hadoop-yarn-api in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 2m 51s{color} | {color:green} hadoop-yarn-common in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 16m 19s{color} | {color:green} hadoop-yarn-server-nodemanager in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 22m 10s{color} | {color:green} hadoop-yarn-client in the patch passed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 1m 53s{color} | {color:red} hadoop-sls in the patch failed. {color} | | {color:green}+1{color} | {color:green} asflicense {color} | {color:green} 0m 37s{color} | {color:green} The patch does not generate ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black}152m 2s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | hadoop.yarn.sls.TestReservationSystemInvariants | | | hadoop.yarn.sls.TestSLSRunner | \\ \\ || Subsystem || Report/Notes || | Docker | Image:yetus/hadoop:71bbb86 | | JIRA Issue |
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16183504#comment-16183504 ] Hadoop QA commented on YARN-7009: - | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 20s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 14s{color} | {color:blue} Maven dependency ordering for branch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 13m 51s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 15m 10s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 2m 8s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 2m 44s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 15m 24s{color} | {color:green} branch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 4m 57s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 2m 26s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 19s{color} | {color:blue} Maven dependency ordering for patch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 2m 29s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 12m 53s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 12m 53s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 2m 14s{color} | {color:green} root: The patch generated 0 new + 278 unchanged - 2 fixed = 278 total (was 280) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 3m 46s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 0s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:green}+1{color} | {color:green} xml {color} | {color:green} 0m 2s{color} | {color:green} The patch has no ill-formed XML file. {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 11m 19s{color} | {color:green} patch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 10m 10s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 5m 52s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:green}+1{color} | {color:green} unit {color} | {color:green} 1m 31s{color} | {color:green} hadoop-yarn-api in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 5m 0s{color} | {color:green} hadoop-yarn-common in the patch passed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 18m 26s{color} | {color:red} hadoop-yarn-server-nodemanager in the patch failed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 27m 53s{color} | {color:green} hadoop-yarn-client in the patch passed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 3m 44s{color} | {color:red} hadoop-sls in the patch failed. {color} | | {color:green}+1{color} | {color:green} asflicense {color} | {color:green} 1m 15s{color} | {color:green} The patch does not generate ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black}182m 28s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | hadoop.yarn.server.nodemanager.containermanager.scheduler.TestContainerSchedulerQueuing | | | hadoop.yarn.sls.TestReservationSystemInvariants | | | hadoop.yarn.sls.TestSLSRunner | |
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16183278#comment-16183278 ] Hadoop QA commented on YARN-7009: - | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 22s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 18s{color} | {color:blue} Maven dependency ordering for branch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 14m 0s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 15m 3s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 2m 5s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 2m 51s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 16m 21s{color} | {color:green} branch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 4m 42s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 2m 24s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 17s{color} | {color:blue} Maven dependency ordering for patch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 2m 15s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 11m 26s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 11m 26s{color} | {color:green} the patch passed {color} | | {color:orange}-0{color} | {color:orange} checkstyle {color} | {color:orange} 2m 7s{color} | {color:orange} root: The patch generated 2 new + 278 unchanged - 2 fixed = 280 total (was 280) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 3m 2s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 0s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:green}+1{color} | {color:green} xml {color} | {color:green} 0m 2s{color} | {color:green} The patch has no ill-formed XML file. {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 10m 10s{color} | {color:green} patch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 5m 31s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 2m 42s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:green}+1{color} | {color:green} unit {color} | {color:green} 0m 44s{color} | {color:green} hadoop-yarn-api in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 2m 46s{color} | {color:green} hadoop-yarn-common in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 16m 9s{color} | {color:green} hadoop-yarn-server-nodemanager in the patch passed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 20m 3s{color} | {color:red} hadoop-yarn-client in the patch failed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 1m 45s{color} | {color:red} hadoop-sls in the patch failed. {color} | | {color:green}+1{color} | {color:green} asflicense {color} | {color:green} 0m 34s{color} | {color:green} The patch does not generate ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black}155m 11s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | hadoop.yarn.client.api.impl.TestNMClient | | | hadoop.yarn.sls.TestReservationSystemInvariants | | | hadoop.yarn.sls.TestSLSRunner | \\ \\ || Subsystem || Report/Notes || | Docker |
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16177326#comment-16177326 ] Haibo Chen commented on YARN-7009: -- Thanks for the update. I think we can avoid DebugSumContainerStateListenerProxy by making DebugSumContainerStateListener.transitionCounter static. The javadoc for DebugSumContainerStateListener bq.* Container State transition listener to debug the sum of transitions. The sum of transitions is not very precise. Can we say to track the # of times a container has transitioned into a state. > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch, YARN-7009.003.patch, YARN-7009.004.patch, > YARN-7009.005.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at >
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16177087#comment-16177087 ] Hadoop QA commented on YARN-7009: - | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 27s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 1m 48s{color} | {color:blue} Maven dependency ordering for branch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 15m 16s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 16m 56s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 2m 7s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 2m 51s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 4m 36s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 2m 25s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 18s{color} | {color:blue} Maven dependency ordering for patch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 2m 9s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 10m 53s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 10m 53s{color} | {color:green} the patch passed {color} | | {color:orange}-0{color} | {color:orange} checkstyle {color} | {color:orange} 2m 7s{color} | {color:orange} root: The patch generated 2 new + 278 unchanged - 2 fixed = 280 total (was 280) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 3m 5s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 0s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:green}+1{color} | {color:green} xml {color} | {color:green} 0m 1s{color} | {color:green} The patch has no ill-formed XML file. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 5m 46s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 2m 42s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:green}+1{color} | {color:green} unit {color} | {color:green} 0m 41s{color} | {color:green} hadoop-yarn-api in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 2m 41s{color} | {color:green} hadoop-yarn-common in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 16m 20s{color} | {color:green} hadoop-yarn-server-nodemanager in the patch passed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 22m 10s{color} | {color:red} hadoop-yarn-client in the patch failed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 1m 45s{color} | {color:red} hadoop-sls in the patch failed. {color} | | {color:green}+1{color} | {color:green} asflicense {color} | {color:green} 0m 38s{color} | {color:green} The patch does not generate ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black}144m 15s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | hadoop.yarn.client.api.impl.TestAMRMClient | | | hadoop.yarn.sls.TestReservationSystemInvariants | | | hadoop.yarn.sls.TestSLSRunner | \\ \\ || Subsystem || Report/Notes || | Docker | Image:yetus/hadoop:71bbb86 | | JIRA Issue | YARN-7009 | | JIRA Patch URL | https://issues.apache.org/jira/secure/attachment/12888535/YARN-7009.005.patch | | Optional Tests | asflicense compile javac javadoc mvninstall mvnsite unit findbugs checkstyle xml | | uname | Linux 275d7293479f 3.13.0-119-generic #166-Ubuntu SMP Wed May 3 12:18:55 UTC 2017 x86_64
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16176681#comment-16176681 ] Haibo Chen commented on YARN-7009: -- Thanks [~miklos.szeg...@cloudera.com] for the patch. A few comments: 1) The pattern in DebugSumContainerStateListener is not very intuitive to me. Can we remove the singleton in the constructor and just modify it in the postTransition() method? 2) In the else block you added in ContainerLaunch, you mention one specific race condition. Is it possible that this can be triggered if the container process finishes too quickly by itself in which case, the kill event is unnecessary. 3) Some knits: There are some newly added unused imports; testContainer() has a very generic parameter name, array. Can we rename it to exitStatuses? > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch, YARN-7009.003.patch, YARN-7009.004.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at >
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16175737#comment-16175737 ] Hadoop QA commented on YARN-7009: - | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 13s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 17s{color} | {color:blue} Maven dependency ordering for branch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 22m 19s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 30m 17s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 3m 42s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 6m 56s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 10m 49s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 5m 43s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 39s{color} | {color:blue} Maven dependency ordering for patch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 5m 38s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 25m 58s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 25m 58s{color} | {color:green} the patch passed {color} | | {color:orange}-0{color} | {color:orange} checkstyle {color} | {color:orange} 4m 5s{color} | {color:orange} root: The patch generated 8 new + 305 unchanged - 2 fixed = 313 total (was 307) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 7m 52s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 1s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:green}+1{color} | {color:green} xml {color} | {color:green} 0m 4s{color} | {color:green} The patch has no ill-formed XML file. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 14m 17s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 5m 53s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:green}+1{color} | {color:green} unit {color} | {color:green} 1m 36s{color} | {color:green} hadoop-yarn-api in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 5m 43s{color} | {color:green} hadoop-yarn-common in the patch passed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 24m 42s{color} | {color:red} hadoop-yarn-server-nodemanager in the patch failed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 56m 9s{color} | {color:red} hadoop-yarn-client in the patch failed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 5m 41s{color} | {color:red} hadoop-sls in the patch failed. {color} | | {color:green}+1{color} | {color:green} asflicense {color} | {color:green} 2m 37s{color} | {color:green} The patch does not generate ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black}271m 25s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | hadoop.yarn.server.nodemanager.TestNodeManagerResync | | | hadoop.yarn.server.nodemanager.TestNodeStatusUpdater | | | hadoop.yarn.server.nodemanager.containermanager.scheduler.TestContainerSchedulerQueuing | | | hadoop.yarn.client.TestApplicationClientProtocolOnHA | | | hadoop.yarn.sls.TestReservationSystemInvariants | | | hadoop.yarn.sls.TestSLSRunner | | Timed out junit tests | org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager | | | org.apache.hadoop.yarn.client.api.impl.TestOpportunisticContainerAllocation | | |
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16175131#comment-16175131 ] Arun Suresh commented on YARN-7009: --- Thanks for the update [~miklos.szeg...@cloudera.com], Can we move the {{DebugSumContainerStateListener}} out of the {{NodeManager}} class and somewhere into the test package ? I like the use of the lambda expression - but we must ensure to replace it for the 2.x patch. > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch, YARN-7009.003.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at >
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16174056#comment-16174056 ] Hadoop QA commented on YARN-7009: - | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 11s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 18s{color} | {color:blue} Maven dependency ordering for branch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 12m 31s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 13m 59s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 1m 54s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 2m 38s{color} | {color:green} trunk passed {color} | | {color:red}-1{color} | {color:red} findbugs {color} | {color:red} 0m 52s{color} | {color:red} hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager in trunk has 1 extant Findbugs warnings. {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 1m 55s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 17s{color} | {color:blue} Maven dependency ordering for patch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 1m 58s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 10m 28s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 10m 28s{color} | {color:green} the patch passed {color} | | {color:orange}-0{color} | {color:orange} checkstyle {color} | {color:orange} 1m 50s{color} | {color:orange} root: The patch generated 5 new + 379 unchanged - 5 fixed = 384 total (was 384) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 3m 7s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 0s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:green}+1{color} | {color:green} xml {color} | {color:green} 0m 2s{color} | {color:green} The patch has no ill-formed XML file. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 5m 39s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 2m 56s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:green}+1{color} | {color:green} unit {color} | {color:green} 0m 34s{color} | {color:green} hadoop-yarn-api in the patch passed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 2m 45s{color} | {color:red} hadoop-yarn-common in the patch failed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 16m 15s{color} | {color:green} hadoop-yarn-server-nodemanager in the patch passed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 32m 10s{color} | {color:red} hadoop-yarn-client in the patch failed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 1m 44s{color} | {color:red} hadoop-sls in the patch failed. {color} | | {color:green}+1{color} | {color:green} asflicense {color} | {color:green} 0m 30s{color} | {color:green} The patch does not generate ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black}145m 37s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | hadoop.yarn.client.api.impl.TestTimelineClientV2Impl | | | hadoop.yarn.sls.TestReservationSystemInvariants | | | hadoop.yarn.sls.TestSLSRunner | | Timed out junit tests | org.apache.hadoop.yarn.client.api.impl.TestAMRMClient | \\ \\ || Subsystem || Report/Notes || | Docker | Image:yetus/hadoop:71bbb86 | | JIRA Issue | YARN-7009 | | JIRA Patch URL | https://issues.apache.org/jira/secure/attachment/12888142/YARN-7009.003.patch | | Optional Tests | asflicense
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16173858#comment-16173858 ] Miklos Szegedi commented on YARN-7009: -- Thank you for the review and comments, [~asuresh], [~templedf] and [~gsohn]. bq. Instead of throwing an AssertionError, how about using fail()? AssertionError also captures the source exception, so the output will be more actionable bq. Your change to StateMachine is technically correct, but I always prefer to have things be explicit. I prefer having the methods public. This does not apply to the latest patch anymore. I rewrote the patch to use the listener implementation by [~asuresh]. > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at >
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16167128#comment-16167128 ] Miklos Szegedi commented on YARN-7009: -- Thank you, [~asuresh] for the feedback. Yes indeed there is no point having two different implementations for the same purpose, so I will wait until you check in YARN-7192 and use your implementation. > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16167076#comment-16167076 ] Arun Suresh commented on YARN-7009: --- Thanks for the patch [~miklos.szeg...@cloudera.com]. I was thinking, instead of explicitly making a check in the ContainerImpl constructor, if we were to integrate with YARN-7192, for the testcases, you can just probably override the {{createNMContext()}} or maybe even plugin an implementation of the NMcontext that returns a decorated Listener. > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at >
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16166087#comment-16166087 ] Daniel Templeton commented on YARN-7009: My comments from the partial review I did: # Instead of throwing an {{AssertionError}}, how about using {{fail()}}? # Your change to {{StateMachine}} is technically correct, but I always prefer to have things be explicit. I prefer having the methods _public_. > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at >
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16165016#comment-16165016 ] Grant Sohn commented on YARN-7009: -- +1 (non-binding). > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Attachments: YARN-7009.000.patch, YARN-7009.001.patch, > YARN-7009.002.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16136227#comment-16136227 ] Hadoop QA commented on YARN-7009: - | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 14s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 22s{color} | {color:blue} Maven dependency ordering for branch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 13m 29s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 10m 22s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 1m 9s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 1m 38s{color} | {color:green} trunk passed {color} | | {color:red}-1{color} | {color:red} findbugs {color} | {color:red} 0m 49s{color} | {color:red} hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager in trunk has 1 extant Findbugs warnings. {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 1m 26s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 10s{color} | {color:blue} Maven dependency ordering for patch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 1m 15s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 5m 45s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 5m 45s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 0m 54s{color} | {color:green} hadoop-yarn-project/hadoop-yarn: The patch generated 0 new + 107 unchanged - 4 fixed = 107 total (was 111) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 1m 35s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 0s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 2m 58s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 1m 20s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:green}+1{color} | {color:green} unit {color} | {color:green} 2m 37s{color} | {color:green} hadoop-yarn-common in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 13m 54s{color} | {color:green} hadoop-yarn-server-nodemanager in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 20m 41s{color} | {color:green} hadoop-yarn-client in the patch passed. {color} | | {color:green}+1{color} | {color:green} asflicense {color} | {color:green} 0m 26s{color} | {color:green} The patch does not generate ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black} 91m 18s{color} | {color:black} {color} | \\ \\ || Subsystem || Report/Notes || | Docker | Image:yetus/hadoop:14b5c93 | | JIRA Issue | YARN-7009 | | JIRA Patch URL | https://issues.apache.org/jira/secure/attachment/12883029/YARN-7009.002.patch | | Optional Tests | asflicense compile javac javadoc mvninstall mvnsite unit findbugs checkstyle | | uname | Linux 5ab49c7c436b 4.4.0-43-generic #63-Ubuntu SMP Wed Oct 12 13:48:03 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | /testptch/hadoop/patchprocess/precommit/personality/provided.sh | | git revision | trunk / b6bfb2f | | Default Java | 1.8.0_144 | | findbugs | v3.1.0-RC1 | | findbugs | https://builds.apache.org/job/PreCommit-YARN-Build/17052/artifact/patchprocess/branch-findbugs-hadoop-yarn-project_hadoop-yarn_hadoop-yarn-server_hadoop-yarn-server-nodemanager-warnings.html | | Test Results |
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16136096#comment-16136096 ] Hadoop QA commented on YARN-7009: - | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 19s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 9s{color} | {color:blue} Maven dependency ordering for branch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 14m 4s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 10m 23s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 0m 53s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 1m 45s{color} | {color:green} trunk passed {color} | | {color:red}-1{color} | {color:red} findbugs {color} | {color:red} 0m 55s{color} | {color:red} hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager in trunk has 1 extant Findbugs warnings. {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 1m 28s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 10s{color} | {color:blue} Maven dependency ordering for patch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 1m 18s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 6m 43s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 6m 43s{color} | {color:green} the patch passed {color} | | {color:orange}-0{color} | {color:orange} checkstyle {color} | {color:orange} 0m 53s{color} | {color:orange} hadoop-yarn-project/hadoop-yarn: The patch generated 1 new + 108 unchanged - 4 fixed = 109 total (was 112) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 1m 37s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 0s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 3m 6s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 1m 14s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:red}-1{color} | {color:red} unit {color} | {color:red} 2m 39s{color} | {color:red} hadoop-yarn-common in the patch failed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 14m 4s{color} | {color:green} hadoop-yarn-server-nodemanager in the patch passed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 20m 53s{color} | {color:green} hadoop-yarn-client in the patch passed. {color} | | {color:green}+1{color} | {color:green} asflicense {color} | {color:green} 0m 28s{color} | {color:green} The patch does not generate ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black} 93m 0s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | hadoop.yarn.client.api.impl.TestTimelineClientV2Impl | \\ \\ || Subsystem || Report/Notes || | Docker | Image:yetus/hadoop:14b5c93 | | JIRA Issue | YARN-7009 | | JIRA Patch URL | https://issues.apache.org/jira/secure/attachment/12882995/YARN-7009.001.patch | | Optional Tests | asflicense compile javac javadoc mvninstall mvnsite unit findbugs checkstyle | | uname | Linux bad24e9ad7b9 4.4.0-43-generic #63-Ubuntu SMP Wed Oct 12 13:48:03 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | /testptch/hadoop/patchprocess/precommit/personality/provided.sh | | git revision | trunk / b6bfb2f | | Default Java | 1.8.0_144 | | findbugs | v3.1.0-RC1 | | findbugs |
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16130021#comment-16130021 ] Hadoop QA commented on YARN-7009: - | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 21s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 49s{color} | {color:blue} Maven dependency ordering for branch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 15m 12s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 9m 36s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 0m 57s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 1m 42s{color} | {color:green} trunk passed {color} | | {color:red}-1{color} | {color:red} findbugs {color} | {color:red} 0m 52s{color} | {color:red} hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager in trunk has 1 extant Findbugs warnings. {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 1m 28s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue} 0m 11s{color} | {color:blue} Maven dependency ordering for patch {color} | | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 1m 16s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 5m 34s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 5m 34s{color} | {color:green} the patch passed {color} | | {color:orange}-0{color} | {color:orange} checkstyle {color} | {color:orange} 0m 56s{color} | {color:orange} hadoop-yarn-project/hadoop-yarn: The patch generated 3 new + 136 unchanged - 17 fixed = 139 total (was 153) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 1m 40s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 0s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:red}-1{color} | {color:red} findbugs {color} | {color:red} 1m 22s{color} | {color:red} hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common generated 1 new + 0 unchanged - 0 fixed = 1 total (was 0) {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 1m 25s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:green}+1{color} | {color:green} unit {color} | {color:green} 2m 27s{color} | {color:green} hadoop-yarn-common in the patch passed. {color} | | {color:red}-1{color} | {color:red} unit {color} | {color:red} 12m 39s{color} | {color:red} hadoop-yarn-server-nodemanager in the patch failed. {color} | | {color:green}+1{color} | {color:green} unit {color} | {color:green} 20m 26s{color} | {color:green} hadoop-yarn-client in the patch passed. {color} | | {color:green}+1{color} | {color:green} asflicense {color} | {color:green} 0m 33s{color} | {color:green} The patch does not generate ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black} 91m 14s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | FindBugs | module:hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common | | | Dead store to sum in org.apache.hadoop.yarn.state.DebugStateMachineDecoratorImpl.doTransition(Enum, Object) At DebugStateMachineDecoratorImpl.java:org.apache.hadoop.yarn.state.DebugStateMachineDecoratorImpl.doTransition(Enum, Object) At DebugStateMachineDecoratorImpl.java:[line 56] | | Timed out junit tests | org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager | \\ \\ || Subsystem || Report/Notes || | Docker | Image:yetus/hadoop:14b5c93 | | JIRA Issue | YARN-7009 | | JIRA Patch URL | https://issues.apache.org/jira/secure/attachment/12882267/YARN-7009.000.patch | | Optional Tests | asflicense compile javac javadoc mvninstall mvnsite unit findbugs checkstyle | | uname |
[jira] [Commented] (YARN-7009) TestNMClient.testNMClientNoCleanupOnStop is flaky by design
[ https://issues.apache.org/jira/browse/YARN-7009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16129937#comment-16129937 ] Miklos Szegedi commented on YARN-7009: -- The exception above was caused by the fact that we are always in the RUNNING state, so we just jump to reinitialize after restart. I have also seen another instance, when the container finished on a busy machine early waiting indefinitely. I solved this by increasing the sleep time to an arbitrarily large number. We stop the container anyways, plus we have the timeout. There is another issue with the pid file. If it is not created within 2 seconds, this code may skip the reinitialize keeping the container in the REINITIALIZE state forever. processId may be null: {code} // get process id from pid file if available // else if shell is still active, get it from the shell String processId = null; if (pidFilePath != null) { processId = getContainerPid(pidFilePath); } // kill process if (processId != null) { {code} Fixing this however needs significant refactoring, so another Jira I think. The only thing I could do in this jira is to increase the DEFAULT_NM_PROCESS_KILL_WAIT_MS timeout to 1. What do you think? > TestNMClient.testNMClientNoCleanupOnStop is flaky by design > --- > > Key: YARN-7009 > URL: https://issues.apache.org/jira/browse/YARN-7009 > Project: Hadoop YARN > Issue Type: Bug >Reporter: Miklos Szegedi >Assignee: Miklos Szegedi > Attachments: YARN-7009.000.patch > > > The sleeps to wait for a transition to reinit and than back to running is not > long enough, it can miss the reinit event. > {code} > java.lang.AssertionError: Exception is not expected: > org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform RE_INIT on > [container_1502735389852_0001_01_01]. Current state is [REINITIALIZING, > isReInitializing=true]. > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.preReInitializeOrLocalizeCheck(ContainerManagerImpl.java:1772) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1697) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.reInitializeContainer(ContainerManagerImpl.java:1668) > at > org.apache.hadoop.yarn.api.impl.pb.service.ContainerManagementProtocolPBServiceImpl.reInitializeContainer(ContainerManagementProtocolPBServiceImpl.java:214) > at > org.apache.hadoop.yarn.proto.ContainerManagementProtocol$ContainerManagementProtocolService$2.callBlockingMethod(ContainerManagementProtocol.java:237) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:523) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:991) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:869) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:815) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1962) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2675) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testReInitializeContainer(TestNMClient.java:567) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testContainerManagement(TestNMClient.java:405) > at > org.apache.hadoop.yarn.client.api.impl.TestNMClient.testNMClientNoCleanupOnStop(TestNMClient.java:214) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) > Caused by: org.apache.hadoop.yarn.exceptions.YarnException: Cannot perform > RE_INIT on [container_1502735389852_0001_01_01]. Current state is > [REINITIALIZING, isReInitializing=true]. > at >