[ https://issues.apache.org/jira/browse/HDFS-3181?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Tsz Wo (Nicholas), SZE updated HDFS-3181: ----------------------------------------- Description: org.apache.hadoop.hdfs.TestLeaseRecovery2.testHardLeaseRecoveryAfterNameNodeRestart seems to be failing intermittently on jenkins. {code} org.apache.hadoop.hdfs.TestLeaseRecovery2.testHardLeaseRecoveryAfterNameNodeRestart Failing for the past 1 build (Since Failed#2163 ) Took 8.4 sec. Error Message Lease mismatch on /hardLeaseRecovery owned by HDFS_NameNode but is accessed by DFSClient_NONMAPREDUCE_1147689755_1 at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2076) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2051) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalDatanode(FSNamesystem.java:1983) at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getAdditionalDatanode(NameNodeRpcServer.java:492) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getAdditionalDatanode(ClientNamenodeProtocolServerSideTranslatorPB.java:311) at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:42604) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:417) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:891) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1661) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1657) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:396) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1205) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1655) Stacktrace org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException: Lease mismatch on /hardLeaseRecovery owned by HDFS_NameNode but is accessed by DFSClient_NONMAPREDUCE_1147689755_1 at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2076) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2051) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalDatanode(FSNamesystem.java:1983) at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getAdditionalDatanode(NameNodeRpcServer.java:492) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getAdditionalDatanode(ClientNamenodeProtocolServerSideTranslatorPB.java:311) at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:42604) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:417) ... at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:83) at $Proxy15.getAdditionalDatanode(Unknown Source) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getAdditionalDatanode(ClientNamenodeProtocolTranslatorPB.java:317) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.addDatanode2ExistingPipeline(DFSOutputStream.java:828) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.setupPipelineForAppendOrRecovery(DFSOutputStream.java:930) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.processDatanodeError(DFSOutputStream.java:741) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:416) {code} was: org.apache.hadoop.hdfs.TestLeaseRecovery2.testHardLeaseRecoveryAfterNameNodeRestart seems to be failing intermittently on jenkins. {code} org.apache.hadoop.hdfs.TestLeaseRecovery2.testHardLeaseRecoveryAfterNameNodeRestart Failing for the past 1 build (Since Failed#2163 ) Took 8.4 sec. Error Message Lease mismatch on /hardLeaseRecovery owned by HDFS_NameNode but is accessed by DFSClient_NONMAPREDUCE_1147689755_1 at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2076) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2051) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalDatanode(FSNamesystem.java:1983) at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getAdditionalDatanode(NameNodeRpcServer.java:492) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getAdditionalDatanode(ClientNamenodeProtocolServerSideTranslatorPB.java:311) at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:42604) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:417) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:891) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1661) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1657) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:396) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1205) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1655) Stacktrace org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException: Lease mismatch on /hardLeaseRecovery owned by HDFS_NameNode but is accessed by DFSClient_NONMAPREDUCE_1147689755_1 at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2076) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2051) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalDatanode(FSNamesystem.java:1983) at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getAdditionalDatanode(NameNodeRpcServer.java:492) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getAdditionalDatanode(ClientNamenodeProtocolServerSideTranslatorPB.java:311) at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:42604) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:417) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:891) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1661) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1657) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:396) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1205) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1655) at org.apache.hadoop.ipc.Client.call(Client.java:1159) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:185) at $Proxy15.getAdditionalDatanode(Unknown Source) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:164) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:83) at $Proxy15.getAdditionalDatanode(Unknown Source) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getAdditionalDatanode(ClientNamenodeProtocolTranslatorPB.java:317) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.addDatanode2ExistingPipeline(DFSOutputStream.java:828) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.setupPipelineForAppendOrRecovery(DFSOutputStream.java:930) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.processDatanodeError(DFSOutputStream.java:741) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:416) {code} Summary: testHardLeaseRecoveryAfterNameNodeRestart fails when length before restart is 1 byte less than CRC chunk size (was: testHardLeaseRecoveryAfterNameNodeRestart fails when length before restart is 1 byte less than block size) > testHardLeaseRecoveryAfterNameNodeRestart fails when length before restart is > 1 byte less than CRC chunk size > ------------------------------------------------------------------------------------------------------------- > > Key: HDFS-3181 > URL: https://issues.apache.org/jira/browse/HDFS-3181 > Project: Hadoop HDFS > Issue Type: Bug > Affects Versions: 2.0.0 > Reporter: Colin Patrick McCabe > Priority: Critical > Attachments: TestLeaseRecovery2with1535.patch, repro.txt, testOut.txt > > > org.apache.hadoop.hdfs.TestLeaseRecovery2.testHardLeaseRecoveryAfterNameNodeRestart > seems to be failing intermittently on jenkins. > {code} > org.apache.hadoop.hdfs.TestLeaseRecovery2.testHardLeaseRecoveryAfterNameNodeRestart > Failing for the past 1 build (Since Failed#2163 ) > Took 8.4 sec. > Error Message > Lease mismatch on /hardLeaseRecovery owned by HDFS_NameNode but is accessed > by DFSClient_NONMAPREDUCE_1147689755_1 at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2076) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2051) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalDatanode(FSNamesystem.java:1983) > at > org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getAdditionalDatanode(NameNodeRpcServer.java:492) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getAdditionalDatanode(ClientNamenodeProtocolServerSideTranslatorPB.java:311) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:42604) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:417) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:891) at > org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1661) at > org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1657) at > java.security.AccessController.doPrivileged(Native Method) at > javax.security.auth.Subject.doAs(Subject.java:396) at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1205) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1655) > Stacktrace > org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException: Lease mismatch > on /hardLeaseRecovery owned by HDFS_NameNode but is accessed by > DFSClient_NONMAPREDUCE_1147689755_1 > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2076) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2051) > at > org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalDatanode(FSNamesystem.java:1983) > at > org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getAdditionalDatanode(NameNodeRpcServer.java:492) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getAdditionalDatanode(ClientNamenodeProtocolServerSideTranslatorPB.java:311) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:42604) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:417) > ... > at > org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:83) > at $Proxy15.getAdditionalDatanode(Unknown Source) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getAdditionalDatanode(ClientNamenodeProtocolTranslatorPB.java:317) > at > org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.addDatanode2ExistingPipeline(DFSOutputStream.java:828) > at > org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.setupPipelineForAppendOrRecovery(DFSOutputStream.java:930) > at > org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.processDatanodeError(DFSOutputStream.java:741) > at > org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:416) > {code} -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa For more information on JIRA, see: http://www.atlassian.com/software/jira