liaowenrui created HDFS-4404: -------------------------------- Summary: create file failure when the machine of the first NameNode is down Key: HDFS-4404 URL: https://issues.apache.org/jira/browse/HDFS-4404 Project: Hadoop HDFS Issue Type: Bug Components: hdfs-client Affects Versions: 2.0.2-alpha Reporter: liaowenrui Priority: Blocker Fix For: 2.0.3-alpha, 2.0.2-alpha
Test environment: NN1,NN2,DN1,DN2,DN3 machine1:NN1,DN1 machine2:NN2,DN2 machine3:DN3 machine1 is down. 2013-01-12 09:51:21,248 DEBUG ipc.Client (Client.java:setupIOstreams(562)) - Connecting to /160.161.0.155:8020 2013-01-12 09:51:38,442 DEBUG ipc.Client (Client.java:close(932)) - closing ipc connection to vm2/160.161.0.155:8020: 10000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=/160.161.0.155:8020] java.net.SocketTimeoutException: 10000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=/160.161.0.155:8020] at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:213) at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:524) at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:489) at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:474) at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:568) at org.apache.hadoop.ipc.Client$Connection.access$2000(Client.java:217) at org.apache.hadoop.ipc.Client.getConnection(Client.java:1286) at org.apache.hadoop.ipc.Client.call(Client.java:1156) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:184) at $Proxy9.create(Unknown Source) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create(ClientNamenodeProtocolTranslatorPB.java:187) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:165) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:84) at $Proxy10.create(Unknown Source) at 
org.apache.hadoop.hdfs.DFSOutputStream.<init>(DFSOutputStream.java:1261) at org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1280) at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1128) at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1086) at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:232) at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:75) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:806) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:787) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:715) at test.TestLease.main(TestLease.java:45) 2013-01-12 09:51:38,443 DEBUG ipc.Client (Client.java:close(940)) - IPC Client (31594013) connection to /160.161.0.155:8020 from hdfs/had...@hadoop.com: closed 2013-01-12 09:52:47,834 WARN retry.RetryInvocationHandler (RetryInvocationHandler.java:invoke(95)) - Exception while invoking class org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create. Not retrying because the invoked method is not idempotent, and unable to determine whether it was invoked java.net.SocketTimeoutException: Call From szxy1x001833091/172.0.0.13 to vm2:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 10000 millis timeout while waiting for channel to be ready for connect. 
ch : java.nio.channels.SocketChannel[connection-pending remote=/160.161.0.155:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:743) at org.apache.hadoop.ipc.Client.call(Client.java:1180) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:184) at $Proxy9.create(Unknown Source) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create(ClientNamenodeProtocolTranslatorPB.java:187) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:165) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:84) at $Proxy10.create(Unknown Source) at org.apache.hadoop.hdfs.DFSOutputStream.<init>(DFSOutputStream.java:1261) at org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1280) at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1128) at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1086) at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:232) at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:75) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:806) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:787) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:715) at test.TestLease.main(TestLease.java:45) Caused by: java.net.SocketTimeoutException: 10000 millis timeout while waiting for channel to be ready for connect. 
ch : java.nio.channels.SocketChannel[connection-pending remote=/160.161.0.155:8020] at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:213) at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:524) at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:489) at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:474) at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:568) at org.apache.hadoop.ipc.Client$Connection.access$2000(Client.java:217) at org.apache.hadoop.ipc.Client.getConnection(Client.java:1286) at org.apache.hadoop.ipc.Client.call(Client.java:1156) ... 20 more java.net.SocketTimeoutException: Call From szxy1x001833091/172.0.0.13 to vm2:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 10000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=/160.161.0.155:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:743) at org.apache.hadoop.ipc.Client.call(Client.java:1180) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:184) at $Proxy9.create(Unknown Source) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create(ClientNamenodeProtocolTranslatorPB.java:187) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:165) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:84) at $Proxy10.create(Unknown Source) at org.apache.hadoop.hdfs.DFSOutputStream.<init>(DFSOutputStream.java:1261) at 
org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1280) at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1128) at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1086) at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:232) at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:75) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:806) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:787) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:715) at test.TestLease.main(TestLease.java:45) Caused by: java.net.SocketTimeoutException: 10000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=/160.161.0.155:8020] at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:213) at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:524) at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:489) at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:474) at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:568) at org.apache.hadoop.ipc.Client$Connection.access$2000(Client.java:217) at org.apache.hadoop.ipc.Client.getConnection(Client.java:1286) at org.apache.hadoop.ipc.Client.call(Client.java:1156) ... 20 more 2013-01-12 09:54:52,269 DEBUG ipc.Client (Client.java:stop(1021)) - Stopping client -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators For more information on JIRA, see: http://www.atlassian.com/software/jira