[ 
https://issues.apache.org/jira/browse/HDFS-4404?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13555724#comment-13555724
 ] 

Todd Lipcon commented on HDFS-4404:
-----------------------------------

(By the way, I verified that it works on a test setup. I will write an actual 
unit test if people think this is the right approach.)
                
> Create file failure when the machine of first attempted NameNode is down
> ------------------------------------------------------------------------
>
>                 Key: HDFS-4404
>                 URL: https://issues.apache.org/jira/browse/HDFS-4404
>             Project: Hadoop HDFS
>          Issue Type: Bug
>          Components: ha, hdfs-client
>    Affects Versions: 2.0.2-alpha
>            Reporter: liaowenrui
>            Assignee: Todd Lipcon
>            Priority: Blocker
>         Attachments: HDFS-4404.patch, hdfs-4404.txt
>
>
> test Environment: NN1,NN2,DN1,DN2,DN3
> machine1:NN1,DN1
> machine2:NN2,DN2
> machine3:DN3
> machine1 is down.
> 2013-01-12 09:51:21,248 DEBUG ipc.Client (Client.java:setupIOstreams(562)) - 
> Connecting to /160.161.0.155:8020
> 2013-01-12 09:51:38,442 DEBUG ipc.Client (Client.java:close(932)) - closing 
> ipc connection to vm2/160.161.0.155:8020: 10000 millis timeout while waiting 
> for channel to be ready for connect. ch : 
> java.nio.channels.SocketChannel[connection-pending remote=/160.161.0.155:8020]
> java.net.SocketTimeoutException: 10000 millis timeout while waiting for 
> channel to be ready for connect. ch : 
> java.nio.channels.SocketChannel[connection-pending remote=/160.161.0.155:8020]
>  at 
> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:213)
>  at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:524)
>  at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:489)
>  at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:474)
>  at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:568)
>  at org.apache.hadoop.ipc.Client$Connection.access$2000(Client.java:217)
>  at org.apache.hadoop.ipc.Client.getConnection(Client.java:1286)
>  at org.apache.hadoop.ipc.Client.call(Client.java:1156)
>  at 
> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:184)
>  at $Proxy9.create(Unknown Source)
>  at 
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create(ClientNamenodeProtocolTranslatorPB.java:187)
>  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>  at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>  at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>  at java.lang.reflect.Method.invoke(Method.java:597)
>  at 
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:165)
>  at 
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:84)
>  at $Proxy10.create(Unknown Source)
>  at org.apache.hadoop.hdfs.DFSOutputStream.<init>(DFSOutputStream.java:1261)
>  at 
> org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1280)
>  at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1128)
>  at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1086)
>  at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:232)
>  at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:75)
>  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:806)
>  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:787)
>  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:715)
>  at test.TestLease.main(TestLease.java:45)
> 2013-01-12 09:51:38,443 DEBUG ipc.Client (Client.java:close(940)) - IPC 
> Client (31594013) connection to /160.161.0.155:8020 from 
> hdfs/had...@hadoop.com: closed
> 2013-01-12 09:52:47,834 WARN  retry.RetryInvocationHandler 
> (RetryInvocationHandler.java:invoke(95)) - Exception while invoking class 
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create. 
> Not retrying because the invoked method is not idempotent, and unable to 
> determine whether it was invoked
> java.net.SocketTimeoutException: Call From szxy1x001833091/172.0.0.13 to 
> vm2:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 
> 10000 millis timeout while waiting for channel to be ready for connect. ch : 
> java.nio.channels.SocketChannel[connection-pending 
> remote=/160.161.0.155:8020]; For more details see:  
> http://wiki.apache.org/hadoop/SocketTimeout
>  at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:743)
>  at org.apache.hadoop.ipc.Client.call(Client.java:1180)
>  at 
> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:184)
>  at $Proxy9.create(Unknown Source)
>  at 
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create(ClientNamenodeProtocolTranslatorPB.java:187)
>  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>  at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>  at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>  at java.lang.reflect.Method.invoke(Method.java:597)
>  at 
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:165)
>  at 
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:84)
>  at $Proxy10.create(Unknown Source)
>  at org.apache.hadoop.hdfs.DFSOutputStream.<init>(DFSOutputStream.java:1261)
>  at 
> org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1280)
>  at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1128)
>  at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1086)
>  at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:232)
>  at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:75)
>  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:806)
>  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:787)
>  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:715)
>  at test.TestLease.main(TestLease.java:45)
> Caused by: java.net.SocketTimeoutException: 10000 millis timeout while 
> waiting for channel to be ready for connect. ch : 
> java.nio.channels.SocketChannel[connection-pending remote=/160.161.0.155:8020]
>  at 
> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:213)
>  at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:524)
>  at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:489)
>  at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:474)
>  at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:568)
>  at org.apache.hadoop.ipc.Client$Connection.access$2000(Client.java:217)
>  at org.apache.hadoop.ipc.Client.getConnection(Client.java:1286)
>  at org.apache.hadoop.ipc.Client.call(Client.java:1156)
>  ... 20 more
> java.net.SocketTimeoutException: Call From szxy1x001833091/172.0.0.13 to 
> vm2:8020 failed on socket timeout exception: java.net.SocketTimeoutException: 
> 10000 millis timeout while waiting for channel to be ready for connect. ch : 
> java.nio.channels.SocketChannel[connection-pending 
> remote=/160.161.0.155:8020]; For more details see:  
> http://wiki.apache.org/hadoop/SocketTimeout
>  at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:743)
>  at org.apache.hadoop.ipc.Client.call(Client.java:1180)
>  at 
> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:184)
>  at $Proxy9.create(Unknown Source)
>  at 
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create(ClientNamenodeProtocolTranslatorPB.java:187)
>  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>  at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>  at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>  at java.lang.reflect.Method.invoke(Method.java:597)
>  at 
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:165)
>  at 
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:84)
>  at $Proxy10.create(Unknown Source)
>  at org.apache.hadoop.hdfs.DFSOutputStream.<init>(DFSOutputStream.java:1261)
>  at 
> org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1280)
>  at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1128)
>  at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1086)
>  at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:232)
>  at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:75)
>  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:806)
>  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:787)
>  at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:715)
>  at test.TestLease.main(TestLease.java:45)
> Caused by: java.net.SocketTimeoutException: 10000 millis timeout while 
> waiting for channel to be ready for connect. ch : 
> java.nio.channels.SocketChannel[connection-pending remote=/160.161.0.155:8020]
>  at 
> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:213)
>  at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:524)
>  at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:489)
>  at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:474)
>  at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:568)
>  at org.apache.hadoop.ipc.Client$Connection.access$2000(Client.java:217)
>  at org.apache.hadoop.ipc.Client.getConnection(Client.java:1286)
>  at org.apache.hadoop.ipc.Client.call(Client.java:1156)
>  ... 20 more
> 2013-01-12 09:54:52,269 DEBUG ipc.Client (Client.java:stop(1021)) - Stopping 
> client

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira

Reply via email to