[ https://issues.apache.org/jira/browse/HDFS-14161?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Fei Hui updated HDFS-14161: --------------------------- Attachment: HDFS-14161-HDFS-13891.002.patch > RBF: Throw RetriableException instead of IOException so that client can retry > when can not get connection > --------------------------------------------------------------------------------------------------------- > > Key: HDFS-14161 > URL: https://issues.apache.org/jira/browse/HDFS-14161 > Project: Hadoop HDFS > Issue Type: Sub-task > Affects Versions: 3.1.1, 2.9.2, 3.0.3 > Reporter: Fei Hui > Assignee: Fei Hui > Priority: Major > Attachments: HDFS-14161-HDFS-13891.001.patch, > HDFS-14161-HDFS-13891.002.patch, HDFS-14161.001.patch > > > The Hive client may hang when it gets an IOException; the stack trace follows: > {code:java} > Exception in thread "Thread-150" java.lang.RuntimeException: > org.apache.hadoop.ipc.RemoteException(java.io.IOException): Cannot get a > connection to bigdata-nn20.g01:8020 > at > org.apache.hadoop.hdfs.server.federation.router.RouterRpcClient.getConnection(RouterRpcClient.java:262) > at > org.apache.hadoop.hdfs.server.federation.router.RouterRpcClient.invokeMethod(RouterRpcClient.java:380) > at > org.apache.hadoop.hdfs.server.federation.router.RouterRpcClient.invokeSequential(RouterRpcClient.java:752) > at > org.apache.hadoop.hdfs.server.federation.router.RouterRpcServer.getFileInfo(RouterRpcServer.java:1152) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getFileInfo(ClientNamenodeProtocolServerSideTranslatorPB.java:849) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2134) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2130) > at java.security.AccessController.doPrivileged(Native 
Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1867) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2130) > at > org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:554) > at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:74) > Caused by: org.apache.hadoop.ipc.RemoteException(java.io.IOException): Cannot > get a connection to bigdata-nn20.g01:8020 > at > org.apache.hadoop.hdfs.server.federation.router.RouterRpcClient.getConnection(RouterRpcClient.java:262) > at > org.apache.hadoop.hdfs.server.federation.router.RouterRpcClient.invokeMethod(RouterRpcClient.java:380) > at > org.apache.hadoop.hdfs.server.federation.router.RouterRpcClient.invokeSequential(RouterRpcClient.java:752) > at > org.apache.hadoop.hdfs.server.federation.router.RouterRpcServer.getFileInfo(RouterRpcServer.java:1152) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getFileInfo(ClientNamenodeProtocolServerSideTranslatorPB.java:849) > at > org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2134) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2130) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1867) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2130) > at org.apache.hadoop.ipc.Client.call(Client.java:1503) > at org.apache.hadoop.ipc.Client.call(Client.java:1441) > at > 
org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229) > at com.sun.proxy.$Proxy14.getFileInfo(Unknown Source) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:775) > at sun.reflect.GeneratedMethodAccessor9.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:253) > at > org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:101) > at com.sun.proxy.$Proxy15.getFileInfo(Unknown Source) > at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:2111) > at > org.apache.hadoop.hdfs.DistributedFileSystem$23.doCall(DistributedFileSystem.java:1390) > at > org.apache.hadoop.hdfs.DistributedFileSystem$23.doCall(DistributedFileSystem.java:1386) > at > org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) > at > org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1402) > at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1494) > at > org.apache.hadoop.hive.ql.session.SessionState.createPath(SessionState.java:719) > at > org.apache.hadoop.hive.ql.session.SessionState.createTmpTableSpaceDir(SessionState.java:635) > at > org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:613) > at > org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:540) > ... 1 more > {code} > If the router throws RetriableException when it cannot get a connection and the client sets > *dfs.client.retry.policy.enabled* to true, this problem can be resolved. 
-- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org