[ https://issues.apache.org/jira/browse/HDDS-1787?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16883305#comment-16883305 ]
Siddharth Wagle commented on HDDS-1787: --------------------------------------- This ClassCastException (CCE) might be the cause of this issue. I found it while running a MiniOzoneChaos cluster. {code} 16:45:53.177 [IPC Server handler 10 on 35066] ERROR SCMAudit - user=root | ip=172.31.116.73 | op=SORT_DATANODE null | ret=FAILURE java.lang.ClassCastException: org.apache.hadoop.hdds.scm.net.InnerNodeImpl cannot be cast to org.apache.hadoop.hdds.protocol.DatanodeDetails at org.apache.hadoop.hdds.scm.node.SCMNodeManager.getNode(SCMNodeManager.java:571) ~[hadoop-hdds-server-scm-0.5.0-SNAPSHOT.jar:?] at org.apache.hadoop.hdds.scm.server.SCMBlockProtocolServer.lambda$sortDatanodes$0(SCMBlockProtocolServer.java:293) ~[hadoop-hdds-server-scm-0.5.0-SNAPSHOT.jar:?] at java.util.Iterator.forEachRemaining(Iterator.java:116) ~[?:1.8.0_211] at java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801) ~[?:1.8.0_211] at java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:580) ~[?:1.8.0_211] at org.apache.hadoop.hdds.scm.server.SCMBlockProtocolServer.sortDatanodes(SCMBlockProtocolServer.java:293) [hadoop-hdds-server-scm-0.5.0-SNAPSHOT.jar:?] at org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB.sortDatanodes(ScmBlockLocationProtocolServerSideTranslatorPB.java:213) [hadoop-hdds-common-0.5.0-SNAPSHOT.jar:?] at org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB.send(ScmBlockLocationProtocolServerSideTranslatorPB.java:124) [hadoop-hdds-common-0.5.0-SNAPSHOT.jar:?] at org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos$ScmBlockLocationProtocolService$2.callBlockingMethod(ScmBlockLocationProtocolProtos.java:13157) [hadoop-hdds-common-0.5.0-SNAPSHOT.jar:?] at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524) [hadoop-common-3.2.0.jar:?] at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025) [hadoop-common-3.2.0.jar:?] 
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:876) [hadoop-common-3.2.0.jar:?] at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:822) [hadoop-common-3.2.0.jar:?] at java.security.AccessController.doPrivileged(Native Method) [?:1.8.0_211] at javax.security.auth.Subject.doAs(Subject.java:422) [?:1.8.0_211] at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730) [hadoop-common-3.2.0.jar:?] at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2682) [hadoop-common-3.2.0.jar:?] {code} > NPE thrown while trying to find DN closest to client > ---------------------------------------------------- > > Key: HDDS-1787 > URL: https://issues.apache.org/jira/browse/HDDS-1787 > Project: Hadoop Distributed Data Store > Issue Type: Bug > Components: Ozone Datanode > Affects Versions: 0.5.0 > Reporter: Siddharth Wagle > Priority: Major > > cc: [~xyao] This seems related to the client side topology changes, not sure > if some other Jira is already addressing this. 
> {code} > 2019-07-10 16:45:53,176 WARN ipc.Server (Server.java:logException(2724)) - > IPC Server handler 14 on 35066, call Call#127037 Retry#0 > org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol.send from 172.31.116.73:52540 > java.lang.NullPointerException > at > org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB.lambda$sortDatanodes$0(ScmBlockLocationProtocolServerSideTranslatorPB.java:215) > at > java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1382) > at > java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:580) > at > org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB.sortDatanodes(ScmBlockLocationProtocolServerSideTranslatorPB.java:215) > at > org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB.send(ScmBlockLocationProtocolServerSideTranslatorPB.java:124) > at > org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos$ScmBlockLocationProtocolService$2.callBlockingMethod(ScmBlockLocationProtocolProtos.java:13157) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:876) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:822) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2682) > 2019-07-10 16:45:53,176 WARN om.KeyManagerImpl > (KeyManagerImpl.java:lambda$sortDatanodeInPipeline$7(2129)) - Unable to sort > datanodes based on distance to client, volume=xqoyzocpse, bucket=vxwajaczqh, > key=pool-444-thread-7-201077822, client=127.0.0.1, > datanodes=[10f15723-45d7-4a0c-8f01-8b101744a110{ip: 172.31.116.73, host: > 
sid-minichaos.gce.cloudera.com, networkLocation: /default-rack, certSerialId: > null}, 7ac2777f-0a5c-4414-9e7f-bfbc47d696ea{ip: 172.31.116.73, host: > sid-minichaos.gce.cloudera.com, networkLocation: /default-rack, certSerialId: > null}], exception=java.lang.NullPointerException > at > org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB.lambda$sortDatanodes$0(ScmBlockLocationProtocolServerSideTranslatorPB.java:215) > at > java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1382) > at > java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:580) > at > org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB.sortDatanodes(ScmBlockLocationProtocolServerSideTranslatorPB.java:215) > at > org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB.send(ScmBlockLocationProtocolServerSideTranslatorPB.java:124) > at > org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos$ScmBlockLocationProtocolService$2.callBlockingMethod(ScmBlockLocationProtocolProtos.java:13157) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:876) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:822) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2682) > {code} -- This message was sent by Atlassian JIRA (v7.6.14#76016) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org