Soumitra Sulav created HDDS-728: ----------------------------------- Summary: Datanodes are going to dead state after some interval Key: HDDS-728 URL: https://issues.apache.org/jira/browse/HDDS-728 Project: Hadoop Distributed Data Store Issue Type: Bug Components: Ozone Filesystem Affects Versions: 0.3.0 Reporter: Soumitra Sulav
Set up a 5-datanode Ozone cluster with HDP on top of it. After restarting all HDP services a few times, I encountered the issue below, which causes the HDP services to fail. The same exception was observed in an older setup; at the time I assumed it was a problem with that particular setup, but I have now hit the same issue in a new setup as well. {code:java} 2018-10-24 10:42:03,308 WARN org.apache.ratis.grpc.server.GrpcServerProtocolService: 2974da2b-e765-43f9-8d30-45fe40dcb9ab: Failed requestVote 1672d28e-800f-4318-895b-1648976acff6->2974da2b-e765-43f9-8d30-45fe40dcb9ab#0 org.apache.ratis.protocol.GroupMismatchException: 2974da2b-e765-43f9-8d30-45fe40dcb9ab: group-CE87A994686F not found. at org.apache.ratis.server.impl.RaftServerProxy$ImplMap.get(RaftServerProxy.java:114) at org.apache.ratis.server.impl.RaftServerProxy.getImplFuture(RaftServerProxy.java:252) at org.apache.ratis.server.impl.RaftServerProxy.getImpl(RaftServerProxy.java:261) at org.apache.ratis.server.impl.RaftServerProxy.getImpl(RaftServerProxy.java:256) at org.apache.ratis.server.impl.RaftServerProxy.requestVote(RaftServerProxy.java:411) at org.apache.ratis.grpc.server.GrpcServerProtocolService.requestVote(GrpcServerProtocolService.java:54) at org.apache.ratis.proto.grpc.RaftServerProtocolServiceGrpc$MethodHandlers.invoke(RaftServerProtocolServiceGrpc.java:319) at org.apache.ratis.thirdparty.io.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:171) at org.apache.ratis.thirdparty.io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:283) at org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:707) at org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37) at org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123) at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 2018-10-24 10:42:03,342 WARN org.apache.ratis.grpc.server.GrpcServerProtocolService: 2974da2b-e765-43f9-8d30-45fe40dcb9ab: Failed requestVote 7839294e-5657-447f-b320-6b390fffb963->2974da2b-e765-43f9-8d30-45fe40dcb9ab#0 org.apache.ratis.protocol.GroupMismatchException: 2974da2b-e765-43f9-8d30-45fe40dcb9ab: group-CE87A994686F not found. at org.apache.ratis.server.impl.RaftServerProxy$ImplMap.get(RaftServerProxy.java:114) at org.apache.ratis.server.impl.RaftServerProxy.getImplFuture(RaftServerProxy.java:252) at org.apache.ratis.server.impl.RaftServerProxy.getImpl(RaftServerProxy.java:261) at org.apache.ratis.server.impl.RaftServerProxy.getImpl(RaftServerProxy.java:256) at org.apache.ratis.server.impl.RaftServerProxy.requestVote(RaftServerProxy.java:411) at org.apache.ratis.grpc.server.GrpcServerProtocolService.requestVote(GrpcServerProtocolService.java:54) at org.apache.ratis.proto.grpc.RaftServerProtocolServiceGrpc$MethodHandlers.invoke(RaftServerProtocolServiceGrpc.java:319) at org.apache.ratis.thirdparty.io.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:171) at org.apache.ratis.thirdparty.io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:283) at org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:707) at org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37) at org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at 
java.lang.Thread.run(Thread.java:745) 2018-10-24 10:42:04,466 WARN org.apache.ratis.grpc.server.GrpcServerProtocolService: 2974da2b-e765-43f9-8d30-45fe40dcb9ab: Failed requestVote 1672d28e-800f-4318-895b-1648976acff6->2974da2b-e765-43f9-8d30-45fe40dcb9ab#0 org.apache.ratis.protocol.GroupMismatchException: 2974da2b-e765-43f9-8d30-45fe40dcb9ab: group-CE87A994686F not found. at org.apache.ratis.server.impl.RaftServerProxy$ImplMap.get(RaftServerProxy.java:114) at org.apache.ratis.server.impl.RaftServerProxy.getImplFuture(RaftServerProxy.java:252) at org.apache.ratis.server.impl.RaftServerProxy.getImpl(RaftServerProxy.java:261) at org.apache.ratis.server.impl.RaftServerProxy.getImpl(RaftServerProxy.java:256) at org.apache.ratis.server.impl.RaftServerProxy.requestVote(RaftServerProxy.java:411) at org.apache.ratis.grpc.server.GrpcServerProtocolService.requestVote(GrpcServerProtocolService.java:54) at org.apache.ratis.proto.grpc.RaftServerProtocolServiceGrpc$MethodHandlers.invoke(RaftServerProtocolServiceGrpc.java:319) at org.apache.ratis.thirdparty.io.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:171) at org.apache.ratis.thirdparty.io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:283) at org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:707) at org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37) at org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: 
hdfs-dev-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-dev-h...@hadoop.apache.org