the regionserver jstack log is http://paste2.org/yLDJeXgL
------------------ ???????? ------------------ ??????: "????-????";<[email protected]>; ????????: 2015??10??29??(??????) ????9:10 ??????: "user"<[email protected]>; ????: ?????? ?????? Hbase cluster is suddenly unable to respond hi Ted?? Yesterday around 14:40,one of regionservers hang once against.At that time I saw web ui can not open.Hbase cluster is unable to respond.Very anxious, hoping to get help?? jstack log is as follows?? "RpcServer.reader=4,port=60020" daemon prio=10 tid=0x00007f4466146800 nid=0x12d3 runnable [0x00007f3bebe58000] java.lang.Thread.State: RUNNABLE at sun.nio.ch.NativeThread.current(Native Method) at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:325) - locked <0x00007f3d27360fb0> (a java.lang.Object) - locked <0x00007f3d27360f90> (a java.lang.Object) at org.apache.hadoop.hbase.ipc.RpcServer.channelIO(RpcServer.java:2402) at org.apache.hadoop.hbase.ipc.RpcServer.channelRead(RpcServer.java:2368) at org.apache.hadoop.hbase.ipc.RpcServer$Connection.readAndProcess(RpcServer.java:1476) at org.apache.hadoop.hbase.ipc.RpcServer$Listener.doRead(RpcServer.java:770) at org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.doRunLoop(RpcServer.java:563) - locked <0x00007f3c584ce990> (a org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader) at org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.run(RpcServer.java:538) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) "RpcServer.reader=3,port=60020" daemon prio=10 tid=0x00007f4466145000 nid=0x12d2 runnable [0x00007f3bebf59000] java.lang.Thread.State: RUNNABLE at sun.nio.ch.NativeThread.current(Native Method) at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:325) - locked <0x00007f3d27360530> (a java.lang.Object) - locked <0x00007f3d27360510> (a java.lang.Object) at org.apache.hadoop.hbase.ipc.RpcServer.channelIO(RpcServer.java:2402) at org.apache.hadoop.hbase.ipc.RpcServer.channelRead(RpcServer.java:2368) at org.apache.hadoop.hbase.ipc.RpcServer$Connection.readAndProcess(RpcServer.java:1476) at org.apache.hadoop.hbase.ipc.RpcServer$Listener.doRead(RpcServer.java:770) at org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.doRunLoop(RpcServer.java:563) - locked <0x00007f3c584cf7d8> (a org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader) at org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.run(RpcServer.java:538) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) region server log : 2015-10-28 14:38:19,801 INFO [Thread-15] regionserver.HRegion: Finished memstore flush of ~3.6 M/3820648, currentsize=536/536 for region order_history,2801xyz140618175732642$3,1418829598639.afc853471a8cd4184bc9e7be00b8eea0. in 45ms, sequenceid=9599960557, compaction requested=true 2015-10-28 14:38:32,693 ERROR [RpcServer.handler=3,port=60020] regionserver.HRegionServer: org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException: Expected nextCallSeq: 1 But the nextCallSeq got from client: 0; request=scanner_id: 16740356019163164014 number_of_rows: 10 close_scanner: false next_call_seq: 0 at org.apache.hadoop.hbase.regionserver.HRegionServer.scan(HRegionServer.java:3007) at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:26929) at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2146) at org.apache.hadoop.hbase.ipc.RpcServer$Handler.run(RpcServer.java:1851) 2015-10-28 14:38:32,693 ERROR [RpcServer.handler=9,port=60020] regionserver.HRegionServer: org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException: Expected nextCallSeq: 1 But the nextCallSeq got from client: 0; request=scanner_id: 16740356019163164014 number_of_rows: 10 close_scanner: false next_call_seq: 0 at org.apache.hadoop.hbase.regionserver.HRegionServer.scan(HRegionServer.java:3007) at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:26929) at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2146) at org.apache.hadoop.hbase.ipc.RpcServer$Handler.run(RpcServer.java:1851) 2015-10-28 14:38:32,696 WARN [RpcServer.reader=2,port=60020] ipc.RpcServer: Unable to read call parameter from client 192.168.37.135 com.google.protobuf.UninitializedMessageException: Message missing required fields: region, get at com.google.protobuf.AbstractMessage$Builder.newUninitializedMessageException(AbstractMessage.java:770) at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$GetRequest$Builder.build(ClientProtos.java:4474) at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$GetRequest$Builder.build(ClientProtos.java:4406) at org.apache.hadoop.hbase.ipc.RpcServer$Connection.processRequest(RpcServer.java:1689) at org.apache.hadoop.hbase.ipc.RpcServer$Connection.processOneRpc(RpcServer.java:1631) at org.apache.hadoop.hbase.ipc.RpcServer$Connection.readAndProcess(RpcServer.java:1491) at org.apache.hadoop.hbase.ipc.RpcServer$Listener.doRead(RpcServer.java:770) at org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.doRunLoop(RpcServer.java:563) at org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.run(RpcServer.java:538) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) 2015-10-28 14:38:32,696 WARN [RpcServer.reader=1,port=60020] ipc.RpcServer: Unable to read call parameter from client 192.168.37.135 com.google.protobuf.UninitializedMessageException: Message missing required fields: region, get at com.google.protobuf.AbstractMessage$Builder.newUninitializedMessageException(AbstractMessage.java:770) at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$GetRequest$Builder.build(ClientProtos.java:4474) at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$GetRequest$Builder.build(ClientProtos.java:4406) at org.apache.hadoop.hbase.ipc.RpcServer$Connection.processRequest(RpcServer.java:1689) at org.apache.hadoop.hbase.ipc.RpcServer$Connection.processOneRpc(RpcServer.java:1631) at org.apache.hadoop.hbase.ipc.RpcServer$Connection.readAndProcess(RpcServer.java:1491) at org.apache.hadoop.hbase.ipc.RpcServer$Listener.doRead(RpcServer.java:770) at org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.doRunLoop(RpcServer.java:563) at org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.run(RpcServer.java:538) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) ------------------ ???????? ------------------ ??????: "????-????";<[email protected]>; ????????: 2015??10??26??(??????) ????9:28 ??????: "user"<[email protected]>; ????: ?????? ?????? Hbase cluster is suddenly unable to respond Thank you very much?? ------------------ ???????? ------------------ ??????: "Ted Yu";<[email protected]>; ????????: 2015??10??26??(??????) ????8:28 ??????: "user"<[email protected]>; ????: Re: ?????? Hbase cluster is suddenly unable to respond The fix from HBASE-11277 may solve your problem - if you collect stack trace during the hang, we would have more clue. I suggest upgrading to newer release such as 1.1.2 or 0.98.15 Cheers > On Oct 26, 2015, at 12:42 AM, ???? <[email protected]> wrote: > > hi,Ted: > > > I use the HBase version is hbase-0.96.0. > Around 17:33,other region servers also appeared in this warn log.I don't know > if it's normal or not.At that time I saw web ui can not open.I restart the > regionserver then hbase back to normal. Is it possible bug HBASE-11277?? > > > Regionserver on the log basically almost this warn log > mater on the log is as follows?? > 2015-10-21 22:15:43,575 INFO [CatalogJanitor-l-namenode2:60000] > master.CatalogJanitor: Scanned 672 catalog row(s), gc'd 0 unreferenced merged > region(s) and 1 unreferenced parent region(s) > 2015-10-23 17:47:25,617 INFO [RpcServer.handler=28,port=60000] > master.HMaster: Client=hbase//192.168.39.19 set balanceSwitch=false > 2015-10-23 17:49:45,513 WARN [RpcServer.handler=24,port=60000] > ipc.RpcServer: (responseTooSlow): > {"processingtimems":70266,"call":"ListTableDescriptorsByNamespace(org.apache.hadoop.hbase.protobuf.generated.MasterProtos$ListTableDescriptorsByNamespaceRequest)","client":"192.168.39.22:60292","starttimems":1445593715207,"queuetimems":0,"class":"HMaster","responsesize":704,"method":"ListTableDescriptorsByNamespace"} > 2015-10-23 17:49:45,513 WARN [RpcServer.handler=6,port=60000] ipc.RpcServer: > (responseTooSlow): > {"processingtimems":130525,"call":"ListTableDescriptorsByNamespace(org.apache.hadoop.hbase.protobuf.generated.MasterProtos$ListTableDescriptorsByNamespaceRequest)","client":"192.168.39.22:60286","starttimems":1445593654945,"queuetimems":0,"class":"HMaster","responsesize":704,"method":"ListTableDescriptorsByNamespace"} > 2015-10-23 17:49:45,513 WARN [RpcServer.handler=24,port=60000] > ipc.RpcServer: RpcServer.respondercallId: 130953 service: MasterService > methodName: ListTableDescriptorsByNamespace size: 48 connection: > 192.168.39.22:60292: output error > 2015-10-23 17:49:45,513 WARN [RpcServer.handler=6,port=60000] ipc.RpcServer: > RpcServer.respondercallId: 130945 service: MasterService methodName: > ListTableDescriptorsByNamespace size: 48 connection: 192.168.39.22:60286: > output error > 2015-10-23 17:49:45,513 WARN [RpcServer.handler=6,port=60000] ipc.RpcServer: > RpcServer.handler=6,port=60000: caught a ClosedChannelException, this means > that the server was processing a request but the client went away. The error > message was: null > > > > > > > > > > > > ------------------ ???????? ------------------ > ??????: "Ted Yu";<[email protected]>; > ????????: 2015??10??23??(??????) ????11:39 > ??????: "[email protected]"<[email protected]>; > > ????: Re: Hbase cluster is suddenly unable to respond > > > > Were other region servers functioning normally around 17:33 ? > > Which hbase release are you using ? > > Can you pastebin more of the region server log ? > > Thanks > >> On Fri, Oct 23, 2015 at 8:28 AM, ???? <[email protected]> wrote: >> >> hi??all?? >> >> >> This afternoon,The whole Hbase cluster is suddenly unable to respond.after >> I restart a regionserver after??the cluster has recovered.I don't know the >> cause of the trouble.I hope I can get help from you. >> >> >> Regionserver on the log is as follows?? >> 2015-10-23 17:28:49,335 INFO [regionserver60020.logRoller] wal.FSHLog: >> moving old hlog file /hbase/WALs/l-hbase30.data.cn8.qunar.com >> ,60020,1442810406218/l-hbase30.data.cn8.qunar.com%2C60020%2C1442810406218.1445580462689 >> whose highest sequenceid is 9071525521 to /hbase/oldWALs/ >> l-hbase30.data.cn8.qunar.com%2C60020%2C1442810406218.1445580462689 >> 2015-10-23 17:33:31,375 WARN [RpcServer.reader=8,port=60020] >> ipc.RpcServer: RpcServer.listener,port=60020: count of bytes read: 0 >> java.io.IOException: Connection reset by peer >> at sun.nio.ch.FileDispatcherImpl.read0(Native Method) >> at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:39) >> at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223) >> at sun.nio.ch.IOUtil.read(IOUtil.java:197) >> at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:379) >> at >> org.apache.hadoop.hbase.ipc.RpcServer.channelRead(RpcServer.java:2368) >> at >> org.apache.hadoop.hbase.ipc.RpcServer$Connection.readAndProcess(RpcServer.java:1403) >> at >> org.apache.hadoop.hbase.ipc.RpcServer$Listener.doRead(RpcServer.java:770) >> at >> org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.doRunLoop(RpcServer.java:563) >> at >> org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.run(RpcServer.java:538) >> at >> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) >> at >> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) >> at java.lang.Thread.run(Thread.java:744) >> 2015-10-23 17:33:31,779 WARN [RpcServer.reader=2,port=60020] >> ipc.RpcServer: RpcServer.listener,port=60020: count of bytes read: 0 >> java.io.IOException: Connection reset by peer >> at sun.nio.ch.FileDispatcherImpl.read0(Native Method) >> at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:39) >> at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223) >> at sun.nio.ch.IOUtil.read(IOUtil.java:197) >> at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:379) >> at >> org.apache.hadoop.hbase.ipc.RpcServer.channelRead(RpcServer.java:2368) >> at >> org.apache.hadoop.hbase.ipc.RpcServer$Connection.readAndProcess(RpcServer.java:1403) >> at >> org.apache.hadoop.hbase.ipc.RpcServer$Listener.doRead(RpcServer.java:770) >> at >> org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.doRunLoop(RpcServer.java:563) >> at >> org.apache.hadoop.hbase.ipc.RpcServer$Listener$Reader.run(RpcServer.java:538) >> at >> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) >> at >> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) >> at java.lang.Thread.run(Thread.java:744)
