Any comments?

On Thu, Jan 12, 2017 at 10:18 AM, Tulasi Paradarami <tulasi.krishn...@gmail.com> wrote:
> Hi,
>
> I noticed that Regionservers are intermittently raising the following
> exceptions, which manifest as request timeouts on the client side. HDFS is
> in a healthy state and there are no corrupted blocks (per "hdfs fsck").
> The datanodes were not out of service when these errors occurred, and GC
> pauses on the datanodes are usually around 0.3s.
>
> Also, when these exceptions occur, the HDFS metric "Send Data Packet
> Blocked On Network Average Time" tends to go up.
>
> Here are the configured values for some of the relevant parameters:
> dfs.client.socket-timeout: 10s
> dfs.datanode.socket.write.timeout: 10s
> dfs.namenode.avoid.read.stale.datanode: true
> dfs.namenode.avoid.write.stale.datanode: true
> dfs.datanode.max.xcievers: 8192
>
> Any pointers toward what could be causing these exceptions are
> appreciated. Thanks.
>
> CDH 5.7.2
> HBase 1.2.0
>
> ---> Regionserver logs
>
> 2017-01-11 19:19:04,940 WARN [PriorityRpcServer.handler=3,queue=1,port=60020] hdfs.BlockReaderFactory: I/O error constructing remote block reader.
> java.net.SocketTimeoutException: 10000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/datanode3:27094 remote=/datanode2:50010]
>     at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:164)
>     ...
>
> 2017-01-11 19:19:04,995 WARN [PriorityRpcServer.handler=11,queue=1,port=60020] hdfs.DFSClient: Connection failure: Failed to connect to /datanode2:50010 for file /hbase/data/default/<table_name>/ec9ca
> java.net.SocketTimeoutException: 10000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/datanode3:27107 remote=/datanode2:50010]
>     at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:164)
>     at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:161)
>     at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.readChannelFully(PacketReceiver.java:258)
>     at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.doReadFully(PacketReceiver.java:209)
>     at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.doRead(PacketReceiver.java:171)
>     at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.receiveNextPacket(PacketReceiver.java:102)
>     at org.apache.hadoop.hdfs.RemoteBlockReader2.readNextPacket(RemoteBlockReader2.java:207)
>     at org.apache.hadoop.hdfs.RemoteBlockReader2.read(RemoteBlockReader2.java:156)
>     at org.apache.hadoop.hdfs.BlockReaderUtil.readAll(BlockReaderUtil.java:32)
>     at org.apache.hadoop.hdfs.RemoteBlockReader2.readAll(RemoteBlockReader2.java:386)
>     at org.apache.hadoop.hdfs.DFSInputStream.actualGetFromOneDataNode(DFSInputStream.java:1193)
>     at org.apache.hadoop.hdfs.DFSInputStream.fetchBlockByteRange(DFSInputStream.java:1112)
>     at org.apache.hadoop.hdfs.DFSInputStream.pread(DFSInputStream.java:1473)
>     at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:1432)
>     at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:89)
>     at org.apache.hadoop.hbase.io.hfile.HFileBlock.positionalReadWithExtra(HFileBlock.java:752)
>     at org.apache.hadoop.hbase.io.hfile.HFileBlock$AbstractFSReader.readAtOffset(HFileBlock.java:1448)
>     at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1648)
>     at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1532)
>     at org.apache.hadoop.hbase.io.hfile.HFileReaderV2.readBlock(HFileReaderV2.java:445)
>     at org.apache.hadoop.hbase.io.hfile.HFileBlockIndex$BlockIndexReader.loadDataBlockWithScanInfo(HFileBlockIndex.java:261)
>     at org.apache.hadoop.hbase.io.hfile.HFileReaderV2$AbstractScannerV2.seekTo(HFileReaderV2.java:642)
>     at org.apache.hadoop.hbase.io.hfile.HFileReaderV2$AbstractScannerV2.reseekTo(HFileReaderV2.java:622)
>     at org.apache.hadoop.hbase.regionserver.StoreFileScanner.reseekAtOrAfter(StoreFileScanner.java:314)
>     at org.apache.hadoop.hbase.regionserver.StoreFileScanner.reseek(StoreFileScanner.java:226)
>     at org.apache.hadoop.hbase.regionserver.StoreFileScanner.enforceSeek(StoreFileScanner.java:437)
>     at org.apache.hadoop.hbase.regionserver.KeyValueHeap.pollRealKV(KeyValueHeap.java:340)
>     at org.apache.hadoop.hbase.regionserver.KeyValueHeap.generalizedSeek(KeyValueHeap.java:296)
>     at org.apache.hadoop.hbase.regionserver.KeyValueHeap.requestSeek(KeyValueHeap.java:261)
>     at org.apache.hadoop.hbase.regionserver.StoreScanner.reseek(StoreScanner.java:806)
>     at org.apache.hadoop.hbase.regionserver.StoreScanner.seekAsDirection(StoreScanner.java:794)
>     at org.apache.hadoop.hbase.regionserver.StoreScanner.next(StoreScanner.java:617)
>     at org.apache.hadoop.hbase.regionserver.KeyValueHeap.next(KeyValueHeap.java:147)
>     at org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.populateResult(HRegion.java:5696)
>     at org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.nextInternal(HRegion.java:5855)
>     at org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.nextRaw(HRegion.java:5633)
>     at org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.nextRaw(HRegion.java:5619)
>     at org.apache.phoenix.coprocessor.BaseScannerRegionObserver$2.nextRaw(BaseScannerRegionObserver.java:330)
>     at org.apache.phoenix.iterate.RegionScannerResultIterator.next(RegionScannerResultIterator.java:50)
>     at org.apache.phoenix.iterate.OrderedResultIterator.getResultIterator(OrderedResultIterator.java:240)
>     at org.apache.phoenix.iterate.OrderedResultIterator.next(OrderedResultIterator.java:193)
>     at org.apache.phoenix.coprocessor.ScanRegionObserver.getTopNScanner(ScanRegionObserver.java:245)
>     at org.apache.phoenix.coprocessor.ScanRegionObserver.doPostScannerOpen(ScanRegionObserver.java:226)
>     at org.apache.phoenix.coprocessor.BaseScannerRegionObserver.postScannerOpen(BaseScannerRegionObserver.java:202)
>     at org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost$52.call(RegionCoprocessorHost.java:1334)
>     at org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost$RegionOperation.call(RegionCoprocessorHost.java:1673)
>     at org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost.execOperation(RegionCoprocessorHost.java:1749)
>     at org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost.execOperationWithResult(RegionCoprocessorHost.java:1712)
>     at org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost.postScannerOpen(RegionCoprocessorHost.java:1329)
>     at org.apache.hadoop.hbase.regionserver.RSRpcServices.scan(RSRpcServices.java:2408)
>     at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:33648)
>     at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2170)
>     at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:109)
>     at org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133)
>     at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108)
>     at java.lang.Thread.run(Thread.java:745)
>
> ---> Datanode logs
>
> 2017-01-11 19:19:16,171 INFO [DataXceiver for client DFSClient_NONMAPREDUCE_-988481032_1 at /datanode4:13495 [Receiving block BP-684272099-namenode-1472795703360:blk_1075014094_1273639]] datanode.DataNode (BlockReceiver.java:receiveBlock(933)) - Exception for BP-684272099-namenode-1472795703360:blk_1075014094_1273639
> java.io.IOException: Premature EOF from inputStream
>     at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:201)
>     ...
>
> 2017-01-11 19:19:16,171 INFO [DataXceiver for client DFSClient_NONMAPREDUCE_-988481032_1 at /datanode4:46727 [Receiving block BP-684272099-namenode-1472795703360:blk_1075014093_1273638]] datanode.DataNode (BlockReceiver.java:receiveBlock(933)) - Exception for BP-684272099-namenode-1472795703360:blk_1075014093_1273638
> java.io.IOException: Premature EOF from inputStream
>     at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:201)
>     at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.doReadFully(PacketReceiver.java:213)
>     at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.doRead(PacketReceiver.java:134)
>     at org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver.receiveNextPacket(PacketReceiver.java:109)
>     at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:500)
>     at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:894)
>     at org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:798)
>     at org.apache.hadoop.hdfs.protocol.datatransfer.Receiver.opWriteBlock(Receiver.java:169)
>     at org.apache.hadoop.hdfs.protocol.datatransfer.Receiver.processOp(Receiver.java:106)
>     at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:246)
>     at java.lang.Thread.run(Thread.java:745)
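
One observation while this thread is open: both regionserver traces bottom out
in DFSInputStream.pread hitting the 10s dfs.client.socket-timeout, so a quick
check is whether the same kind of positioned read still times out once the
client-side budget is raised. Below is a minimal sketch of that check, not the
HBase code path itself; the namenode URI, the HFile path argument, and the 60s
value are placeholders I made up, and note both timeout keys take milliseconds
when set directly (the "10s" above would be 10000 here).

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class PreadTimeoutCheck {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // Raise the client-side read/write socket budgets well past the
            // configured 10s to see if the SocketTimeoutException goes away.
            conf.setInt("dfs.client.socket-timeout", 60000);
            conf.setInt("dfs.datanode.socket.write.timeout", 60000);

            // Placeholder namenode URI; args[0] is a path to one of the HFiles
            // named in the regionserver log.
            FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf);
            byte[] buf = new byte[64 * 1024];
            try (FSDataInputStream in = fs.open(new Path(args[0]))) {
                // Positioned read -- the same PositionedReadable API that
                // HFileBlock.positionalReadWithExtra calls in the trace above.
                int n = in.read(0L, buf, 0, buf.length);
                System.out.println("pread returned " + n + " bytes without timing out");
            }
        }
    }

If reads still stall with a 60s budget, that points at the datanode or network
side rather than at the timeout setting itself.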
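
On the metric correlation: as far as I can tell, the "Send Data Packet Blocked
On Network Average Time" chart is backed by the datanode rate
sendDataPacketBlockedOnNetworkNanos, which is exposed through the datanode's
/jmx servlet (default HTTP port 50075 on CDH 5). A small sketch to poll it
while the timeouts are firing; the hostname is a placeholder:

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;
    import java.nio.charset.StandardCharsets;

    public class BlockedOnNetworkPoll {
        public static void main(String[] args) throws Exception {
            // Placeholder host; 50075 is the default datanode web port.
            URL jmx = new URL("http://datanode2:50075/jmx");
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(jmx.openStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = in.readLine()) != null) {
                    // The servlet pretty-prints JSON, roughly one attribute per
                    // line, so a substring match is enough for eyeballing.
                    if (line.contains("SendDataPacketBlockedOnNetworkNanos")) {
                        System.out.println(line.trim());
                    }
                }
            }
        }
    }

If the AvgTime on datanode2 climbs exactly when the regionserver timeouts fire,
that would implicate the network path (or a saturated NIC) rather than disks
or GC.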