Sorry, I was late in checking the HDFS logs. It seems the problem is caused by
the HDFS DataXceiver!
The following log entries are from around the time of the exception.
There are no errors or warnings in the namenode log.
Datanode1:
2009-03-13 19:06:20,352 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.12:50010, storageID=DS-1393238225-10.24.1.12-50010-1236857766916,
infoPort=50075, ipcPort=50020):DataXceiver
java.io.EOFException
at java.io.DataInputStream.readShort(DataInputStream.java:298)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:79)
at java.lang.Thread.run(Thread.java:619)
2009-03-13 19:06:23,919 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /
10.24.1.18:53499, dest: /10.24.1.12:
Datanode2:
ion java.io.InterruptedIOException: Interruped while waiting for IO on
channel java.nio.channels.SocketChannel[connected
local=/10.24.1.14:50859remote=/
10.24.1.18:50010]. 59964 millis timeout left.
at
org.apache.hadoop.net.SocketIOWithTimeout$SelectorPool.select(SocketIOWithTimeout.java:277)
at
org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:155)
at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:150)
at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:123)
at java.io.DataInputStream.readFully(DataInputStream.java:178)
at java.io.DataInputStream.readLong(DataInputStream.java:399)
at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:853)
at java.lang.Thread.run(Thread.java:619)
2009-03-13 19:04:43,301 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder
blk_-910979015546692644_66299 1 : Thread is interrupted.
2009-03-13 19:04:43,301 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder 1 for block
blk_-910979015546692644_66299 terminating
2009-03-13 19:04:43,301 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode: writeBlock
blk_-910979015546692644_66299 received exception java.io.IOException:
Connection reset by peer
2009-03-13 19:04:43,301 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.14:50010, storageID=DS-567164670-10.24.1.14-50010-1236857753312,
infoPort=50075, ipcPort=50020):DataXceiver
java.io.IOException: Connection reset by peer
at sun.nio.ch.FileDispatcher.read0(Native Method)
at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:21)
at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:233)
at sun.nio.ch.IOUtil.read(IOUtil.java:206)
at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:236)
at
org.apache.hadoop.net.SocketInputStream$Reader.performIO(SocketInputStream.java:55)
at
org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:140)
at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:150)
at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:123)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:256)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
at java.io.DataInputStream.read(DataInputStream.java:132)
at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readToBuf(BlockReceiver.java:261)
at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readNextPacket(BlockReceiver.java:308)
at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:372)
at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:524)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:357)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
at java.lang.Thread.run(Thread.java:619)
Datanode3:
2009-03-13 19:08:44,511 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.16:50010, storageID=DS-1231915619-10.24.1.16-50010-1236857760922,
infoPort=50075, ipcPort=50020):DataXceiver
java.io.IOException: Connection reset by peer
at sun.nio.ch.FileDispatcher.read0(Native Method)
at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:21)
at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:233)
at sun.nio.ch.IOUtil.read(IOUtil.java:206)
at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:236)
at
org.apache.hadoop.net.SocketInputStream$Reader.performIO(SocketInputStream.java:55)
at
org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:140)
at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:150)
at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:123)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:256)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
at java.io.DataInputStream.read(DataInputStream.java:132)
at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readToBuf(BlockReceiver.java:261)
at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readNextPacket(BlockReceiver.java:308)
at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:372)
at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:524)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:357)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
at java.lang.Thread.run(Thread.java:619)
2009-03-13 19:08:45,435 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /
10.24.1.16:50010, dest: /10.24.1.18:58953, bytes: 132096,
.....
2009-03-13 19:08:46,461 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.16:50010, storageID=DS-1231915619-10.24.1.16-50010-1236857760922,
infoPort=50075, ipcPort=50020):DataXceiver
java.io.EOFException
at java.io.DataInputStream.readShort(DataInputStream.java:298)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:79)
at java.lang.Thread.run(Thread.java:619)
2009-03-13 19:08:48,975 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode: Deleting block
blk_-8652778507652884344_66369 file
/data/schubert/sandmill/hadoop-schubert/dfs/data/current/subdir54/blk_-8652778507652884344
Datanode4:
2009-03-13 19:11:00,075 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.18:50010, storageID=DS-1468632010-10.24.1.18-50010-1236857745172,
infoPort=50075, ipcPort=50020):DataXceiver
java.io.IOException: Unexpected problem in creating temporary file for
blk_-4705866815694058664_66391. File
/data/schubert/sandmill/hadoop-schubert/dfs/data/tmp/blk_-4705866815694058664
should not be present, but is.
at
org.apache.hadoop.hdfs.server.datanode.FSDataset$FSVolume.createTmpFile(FSDataset.java:390)
at
org.apache.hadoop.hdfs.server.datanode.FSDataset$FSVolume.createTmpFile(FSDataset.java:368)
at
org.apache.hadoop.hdfs.server.datanode.FSDataset.createTmpFile(FSDataset.java:1125)
at
org.apache.hadoop.hdfs.server.datanode.FSDataset.writeToBlock(FSDataset.java:1014)
at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.<init>(BlockReceiver.java:97)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:259)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
at java.lang.Thread.run(Thread.java:619)
Datanode5:
2009-03-13 19:06:28,139 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987,
infoPort=50075, ipcPort=50020):DataXceiver
java.net.SocketException: Too many open files
at sun.nio.ch.Net.socket0(Native Method)
at sun.nio.ch.Net.socket(Net.java:94)
at sun.nio.ch.SocketChannelImpl.<init>(SocketChannelImpl.java:84)
at
sun.nio.ch.SelectorProviderImpl.openSocketChannel(SelectorProviderImpl.java:37)
at java.nio.channels.SocketChannel.open(SocketChannel.java:105)
at
org.apache.hadoop.hdfs.server.datanode.DataNode.newSocket(DataNode.java:385)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:277)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
at java.lang.Thread.run(Thread.java:619)
and many exceptions such as:
2009-03-13 19:06:34,033 WARN
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987,
infoPort=50075, ipcPort=50020):DataXceiveServer: java.io.IOException: Too
many open files
at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
at
sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:145)
at
sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:84)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:130)
at java.lang.Thread.run(Thread.java:619)
2009-03-13 19:06:34,033 WARN
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987,
infoPort=50075, ipcPort=50020):DataXceiveServer: java.io.IOException: Too
many open files
at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
at
sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:145)
at
sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:84)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:130)
at java.lang.Thread.run(Thread.java:619)
2009-03-13 19:06:34,033 WARN
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987,
infoPort=50075, ipcPort=50020):DataXceiveServer: java.io.IOException: Too
many open files
at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
at
sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:145)
at
sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:84)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:130)
at java.lang.Thread.run(Thread.java:619)
2009-03-13 19:06:34,033 WARN
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987,
infoPort=50075, ipcPort=50020):DataXceiveServer: java.io.IOException: Too
many open files
at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
at
sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:145)
at
sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:84)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:130)
at java.lang.Thread.run(Thread.java:619)
On Fri, Mar 13, 2009 at 10:32 PM, Jean-Daniel Cryans <[email protected]>wrote:
> schubert,
>
> What's happening on the HDFS side? Is it really closed? What do you
> see in the datanodes logs?
>
> Thx,
>
> J-D
>
> On Fri, Mar 13, 2009 at 9:23 AM, schubert zhang <[email protected]> wrote:
> > HBase 0.19.0.
> > 5 rangeservers and 1 master, overlay hadoop (5+1).
> >
> > When inserting rows, one rangeserve Shutdown itself.
> >
> > 2009-03-13 19:11:40,335 INFO
> > org.apache.hadoop.hbase.regionserver.HRegionServer: aborting server at:
> > 10.24.1.20:60020
> > 2009-03-13 19:11:40,336 ERROR org.apache.hadoop.hdfs.DFSClient: Exception
> > closing file
> /hbase/TESTTAB/1481479883/cdr/mapfiles/651889757518114976/index
> > : java.io.IOException: Filesystem closed
> > java.io.IOException: Filesystem closed
> > at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:197)
> > at org.apache.hadoop.hdfs.DFSClient.access$600(DFSClient.java:65)
> > at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.closeInternal(DFSClient.java:3085)
> > at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.close(DFSClient.java:3054)
> > at
> > org.apache.hadoop.hdfs.DFSClient$LeaseChecker.close(DFSClient.java:942)
> > at org.apache.hadoop.hdfs.DFSClient.close(DFSClient.java:209)
> > at
> >
> org.apache.hadoop.hdfs.DistributedFileSystem.close(DistributedFileSystem.java:264)
> > at
> > org.apache.hadoop.fs.FileSystem$Cache.closeAll(FileSystem.java:1413)
> > at org.apache.hadoop.fs.FileSystem.closeAll(FileSystem.java:236)
> > at
> > org.apache.hadoop.fs.FileSystem$ClientFinalizer.run(FileSystem.java:221)
> > 2009-03-13 19:11:40,336 ERROR org.apache.hadoop.hdfs.DFSClient: Exception
> > closing file
> >
> /hbase/TESTTAB/compaction.dir/1481479883/cdr/mapfiles/2407942683821506355/index
> > : java.io.IOException: Filesystem closed
> > java.io.IOException: Filesystem closed
> > at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:197)
> > at org.apache.hadoop.hdfs.DFSClient.access$600(DFSClient.java:65)
> > at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.closeInternal(DFSClient.java:3085)
> > at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.close(DFSClient.java:3054)
> > at
> > org.apache.hadoop.hdfs.DFSClient$LeaseChecker.close(DFSClient.java:942)
> > at org.apache.hadoop.hdfs.DFSClient.close(DFSClient.java:209)
> > at
> >
> org.apache.hadoop.hdfs.DistributedFileSystem.close(DistributedFileSystem.java:264)
> > at
> > org.apache.hadoop.fs.FileSystem$Cache.closeAll(FileSystem.java:1413)
> > at org.apache.hadoop.fs.FileSystem.closeAll(FileSystem.java:236)
> > at
> > org.apache.hadoop.fs.FileSystem$ClientFinalizer.run(FileSystem.java:221)
> > 2009-03-13 19:11:40,336 ERROR org.apache.hadoop.hdfs.DFSClient: Exception
> > closing file
> >
> /hbase/TESTTAB/compaction.dir/1481479883/cdr/mapfiles/6642350706924681632/index
> > : java.io.IOException: Filesystem closed
> > java.io.IOException: Filesystem closed
> > at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:197)
> > at org.apache.hadoop.hdfs.DFSClient.access$600(DFSClient.java:65)
> > at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.closeInternal(DFSClient.java:3085)
> > at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.close(DFSClient.java:3054)
> > at
> > org.apache.hadoop.hdfs.DFSClient$LeaseChecker.close(DFSClient.java:942)
> > at org.apache.hadoop.hdfs.DFSClient.close(DFSClient.java:209)
> > at
> >
> org.apache.hadoop.hdfs.DistributedFileSystem.close(DistributedFileSystem.java:264)
> > at
> > org.apache.hadoop.fs.FileSystem$Cache.closeAll(FileSystem.java:1413)
> > at org.apache.hadoop.fs.FileSystem.closeAll(FileSystem.java:236)
> > at
> > org.apache.hadoop.fs.FileSystem$ClientFinalizer.run(FileSystem.java:221)
> > 2009-03-13 19:11:40,460 INFO
> > org.apache.hadoop.hbase.regionserver.HRegionServer:
> > regionserver/0:0:0:0:0:0:0:0:60020 exiting
> > 2009-03-13 19:11:40,475 INFO
> > org.apache.hadoop.hbase.regionserver.HRegionServer: Starting shutdown
> > thread.
> > 2009-03-13 19:11:40,475 INFO
> > org.apache.hadoop.hbase.regionserver.HRegionServer: Shutdown thread
> complete
> >
>