Sorry, I was late in checking the HDFS logs. It seems the problem is caused by
the HDFS DataXceiver!

The following log entries are from around the time of the exception.

There are no errors or warnings in the namenode log.

Datanode1:
2009-03-13 19:06:20,352 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.12:50010, storageID=DS-1393238225-10.24.1.12-50010-1236857766916,
infoPort=50075, ipcPort=50020):DataXceiver
java.io.EOFException
        at java.io.DataInputStream.readShort(DataInputStream.java:298)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:79)
        at java.lang.Thread.run(Thread.java:619)
2009-03-13 19:06:23,919 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /
10.24.1.18:53499, dest: /10.24.1.12:

Datanode2:
ion java.io.InterruptedIOException: Interruped while waiting for IO on
channel java.nio.channels.SocketChannel[connected
local=/10.24.1.14:50859remote=/
10.24.1.18:50010]. 59964 millis timeout left.
        at
org.apache.hadoop.net.SocketIOWithTimeout$SelectorPool.select(SocketIOWithTimeout.java:277)
        at
org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:155)
        at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:150)
        at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:123)
        at java.io.DataInputStream.readFully(DataInputStream.java:178)
        at java.io.DataInputStream.readLong(DataInputStream.java:399)
        at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:853)
        at java.lang.Thread.run(Thread.java:619)

2009-03-13 19:04:43,301 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder
blk_-910979015546692644_66299 1 : Thread is interrupted.
2009-03-13 19:04:43,301 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder 1 for block
blk_-910979015546692644_66299 terminating
2009-03-13 19:04:43,301 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode: writeBlock
blk_-910979015546692644_66299 received exception java.io.IOException:
Connection reset by peer
2009-03-13 19:04:43,301 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.14:50010, storageID=DS-567164670-10.24.1.14-50010-1236857753312,
infoPort=50075, ipcPort=50020):DataXceiver
java.io.IOException: Connection reset by peer
        at sun.nio.ch.FileDispatcher.read0(Native Method)
        at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:21)
        at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:233)
        at sun.nio.ch.IOUtil.read(IOUtil.java:206)
        at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:236)
        at
org.apache.hadoop.net.SocketInputStream$Reader.performIO(SocketInputStream.java:55)
        at
org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:140)
        at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:150)
        at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:123)
        at java.io.BufferedInputStream.read1(BufferedInputStream.java:256)
        at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
        at java.io.DataInputStream.read(DataInputStream.java:132)
        at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readToBuf(BlockReceiver.java:261)
        at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readNextPacket(BlockReceiver.java:308)
        at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:372)
        at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:524)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:357)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
        at java.lang.Thread.run(Thread.java:619)

Datanode3:
2009-03-13 19:08:44,511 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.16:50010, storageID=DS-1231915619-10.24.1.16-50010-1236857760922,
infoPort=50075, ipcPort=50020):DataXceiver
java.io.IOException: Connection reset by peer
        at sun.nio.ch.FileDispatcher.read0(Native Method)
        at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:21)
        at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:233)
        at sun.nio.ch.IOUtil.read(IOUtil.java:206)
        at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:236)
        at
org.apache.hadoop.net.SocketInputStream$Reader.performIO(SocketInputStream.java:55)
        at
org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:140)
        at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:150)
        at
org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:123)
        at java.io.BufferedInputStream.read1(BufferedInputStream.java:256)
        at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
        at java.io.DataInputStream.read(DataInputStream.java:132)
        at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readToBuf(BlockReceiver.java:261)
        at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readNextPacket(BlockReceiver.java:308)
        at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:372)
        at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:524)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:357)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
        at java.lang.Thread.run(Thread.java:619)
2009-03-13 19:08:45,435 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /
10.24.1.16:50010, dest: /10.24.1.18:58953, bytes: 132096,

.....
2009-03-13 19:08:46,461 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.16:50010, storageID=DS-1231915619-10.24.1.16-50010-1236857760922,
infoPort=50075, ipcPort=50020):DataXceiver
java.io.EOFException
        at java.io.DataInputStream.readShort(DataInputStream.java:298)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:79)
        at java.lang.Thread.run(Thread.java:619)
2009-03-13 19:08:48,975 INFO
org.apache.hadoop.hdfs.server.datanode.DataNode: Deleting block
blk_-8652778507652884344_66369 file
/data/schubert/sandmill/hadoop-schubert/dfs/data/current/subdir54/blk_-8652778507652884344

Datanode4:
2009-03-13 19:11:00,075 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.18:50010, storageID=DS-1468632010-10.24.1.18-50010-1236857745172,
infoPort=50075, ipcPort=50020):DataXceiver
java.io.IOException: Unexpected problem in creating temporary file for
blk_-4705866815694058664_66391.  File
/data/schubert/sandmill/hadoop-schubert/dfs/data/tmp/blk_-4705866815694058664
should not be present, but is.
        at
org.apache.hadoop.hdfs.server.datanode.FSDataset$FSVolume.createTmpFile(FSDataset.java:390)
        at
org.apache.hadoop.hdfs.server.datanode.FSDataset$FSVolume.createTmpFile(FSDataset.java:368)
        at
org.apache.hadoop.hdfs.server.datanode.FSDataset.createTmpFile(FSDataset.java:1125)
        at
org.apache.hadoop.hdfs.server.datanode.FSDataset.writeToBlock(FSDataset.java:1014)
        at
org.apache.hadoop.hdfs.server.datanode.BlockReceiver.<init>(BlockReceiver.java:97)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:259)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
        at java.lang.Thread.run(Thread.java:619)

Datanode5:
     2009-03-13 19:06:28,139 ERROR
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987,
infoPort=50075, ipcPort=50020):DataXceiver
java.net.SocketException: Too many open files
        at sun.nio.ch.Net.socket0(Native Method)
        at sun.nio.ch.Net.socket(Net.java:94)
        at sun.nio.ch.SocketChannelImpl.<init>(SocketChannelImpl.java:84)
        at
sun.nio.ch.SelectorProviderImpl.openSocketChannel(SelectorProviderImpl.java:37)
        at java.nio.channels.SocketChannel.open(SocketChannel.java:105)
        at
org.apache.hadoop.hdfs.server.datanode.DataNode.newSocket(DataNode.java:385)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:277)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
        at java.lang.Thread.run(Thread.java:619)

and many exceptions like:
2009-03-13 19:06:34,033 WARN
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987,
infoPort=50075, ipcPort=50020):DataXceiveServer: java.io.IOException: Too
many open files
        at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
        at
sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:145)
        at
sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:84)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:130)
        at java.lang.Thread.run(Thread.java:619)

2009-03-13 19:06:34,033 WARN
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987,
infoPort=50075, ipcPort=50020):DataXceiveServer: java.io.IOException: Too
many open files
        at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
        at
sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:145)
        at
sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:84)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:130)
        at java.lang.Thread.run(Thread.java:619)

2009-03-13 19:06:34,033 WARN
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987,
infoPort=50075, ipcPort=50020):DataXceiveServer: java.io.IOException: Too
many open files
        at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
        at
sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:145)
        at
sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:84)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:130)
        at java.lang.Thread.run(Thread.java:619)

2009-03-13 19:06:34,033 WARN
org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(
10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987,
infoPort=50075, ipcPort=50020):DataXceiveServer: java.io.IOException: Too
many open files
        at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
        at
sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:145)
        at
sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:84)
        at
org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:130)
        at java.lang.Thread.run(Thread.java:619)



On Fri, Mar 13, 2009 at 10:32 PM, Jean-Daniel Cryans <[email protected]>wrote:

> schubert,
>
> What's happening on the HDFS side? Is it really closed? What do you
> see in the datanodes logs?
>
> Thx,
>
> J-D
>
> On Fri, Mar 13, 2009 at 9:23 AM, schubert zhang <[email protected]> wrote:
> > HBase 0.19.0.
> > 5 rangeservers and 1 master, overlay hadoop (5+1).
> >
> > While inserting rows, one regionserver shut itself down.
> >
> > 2009-03-13 19:11:40,335 INFO
> > org.apache.hadoop.hbase.regionserver.HRegionServer: aborting server at:
> > 10.24.1.20:60020
> > 2009-03-13 19:11:40,336 ERROR org.apache.hadoop.hdfs.DFSClient: Exception
> > closing file
> /hbase/TESTTAB/1481479883/cdr/mapfiles/651889757518114976/index
> > : java.io.IOException: Filesystem closed
> > java.io.IOException: Filesystem closed
> >        at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:197)
> >        at org.apache.hadoop.hdfs.DFSClient.access$600(DFSClient.java:65)
> >        at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.closeInternal(DFSClient.java:3085)
> >        at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.close(DFSClient.java:3054)
> >        at
> > org.apache.hadoop.hdfs.DFSClient$LeaseChecker.close(DFSClient.java:942)
> >        at org.apache.hadoop.hdfs.DFSClient.close(DFSClient.java:209)
> >        at
> >
> org.apache.hadoop.hdfs.DistributedFileSystem.close(DistributedFileSystem.java:264)
> >        at
> > org.apache.hadoop.fs.FileSystem$Cache.closeAll(FileSystem.java:1413)
> >        at org.apache.hadoop.fs.FileSystem.closeAll(FileSystem.java:236)
> >        at
> > org.apache.hadoop.fs.FileSystem$ClientFinalizer.run(FileSystem.java:221)
> > 2009-03-13 19:11:40,336 ERROR org.apache.hadoop.hdfs.DFSClient: Exception
> > closing file
> >
> /hbase/TESTTAB/compaction.dir/1481479883/cdr/mapfiles/2407942683821506355/index
> > : java.io.IOException: Filesystem closed
> > java.io.IOException: Filesystem closed
> >        at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:197)
> >        at org.apache.hadoop.hdfs.DFSClient.access$600(DFSClient.java:65)
> >        at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.closeInternal(DFSClient.java:3085)
> >        at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.close(DFSClient.java:3054)
> >        at
> > org.apache.hadoop.hdfs.DFSClient$LeaseChecker.close(DFSClient.java:942)
> >        at org.apache.hadoop.hdfs.DFSClient.close(DFSClient.java:209)
> >        at
> >
> org.apache.hadoop.hdfs.DistributedFileSystem.close(DistributedFileSystem.java:264)
> >        at
> > org.apache.hadoop.fs.FileSystem$Cache.closeAll(FileSystem.java:1413)
> >        at org.apache.hadoop.fs.FileSystem.closeAll(FileSystem.java:236)
> >        at
> > org.apache.hadoop.fs.FileSystem$ClientFinalizer.run(FileSystem.java:221)
> > 2009-03-13 19:11:40,336 ERROR org.apache.hadoop.hdfs.DFSClient: Exception
> > closing file
> >
> /hbase/TESTTAB/compaction.dir/1481479883/cdr/mapfiles/6642350706924681632/index
> > : java.io.IOException: Filesystem closed
> > java.io.IOException: Filesystem closed
> >        at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:197)
> >        at org.apache.hadoop.hdfs.DFSClient.access$600(DFSClient.java:65)
> >        at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.closeInternal(DFSClient.java:3085)
> >        at
> >
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.close(DFSClient.java:3054)
> >        at
> > org.apache.hadoop.hdfs.DFSClient$LeaseChecker.close(DFSClient.java:942)
> >        at org.apache.hadoop.hdfs.DFSClient.close(DFSClient.java:209)
> >        at
> >
> org.apache.hadoop.hdfs.DistributedFileSystem.close(DistributedFileSystem.java:264)
> >        at
> > org.apache.hadoop.fs.FileSystem$Cache.closeAll(FileSystem.java:1413)
> >        at org.apache.hadoop.fs.FileSystem.closeAll(FileSystem.java:236)
> >        at
> > org.apache.hadoop.fs.FileSystem$ClientFinalizer.run(FileSystem.java:221)
> > 2009-03-13 19:11:40,460 INFO
> > org.apache.hadoop.hbase.regionserver.HRegionServer:
> > regionserver/0:0:0:0:0:0:0:0:60020 exiting
> > 2009-03-13 19:11:40,475 INFO
> > org.apache.hadoop.hbase.regionserver.HRegionServer: Starting shutdown
> > thread.
> > 2009-03-13 19:11:40,475 INFO
> > org.apache.hadoop.hbase.regionserver.HRegionServer: Shutdown thread
> complete
> >
>

Reply via email to