schubert,

What I see is:

org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987, infoPort=50075, ipcPort=50020):DataXceiveServer: java.io.IOException: Too many open files

Did you change the nofile limit in /etc/security/limits.conf?
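If not, that is the usual fix: each DataXceiver thread holds sockets and block files open, so the default per-user descriptor limit (often 1024) runs out quickly under load. A minimal sketch of the limits.conf entries, assuming the daemons run as a user named "hadoop" (the user name and the value 32768 are only examples, not taken from this thread; adjust both for your cluster):

  # example only: substitute the user that actually runs the DataNode/regionserver
  hadoop    soft    nofile    32768
  hadoop    hard    nofile    32768

The new limit only applies to sessions started after the change, so restart the daemons from a fresh login and verify with "ulimit -n"; you can also count the descriptors a running DataNode currently holds with "lsof -p <datanode pid> | wc -l".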
J-D

On Fri, Mar 13, 2009 at 3:47 PM, schubert zhang <[email protected]> wrote:
> Sorry, I was late checking the HDFS logs. It seems to be caused by the
> HDFS DataXceiver!
>
> Around the time of this exception:
>
> There are no errors or warnings in the namenode log.
>
> Datanode1:
> 2009-03-13 19:06:20,352 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(10.24.1.12:50010, storageID=DS-1393238225-10.24.1.12-50010-1236857766916, infoPort=50075, ipcPort=50020):DataXceiver
> java.io.EOFException
>         at java.io.DataInputStream.readShort(DataInputStream.java:298)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:79)
>         at java.lang.Thread.run(Thread.java:619)
> 2009-03-13 19:06:23,919 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /10.24.1.18:53499, dest: /10.24.1.12:
>
> Datanode2:
> ...ion java.io.InterruptedIOException: Interruped while waiting for IO on channel java.nio.channels.SocketChannel[connected local=/10.24.1.14:50859 remote=/10.24.1.18:50010]. 59964 millis timeout left.
>         at org.apache.hadoop.net.SocketIOWithTimeout$SelectorPool.select(SocketIOWithTimeout.java:277)
>         at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:155)
>         at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:150)
>         at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:123)
>         at java.io.DataInputStream.readFully(DataInputStream.java:178)
>         at java.io.DataInputStream.readLong(DataInputStream.java:399)
>         at org.apache.hadoop.hdfs.server.datanode.BlockReceiver$PacketResponder.run(BlockReceiver.java:853)
>         at java.lang.Thread.run(Thread.java:619)
>
> 2009-03-13 19:04:43,301 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder blk_-910979015546692644_66299 1 : Thread is interrupted.
> 2009-03-13 19:04:43,301 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder 1 for block blk_-910979015546692644_66299 terminating
> 2009-03-13 19:04:43,301 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: writeBlock blk_-910979015546692644_66299 received exception java.io.IOException: Connection reset by peer
> 2009-03-13 19:04:43,301 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(10.24.1.14:50010, storageID=DS-567164670-10.24.1.14-50010-1236857753312, infoPort=50075, ipcPort=50020):DataXceiver
> java.io.IOException: Connection reset by peer
>         at sun.nio.ch.FileDispatcher.read0(Native Method)
>         at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:21)
>         at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:233)
>         at sun.nio.ch.IOUtil.read(IOUtil.java:206)
>         at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:236)
>         at org.apache.hadoop.net.SocketInputStream$Reader.performIO(SocketInputStream.java:55)
>         at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:140)
>         at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:150)
>         at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:123)
>         at java.io.BufferedInputStream.read1(BufferedInputStream.java:256)
>         at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
>         at java.io.DataInputStream.read(DataInputStream.java:132)
>         at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readToBuf(BlockReceiver.java:261)
>         at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readNextPacket(BlockReceiver.java:308)
>         at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:372)
>         at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:524)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:357)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
>         at java.lang.Thread.run(Thread.java:619)
>
> Datanode3:
> 2009-03-13 19:08:44,511 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(10.24.1.16:50010, storageID=DS-1231915619-10.24.1.16-50010-1236857760922, infoPort=50075, ipcPort=50020):DataXceiver
> java.io.IOException: Connection reset by peer
>         at sun.nio.ch.FileDispatcher.read0(Native Method)
>         at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:21)
>         at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:233)
>         at sun.nio.ch.IOUtil.read(IOUtil.java:206)
>         at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:236)
>         at org.apache.hadoop.net.SocketInputStream$Reader.performIO(SocketInputStream.java:55)
>         at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:140)
>         at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:150)
>         at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:123)
>         at java.io.BufferedInputStream.read1(BufferedInputStream.java:256)
>         at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
>         at java.io.DataInputStream.read(DataInputStream.java:132)
>         at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readToBuf(BlockReceiver.java:261)
>         at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.readNextPacket(BlockReceiver.java:308)
>         at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receivePacket(BlockReceiver.java:372)
>         at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.receiveBlock(BlockReceiver.java:524)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:357)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
>         at java.lang.Thread.run(Thread.java:619)
> 2009-03-13 19:08:45,435 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /10.24.1.16:50010, dest: /10.24.1.18:58953, bytes: 132096,
>
> .....
> 2009-03-13 19:08:46,461 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(10.24.1.16:50010, storageID=DS-1231915619-10.24.1.16-50010-1236857760922, infoPort=50075, ipcPort=50020):DataXceiver
> java.io.EOFException
>         at java.io.DataInputStream.readShort(DataInputStream.java:298)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:79)
>         at java.lang.Thread.run(Thread.java:619)
> 2009-03-13 19:08:48,975 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleting block blk_-8652778507652884344_66369 file /data/schubert/sandmill/hadoop-schubert/dfs/data/current/subdir54/blk_-8652778507652884344
>
> Datanode4:
> 2009-03-13 19:11:00,075 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(10.24.1.18:50010, storageID=DS-1468632010-10.24.1.18-50010-1236857745172, infoPort=50075, ipcPort=50020):DataXceiver
> java.io.IOException: Unexpected problem in creating temporary file for blk_-4705866815694058664_66391. File /data/schubert/sandmill/hadoop-schubert/dfs/data/tmp/blk_-4705866815694058664 should not be present, but is.
>         at org.apache.hadoop.hdfs.server.datanode.FSDataset$FSVolume.createTmpFile(FSDataset.java:390)
>         at org.apache.hadoop.hdfs.server.datanode.FSDataset$FSVolume.createTmpFile(FSDataset.java:368)
>         at org.apache.hadoop.hdfs.server.datanode.FSDataset.createTmpFile(FSDataset.java:1125)
>         at org.apache.hadoop.hdfs.server.datanode.FSDataset.writeToBlock(FSDataset.java:1014)
>         at org.apache.hadoop.hdfs.server.datanode.BlockReceiver.<init>(BlockReceiver.java:97)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:259)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
>         at java.lang.Thread.run(Thread.java:619)
>
> Datanode5:
> 2009-03-13 19:06:28,139 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987, infoPort=50075, ipcPort=50020):DataXceiver
> java.net.SocketException: Too many open files
>         at sun.nio.ch.Net.socket0(Native Method)
>         at sun.nio.ch.Net.socket(Net.java:94)
>         at sun.nio.ch.SocketChannelImpl.<init>(SocketChannelImpl.java:84)
>         at sun.nio.ch.SelectorProviderImpl.openSocketChannel(SelectorProviderImpl.java:37)
>         at java.nio.channels.SocketChannel.open(SocketChannel.java:105)
>         at org.apache.hadoop.hdfs.server.datanode.DataNode.newSocket(DataNode.java:385)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:277)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:103)
>         at java.lang.Thread.run(Thread.java:619)
>
> and many exceptions like:
> 2009-03-13 19:06:34,033 WARN org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(10.24.1.20:50010, storageID=DS-1337672599-10.24.1.20-50010-1236857787987, infoPort=50075, ipcPort=50020):DataXceiveServer: java.io.IOException: Too many open files
>         at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
>         at sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:145)
>         at sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:84)
>         at org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:130)
>         at java.lang.Thread.run(Thread.java:619)
>
> [the same WARN entry is logged three more times at 19:06:34,033]

On Fri, Mar 13, 2009 at 10:32 PM, Jean-Daniel Cryans <[email protected]> wrote:
>> schubert,
>>
>> What's happening on the HDFS side? Is it really closed? What do you
>> see in the datanode logs?
>>
>> Thx,
>>
>> J-D
>>
>> On Fri, Mar 13, 2009 at 9:23 AM, schubert zhang <[email protected]> wrote:
>> > HBase 0.19.0.
>> > 5 regionservers and 1 master, overlaid on Hadoop (5+1).
>> >
>> > While inserting rows, one regionserver shut itself down.
>> >
>> > 2009-03-13 19:11:40,335 INFO org.apache.hadoop.hbase.regionserver.HRegionServer: aborting server at: 10.24.1.20:60020
>> > 2009-03-13 19:11:40,336 ERROR org.apache.hadoop.hdfs.DFSClient: Exception closing file /hbase/TESTTAB/1481479883/cdr/mapfiles/651889757518114976/index : java.io.IOException: Filesystem closed
>> > java.io.IOException: Filesystem closed
>> >         at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:197)
>> >         at org.apache.hadoop.hdfs.DFSClient.access$600(DFSClient.java:65)
>> >         at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.closeInternal(DFSClient.java:3085)
>> >         at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.close(DFSClient.java:3054)
>> >         at org.apache.hadoop.hdfs.DFSClient$LeaseChecker.close(DFSClient.java:942)
>> >         at org.apache.hadoop.hdfs.DFSClient.close(DFSClient.java:209)
>> >         at org.apache.hadoop.hdfs.DistributedFileSystem.close(DistributedFileSystem.java:264)
>> >         at org.apache.hadoop.fs.FileSystem$Cache.closeAll(FileSystem.java:1413)
>> >         at org.apache.hadoop.fs.FileSystem.closeAll(FileSystem.java:236)
>> >         at org.apache.hadoop.fs.FileSystem$ClientFinalizer.run(FileSystem.java:221)
>> > 2009-03-13 19:11:40,336 ERROR org.apache.hadoop.hdfs.DFSClient: Exception closing file /hbase/TESTTAB/compaction.dir/1481479883/cdr/mapfiles/2407942683821506355/index : java.io.IOException: Filesystem closed
>> > java.io.IOException: Filesystem closed
>> > [stack trace identical to the one above]
>> > 2009-03-13 19:11:40,336 ERROR org.apache.hadoop.hdfs.DFSClient: Exception closing file /hbase/TESTTAB/compaction.dir/1481479883/cdr/mapfiles/6642350706924681632/index : java.io.IOException: Filesystem closed
>> > java.io.IOException: Filesystem closed
>> > [stack trace identical to the one above]
>> > 2009-03-13 19:11:40,460 INFO org.apache.hadoop.hbase.regionserver.HRegionServer: regionserver/0:0:0:0:0:0:0:0:60020 exiting
>> > 2009-03-13 19:11:40,475 INFO org.apache.hadoop.hbase.regionserver.HRegionServer: Starting shutdown thread.
>> > 2009-03-13 19:11:40,475 INFO org.apache.hadoop.hbase.regionserver.HRegionServer: Shutdown thread complete
