Author: szetszwo
Date: Mon Aug 18 23:41:53 2014
New Revision: 1618764

URL: http://svn.apache.org/r1618764
Log:
Merge r1609845 through r1618763 from trunk.
Added:
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/WhitelistBasedTrustedChannelResolver.java
      - copied unchanged from r1618763, hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/WhitelistBasedTrustedChannelResolver.java
Modified:
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/   (props changed)
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/   (props changed)
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
    hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java

Propchange: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/
------------------------------------------------------------------------------
  Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs:r1618417-1618763

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Mon Aug 18 23:41:53 2014
@@ -425,6 +425,9 @@ Release 2.6.0 - UNRELEASED
     HDFS-6850. Move NFS out of order write unit tests into TestWrites class.
     (Zhe Zhang via atm)
 
+    HDFS-6188. An ip whitelist based implementation of TrustedChannelResolver.
+    (Benoy Antony via Arpit Agarwal)
+
   OPTIMIZATIONS
 
     HDFS-6690. Deduplicate xattr names in memory. (wang)
@@ -534,6 +537,12 @@ Release 2.6.0 - UNRELEASED
     HDFS-6783. Fix HDFS CacheReplicationMonitor rescan logic. (Yi Liu and
     Colin Patrick McCabe via umamahesh)
 
+    HDFS-6825. Edit log corruption due to delayed block removal.
+    (Yongjun Zhang via wang)
+
+    HDFS-6569. OOB message can't be sent to the client when DataNode shuts down for upgrade.
+    (brandonli)
+
 Release 2.5.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

Propchange: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/
------------------------------------------------------------------------------
  Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java:r1618417-1618763

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java Mon Aug 18 23:41:53 2014
@@ -373,12 +373,14 @@ public class BlockInfoUnderConstruction
     sb.append("{blockUCState=").append(blockUCState)
       .append(", primaryNodeIndex=").append(primaryNodeIndex)
       .append(", replicas=[");
-    Iterator<ReplicaUnderConstruction> iter = replicas.iterator();
-    if (iter.hasNext()) {
-      iter.next().appendStringTo(sb);
-      while (iter.hasNext()) {
-        sb.append(", ");
+    if (replicas != null) {
+      Iterator<ReplicaUnderConstruction> iter = replicas.iterator();
+      if (iter.hasNext()) {
         iter.next().appendStringTo(sb);
+        while (iter.hasNext()) {
+          sb.append(", ");
+          iter.next().appendStringTo(sb);
+        }
       }
     }
     sb.append("]}");

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java Mon Aug 18 23:41:53 2014
@@ -286,19 +286,19 @@ public class CacheReplicationMonitor ext
   private void rescan() throws InterruptedException {
     scannedDirectives = 0;
     scannedBlocks = 0;
-    namesystem.writeLock();
     try {
-      lock.lock();
-      if (shutdown) {
-        throw new InterruptedException("CacheReplicationMonitor was " +
-            "shut down.");
+      namesystem.writeLock();
+      try {
+        lock.lock();
+        if (shutdown) {
+          throw new InterruptedException("CacheReplicationMonitor was " +
+              "shut down.");
+        }
+        curScanCount = completedScanCount + 1;
+      } finally {
+        lock.unlock();
       }
-      curScanCount = completedScanCount + 1;
-    }
-    finally {
-      lock.unlock();
-    }
-    try {
+
       resetStatistics();
       rescanCacheDirectives();
      rescanCachedBlockMap();
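The rescan() change above reorders lock acquisition: the FSNamesystem write lock is now taken before the monitor's internal lock, and each lock is released in its own finally block, so the shutdown check can throw without leaking a held lock. A minimal sketch of the nested-acquisition pattern (the lock names are illustrative stand-ins, not the actual Hadoop fields):

    import java.util.concurrent.locks.Lock;
    import java.util.concurrent.locks.ReentrantLock;

    class NestedLockSketch {
      private final Lock outer = new ReentrantLock(); // stands in for namesystem.writeLock()
      private final Lock inner = new ReentrantLock(); // stands in for the monitor's own lock
      private volatile boolean shutdown = false;

      void rescan() throws InterruptedException {
        outer.lock();
        try {
          inner.lock();
          try {
            if (shutdown) {
              throw new InterruptedException("monitor was shut down");
            }
            // bump the scan counters while both locks are held
          } finally {
            inner.unlock(); // released even when the shutdown check throws
          }
          // perform the actual rescan under the outer lock only
        } finally {
          outer.unlock();
        }
      }
    }

Acquiring the coarse lock first in a consistent order prevents deadlock against other writers, and unlocking only in finally blocks prevents a lock from leaking on any exception path.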
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java Mon Aug 18 23:41:53 2014
@@ -738,7 +738,12 @@ class BlockReceiver implements Closeable
       LOG.warn("Error managing cache for writer of block " + block, t);
     }
   }
-
+
+  public void sendOOB() throws IOException, InterruptedException {
+    ((PacketResponder) responder.getRunnable()).sendOOBResponse(PipelineAck
+        .getRestartOOBStatus());
+  }
+
   void receiveBlock(
       DataOutputStream mirrOut, // output to next datanode
       DataInputStream mirrIn,   // input from next datanode
@@ -830,9 +835,7 @@ class BlockReceiver implements Closeable
           // The worst case is not recovering this RBW replica.
           // Client will fall back to regular pipeline recovery.
         }
-        try {
-          ((PacketResponder) responder.getRunnable()).
-              sendOOBResponse(PipelineAck.getRestartOOBStatus());
+        try {
           // Even if the connection is closed after the ack packet is
           // flushed, the client can react to the connection closure
           // first. Insert a delay to lower the chance of client
@@ -840,8 +843,6 @@ class BlockReceiver implements Closeable
           Thread.sleep(1000);
         } catch (InterruptedException ie) {
           // It is already going down. Ignore this.
-        } catch (IOException ioe) {
-          LOG.info("Error sending OOB Ack.", ioe);
         }
       }
       responder.interrupt();

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java Mon Aug 18 23:41:53 2014
@@ -270,6 +270,7 @@ public class DataNode extends Configured
   public final static String EMPTY_DEL_HINT = "";
   final AtomicInteger xmitsInProgress = new AtomicInteger();
   Daemon dataXceiverServer = null;
+  DataXceiverServer xserver = null;
   Daemon localDataXceiverServer = null;
   ShortCircuitRegistry shortCircuitRegistry = null;
   ThreadGroup threadGroup = null;
@@ -649,8 +650,8 @@ public class DataNode extends Configured
     streamingAddr = tcpPeerServer.getStreamingAddr();
     LOG.info("Opened streaming server at " + streamingAddr);
     this.threadGroup = new ThreadGroup("dataXceiverServer");
-    this.dataXceiverServer = new Daemon(threadGroup,
-        new DataXceiverServer(tcpPeerServer, conf, this));
+    xserver = new DataXceiverServer(tcpPeerServer, conf, this);
+    this.dataXceiverServer = new Daemon(threadGroup, xserver);
     this.threadGroup.setDaemon(true); // auto destroy when empty
 
     if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
@@ -1138,6 +1139,11 @@ public class DataNode extends Configured
   }
 
   @VisibleForTesting
+  public DataXceiverServer getXferServer() {
+    return xserver;
+  }
+
+  @VisibleForTesting
   public int getXferPort() {
     return streamingAddr.getPort();
   }
@@ -1395,6 +1401,7 @@ public class DataNode extends Configured
     // in order to avoid any further acceptance of requests, but the peers
     // for block writes are not closed until the clients are notified.
     if (dataXceiverServer != null) {
+      xserver.sendOOBToPeers();
       ((DataXceiverServer) this.dataXceiverServer.getRunnable()).kill();
       this.dataXceiverServer.interrupt();
     }
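The DataNode now keeps a typed xserver reference alongside the Daemon wrapper so that shutdown can call DataXceiverServer.sendOOBToPeers() directly. The ordering matters: clients must receive the restart OOB ack while their write pipelines are still open, before the server stops accepting connections and interrupts its threads. A simplified sketch of that ordering, assuming the fields introduced above (this is not the full DataNode.shutdown() method):

    // Simplified shutdown ordering for the xceiver server.
    void shutdownXceiverSketch(Daemon dataXceiverServer, DataXceiverServer xserver) {
      if (dataXceiverServer != null) {
        xserver.sendOOBToPeers();      // 1. notify connected writers of the coming restart
        xserver.kill();                // 2. stop accepting new connections
        dataXceiverServer.interrupt(); // 3. unblock the accept loop
      }
    }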
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java Mon Aug 18 23:41:53 2014
@@ -103,7 +103,8 @@ class DataXceiver extends Receiver imple
   private long opStartTime; //the start time of receiving an Op
   private final InputStream socketIn;
   private OutputStream socketOut;
-
+  private BlockReceiver blockReceiver = null;
+
   /**
    * Client Name used in previous operation. Not available on first request
    * on the socket.
@@ -159,6 +160,12 @@ class DataXceiver extends Receiver imple
     return socketOut;
   }
 
+  public void sendOOB() throws IOException, InterruptedException {
+    LOG.info("Sending OOB to peer: " + peer);
+    if (blockReceiver != null)
+      blockReceiver.sendOOB();
+  }
+
   /**
    * Read/write data from/to the DataXceiverServer.
    */
@@ -168,7 +175,7 @@ class DataXceiver extends Receiver imple
     Op op = null;
 
     try {
-      dataXceiverServer.addPeer(peer, Thread.currentThread());
+      dataXceiverServer.addPeer(peer, Thread.currentThread(), this);
       peer.setWriteTimeout(datanode.getDnConf().socketWriteTimeout);
       InputStream input = socketIn;
       IOStreamPair saslStreams = datanode.saslServer.receive(peer, socketOut,
@@ -584,7 +591,6 @@ class DataXceiver extends Receiver imple
     DataOutputStream mirrorOut = null;  // stream to next target
     DataInputStream mirrorIn = null;    // reply from next target
     Socket mirrorSock = null;           // socket to next target
-    BlockReceiver blockReceiver = null; // responsible for data handling
     String mirrorNode = null;           // the name:port of next target
     String firstBadLink = "";           // first datanode that failed in connection setup
     Status mirrorInStatus = SUCCESS;
@@ -747,6 +753,7 @@ class DataXceiver extends Receiver imple
       IOUtils.closeStream(replyOut);
       IOUtils.closeSocket(mirrorSock);
       IOUtils.closeStream(blockReceiver);
+      blockReceiver = null;
     }
 
     //update metrics

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java Mon Aug 18 23:41:53 2014
@@ -27,11 +27,11 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.net.Peer;
 import org.apache.hadoop.hdfs.net.PeerServer;
-import org.apache.hadoop.hdfs.server.balancer.Balancer;
 import org.apache.hadoop.hdfs.util.DataTransferThrottler;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.Daemon;
 
+import com.google.common.annotations.VisibleForTesting;
 
 /**
  * Server used for receiving/sending a block of data.
@@ -45,6 +45,7 @@ class DataXceiverServer implements Runna
   private final PeerServer peerServer;
   private final DataNode datanode;
   private final HashMap<Peer, Thread> peers = new HashMap<Peer, Thread>();
+  private final HashMap<Peer, DataXceiver> peersXceiver = new HashMap<Peer, DataXceiver>();
   private boolean closed = false;
 
   /**
@@ -217,18 +218,38 @@ class DataXceiverServer implements Runna
     }
   }
 
-  synchronized void addPeer(Peer peer, Thread t) throws IOException {
+  synchronized void addPeer(Peer peer, Thread t, DataXceiver xceiver)
+      throws IOException {
     if (closed) {
       throw new IOException("Server closed.");
     }
     peers.put(peer, t);
+    peersXceiver.put(peer, xceiver);
   }
 
   synchronized void closePeer(Peer peer) {
     peers.remove(peer);
+    peersXceiver.remove(peer);
     IOUtils.cleanup(null, peer);
   }
 
+  // Sending OOB to all peers
+  public synchronized void sendOOBToPeers() {
+    if (!datanode.shutdownForUpgrade) {
+      return;
+    }
+
+    for (Peer p : peers.keySet()) {
+      try {
+        peersXceiver.get(p).sendOOB();
+      } catch (IOException e) {
+        LOG.warn("Got error when sending OOB message.", e);
+      } catch (InterruptedException e) {
+        LOG.warn("Interrupted when sending OOB message.");
+      }
+    }
+  }
+
   // Notify all peers of the shutdown and restart.
   // datanode.shouldRun should still be true and datanode.restarting should
   // be set true before calling this method.
@@ -247,6 +268,7 @@ class DataXceiverServer implements Runna
       IOUtils.cleanup(LOG, p);
     }
     peers.clear();
+    peersXceiver.clear();
   }
 
   // Return the number of peers.
@@ -254,7 +276,14 @@ class DataXceiverServer implements Runna
     return peers.size();
   }
 
+  // Return the number of peers and DataXceivers.
+  @VisibleForTesting
+  synchronized int getNumPeersXceiver() {
+    return peersXceiver.size();
+  }
+
   synchronized void releasePeer(Peer peer) {
     peers.remove(peer);
+    peersXceiver.remove(peer);
   }
 }
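A note on the data structure: rather than changing the value type of the existing peers map, the patch adds a second HashMap keyed by the same Peer, and every insertion and removal path (addPeer, closePeer, restartNotifyPeers, releasePeer) updates both maps so they can never disagree about membership. A minimal sketch of that invariant, with placeholder types rather than the real Hadoop classes:

    import java.util.HashMap;
    import java.util.Map;

    // Peer -> Thread and Peer -> Xceiver are kept in lockstep; all mutation
    // happens in synchronized methods so readers see a consistent pair.
    class PeerRegistrySketch<Peer, Xceiver> {
      private final Map<Peer, Thread> peers = new HashMap<>();
      private final Map<Peer, Xceiver> peersXceiver = new HashMap<>();

      synchronized void addPeer(Peer p, Thread t, Xceiver x) {
        peers.put(p, t);
        peersXceiver.put(p, x); // every insert updates both maps
      }

      synchronized void removePeer(Peer p) {
        peers.remove(p);
        peersXceiver.remove(p); // ...and so does every removal
      }

      synchronized int size() {
        assert peers.size() == peersXceiver.size();
        return peers.size();
      }
    }

This lockstep invariant is what the new rolling-upgrade test below checks by comparing the two map sizes while writers are connected.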
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Mon Aug 18 23:41:53 2014
@@ -4350,7 +4350,30 @@ public class FSNamesystem implements Nam
         throw new IOException("Block (=" + lastblock + ") not found");
       }
     }
-    INodeFile iFile = ((INode)storedBlock.getBlockCollection()).asFile();
+    //
+    // The implementation of delete operation (see @deleteInternal method)
+    // first removes the file paths from namespace, and delays the removal
+    // of blocks to later time for better performance. When
+    // commitBlockSynchronization (this method) is called in between, the
+    // blockCollection of storedBlock could have been assigned to null by
+    // the delete operation; throw IOException here instead of NPE. If the
+    // file path is already removed from namespace by the delete operation,
+    // throw FileNotFoundException here, so as not to proceed to the end of
+    // this method and add a CloseOp to the edit log for an already deleted
+    // file (see HDFS-6825).
+    //
+    BlockCollection blockCollection = storedBlock.getBlockCollection();
+    if (blockCollection == null) {
+      throw new IOException("The blockCollection of " + storedBlock
+          + " is null, likely because the file owning this block was"
+          + " deleted and the block removal is delayed");
+    }
+    INodeFile iFile = ((INode)blockCollection).asFile();
+    if (isFileDeleted(iFile)) {
+      throw new FileNotFoundException("File not found: "
+          + iFile.getFullPathName() + ", likely due to delayed block"
+          + " removal");
+    }
     if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Unexpected block (=" + lastblock
@@ -6349,9 +6372,28 @@ public class FSNamesystem implements Nam
 
   private boolean isFileDeleted(INodeFile file) {
     // Not in the inodeMap or in the snapshot but marked deleted.
-    if (dir.getInode(file.getId()) == null || 
-        file.getParent() == null || (file.isWithSnapshot() &&
-        file.getFileWithSnapshotFeature().isCurrentFileDeleted())) {
+    if (dir.getInode(file.getId()) == null) {
+      return true;
+    }
+
+    // look at the path hierarchy to see if one parent is deleted by recursive
+    // deletion
+    INode tmpChild = file;
+    INodeDirectory tmpParent = file.getParent();
+    while (true) {
+      if (tmpParent == null ||
+          tmpParent.searchChildren(tmpChild.getLocalNameBytes()) < 0) {
+        return true;
+      }
+      if (tmpParent.isRoot()) {
+        break;
+      }
+      tmpChild = tmpParent;
+      tmpParent = tmpParent.getParent();
+    }
+
+    if (file.isWithSnapshot() &&
+        file.getFileWithSnapshotFeature().isCurrentFileDeleted()) {
       return true;
     }
     return false;

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java Mon Aug 18 23:41:53 2014
@@ -176,7 +176,7 @@ public class INodeDirectory extends INod
     return quota;
   }
 
-  private int searchChildren(byte[] name) {
+  int searchChildren(byte[] name) {
     return children == null? -1: Collections.binarySearch(children, name);
   }
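The rewritten isFileDeleted() no longer trusts file.getParent() alone: it walks up the ancestor chain and verifies that each node is still linked as a child of its parent, which detects files whose enclosing directory was removed by a recursive delete while block removal was still pending. A sketch of the walk, using a placeholder Node type rather than Hadoop's INode:

    interface Node {
      Node getParent();
      boolean hasChild(String name);
      boolean isRoot();
      String getName();
    }

    // Returns true if the file, or any ancestor, has been unlinked from the tree.
    static boolean isDeletedSketch(Node file) {
      Node child = file;
      Node parent = file.getParent();
      while (true) {
        if (parent == null || !parent.hasChild(child.getName())) {
          return true;  // unlinked somewhere along the path
        }
        if (parent.isRoot()) {
          return false; // reached the root with every link intact
        }
        child = parent;
        parent = parent.getParent();
      }
    }

This walk is also why searchChildren() in INodeDirectory is widened from private to package-private in the hunk above: the namesystem needs it to test the child link at each step.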
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java Mon Aug 18 23:41:53 2014
@@ -44,6 +44,9 @@ import org.apache.hadoop.hdfs.protocol.d
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
@@ -1300,4 +1303,33 @@ public class DFSTestUtil {
       sockDir.close();
     }
   }
+
+  /**
+   * @return the node which is expected to run the recovery of the
+   * given block, which is known to be under construction inside the
+   * given NameNode.
+   */
+  public static DatanodeDescriptor getExpectedPrimaryNode(NameNode nn,
+      ExtendedBlock blk) {
+    BlockManager bm0 = nn.getNamesystem().getBlockManager();
+    BlockInfo storedBlock = bm0.getStoredBlock(blk.getLocalBlock());
+    assertTrue("Block " + blk + " should be under construction, " +
+        "got: " + storedBlock,
+        storedBlock instanceof BlockInfoUnderConstruction);
+    BlockInfoUnderConstruction ucBlock =
+        (BlockInfoUnderConstruction)storedBlock;
+    // We expect that the replica with the most recent heartbeat will be
+    // the one to be in charge of the synchronization / recovery protocol.
+    final DatanodeStorageInfo[] storages = ucBlock.getExpectedStorageLocations();
+    DatanodeStorageInfo expectedPrimary = storages[0];
+    long mostRecentLastUpdate = expectedPrimary.getDatanodeDescriptor().getLastUpdate();
+    for (int i = 1; i < storages.length; i++) {
+      final long lastUpdate = storages[i].getDatanodeDescriptor().getLastUpdate();
+      if (lastUpdate > mostRecentLastUpdate) {
+        expectedPrimary = storages[i];
+        mostRecentLastUpdate = lastUpdate;
+      }
+    }
+    return expectedPrimary.getDatanodeDescriptor();
+  }
 }
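getExpectedPrimaryNode() is hoisted into DFSTestUtil so that both TestPipelinesFailover and the new TestDeleteRace cases can share it; it picks the replica whose DataNode reported the most recent heartbeat, mirroring the NameNode's choice of primary for block recovery. Typical usage, as in the tests below (variable names illustrative):

    NameNode nn = cluster.getNameNode();
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, filePath);
    DatanodeDescriptor expectedPrimary = DFSTestUtil.getExpectedPrimaryNode(nn, blk);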
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java Mon Aug 18 23:41:53 2014
@@ -27,11 +27,14 @@ import static org.junit.Assert.assertTru
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Random;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.DFSOutputStream;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -67,6 +70,7 @@ public class TestDataNodeRollingUpgrade
 
   private void startCluster() throws IOException {
     conf = new HdfsConfiguration();
+    conf.setInt("dfs.blocksize", 1024*1024);
     cluster = new Builder(conf).numDataNodes(REPL_FACTOR).build();
     cluster.waitActive();
     fs = cluster.getFileSystem();
@@ -243,4 +247,48 @@ public class TestDataNodeRollingUpgrade
       shutdownCluster();
     }
   }
+
+  @Test (timeout=600000)
+  // Test DataXceiver has correct peer-dataxceiver pairs for sending OOB message
+  public void testDatanodePeersXceiver() throws Exception {
+    try {
+      startCluster();
+
+      // Create files in DFS.
+      String testFile1 = "/TestDataNodeXceiver1.dat";
+      String testFile2 = "/TestDataNodeXceiver2.dat";
+      String testFile3 = "/TestDataNodeXceiver3.dat";
+
+      DFSClient client1 = new DFSClient(NameNode.getAddress(conf), conf);
+      DFSClient client2 = new DFSClient(NameNode.getAddress(conf), conf);
+      DFSClient client3 = new DFSClient(NameNode.getAddress(conf), conf);
+
+      DFSOutputStream s1 = (DFSOutputStream) client1.create(testFile1, true);
+      DFSOutputStream s2 = (DFSOutputStream) client2.create(testFile2, true);
+      DFSOutputStream s3 = (DFSOutputStream) client3.create(testFile3, true);
+
+      byte[] toWrite = new byte[1024*1024*8];
+      Random rb = new Random(1111);
+      rb.nextBytes(toWrite);
+      s1.write(toWrite, 0, 1024*1024*8);
+      s1.flush();
+      s2.write(toWrite, 0, 1024*1024*8);
+      s2.flush();
+      s3.write(toWrite, 0, 1024*1024*8);
+      s3.flush();
+
+      assertTrue(dn.getXferServer().getNumPeers() == dn.getXferServer()
+          .getNumPeersXceiver());
+      s1.close();
+      s2.close();
+      s3.close();
+      assertTrue(dn.getXferServer().getNumPeers() == dn.getXferServer()
+          .getNumPeersXceiver());
+      client1.close();
+      client2.close();
+      client3.close();
+    } finally {
+      shutdownCluster();
+    }
+  }
 }

Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java Mon Aug 18 23:41:53 2014
@@ -50,6 +50,17 @@ public class TestCommitBlockSynchronizat
     FSNamesystem namesystem = new FSNamesystem(conf, image);
     namesystem.setImageLoaded(true);
+
+    // set file's parent as root and put the file to inodeMap, so
+    // FSNamesystem's isFileDeleted() method will return false on this file
+    if (file.getParent() == null) {
+      INodeDirectory parent = mock(INodeDirectory.class);
+      parent.setLocalName(new byte[0]);
+      parent.addChild(file);
+      file.setParent(parent);
+    }
+    namesystem.dir.getINodeMap().put(file);
+
     FSNamesystem namesystemSpy = spy(namesystem);
     BlockInfoUnderConstruction blockInfo = new BlockInfoUnderConstruction(
         block, 1, HdfsServerConstants.BlockUCState.UNDER_CONSTRUCTION, targets);
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java Mon Aug 18 23:41:53 2014
@@ -18,7 +18,9 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.FileNotFoundException;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.commons.logging.Log;
@@ -28,18 +30,29 @@ import org.apache.hadoop.fs.FSDataOutput
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.BlockStoragePolicy;
+import org.apache.hadoop.hdfs.AppendTestUtil;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper;
+import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.net.Node;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
 import org.junit.Assert;
 import org.junit.Test;
+import org.mockito.Mockito;
 import org.mockito.internal.util.reflection.Whitebox;
 
@@ -49,6 +62,7 @@ import org.mockito.internal.util.reflect
  * whole duration.
  */
 public class TestDeleteRace {
+  private static final int BLOCK_SIZE = 4096;
   private static final Log LOG = LogFactory.getLog(TestDeleteRace.class);
   private static final Configuration conf = new HdfsConfiguration();
   private MiniDFSCluster cluster;
@@ -201,7 +215,126 @@ public class TestDeleteRace {
         cluster.shutdown();
       }
     }
+  }
+
+  /**
+   * Test race between delete operation and commitBlockSynchronization method.
+   * See HDFS-6825.
+   * @param hasSnapshot whether to take a snapshot before triggering the race
+   * @throws Exception
+   */
+  private void testDeleteAndCommitBlockSynchronizationRace(boolean hasSnapshot)
+      throws Exception {
+    LOG.info("Start testing, hasSnapshot: " + hasSnapshot);
+    final String testPaths[] = {
+        "/test-file",
+        "/testdir/testdir1/test-file"
+    };
+    final Path rootPath = new Path("/");
+    final Configuration conf = new Configuration();
+    // Disable permissions so that another user can recover the lease.
+    conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
+    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
+    FSDataOutputStream stm = null;
+    Map<DataNode, DatanodeProtocolClientSideTranslatorPB> dnMap =
+        new HashMap<DataNode, DatanodeProtocolClientSideTranslatorPB>();
+
+    try {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .numDataNodes(3)
+          .build();
+      cluster.waitActive();
+
+      DistributedFileSystem fs = cluster.getFileSystem();
+      int stId = 0;
+      for (String testPath : testPaths) {
+        LOG.info("test on " + testPath + " snapshot: " + hasSnapshot);
+        Path fPath = new Path(testPath);
+        // find the topmost non-root ancestor
+        Path grandestNonRootParent = fPath;
+        while (!grandestNonRootParent.getParent().equals(rootPath)) {
+          grandestNonRootParent = grandestNonRootParent.getParent();
+        }
+        stm = fs.create(fPath);
+        LOG.info("test on " + testPath + " created " + fPath);
+
+        // write a half block
+        AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
+        stm.hflush();
+
+        if (hasSnapshot) {
+          SnapshotTestHelper.createSnapshot(fs, rootPath,
+              "st" + String.valueOf(stId));
+          ++stId;
+        }
+
+        // Look into the block manager on the active node for the block
+        // under construction.
+        NameNode nn = cluster.getNameNode();
+        ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, fPath);
+        DatanodeDescriptor expectedPrimary =
+            DFSTestUtil.getExpectedPrimaryNode(nn, blk);
+        LOG.info("Expecting block recovery to be triggered on DN " +
+            expectedPrimary);
+
+        // Find the corresponding DN daemon, and spy on its connection to the
+        // active.
+        DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
+        DatanodeProtocolClientSideTranslatorPB nnSpy = dnMap.get(primaryDN);
+        if (nnSpy == null) {
+          nnSpy = DataNodeTestUtils.spyOnBposToNN(primaryDN, nn);
+          dnMap.put(primaryDN, nnSpy);
+        }
+
+        // Delay the commitBlockSynchronization call
+        DelayAnswer delayer = new DelayAnswer(LOG);
+        Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
+            Mockito.eq(blk),
+            Mockito.anyInt(),  // new genstamp
+            Mockito.anyLong(), // new length
+            Mockito.eq(true),  // close file
+            Mockito.eq(false), // delete block
+            (DatanodeID[]) Mockito.anyObject(),  // new targets
+            (String[]) Mockito.anyObject());     // new target storages
+
+        fs.recoverLease(fPath);
+
+        LOG.info("Waiting for commitBlockSynchronization call from primary");
+        delayer.waitForCall();
+
+        LOG.info("Deleting recursively " + grandestNonRootParent);
+        fs.delete(grandestNonRootParent, true);
+
+        delayer.proceed();
+        LOG.info("Now wait for result");
+        delayer.waitForResult();
+        Throwable t = delayer.getThrown();
+        if (t != null) {
+          LOG.info("Result exception (snapshot: " + hasSnapshot + "): " + t);
+        }
+      } // end of loop over each fPath
+      LOG.info("Now check we can restart");
+      cluster.restartNameNodes();
+      LOG.info("Restart finished");
+    } finally {
+      if (stm != null) {
+        IOUtils.closeStream(stm);
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  @Test(timeout=600000)
+  public void testDeleteAndCommitBlockSynchronizationRaceNoSnapshot()
+      throws Exception {
+    testDeleteAndCommitBlockSynchronizationRace(false);
+  }
+
+  @Test(timeout=600000)
+  public void testDeleteAndCommitBlockSynchronizationRaceHasSnapshot()
+      throws Exception {
+    testDeleteAndCommitBlockSynchronizationRace(true);
   }
 }
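The test above is a reusable recipe for reproducing ordering bugs: intercept an RPC with GenericTestUtils.DelayAnswer, hold it mid-flight, run the conflicting operation, then release the call and inspect the outcome. In outline (Service and doWork are hypothetical stand-ins for the spied DatanodeProtocol translator and commitBlockSynchronization):

    import org.apache.commons.logging.Log;
    import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
    import org.mockito.Mockito;

    interface Service { void doWork() throws Exception; }

    class RaceRecipeSketch {
      static void reproduce(Service spy, Log log) throws Exception {
        DelayAnswer delayer = new DelayAnswer(log);
        Mockito.doAnswer(delayer).when(spy).doWork(); // 1. intercept the call
        // ... trigger doWork() asynchronously (above: fs.recoverLease) ...
        delayer.waitForCall();                        // 2. hold it mid-flight
        // ... perform the conflicting operation (above: the recursive delete) ...
        delayer.proceed();                            // 3. let the call continue
        delayer.waitForResult();                      // 4. wait for completion...
        Throwable t = delayer.getThrown();            //    ...and inspect any error
      }
    }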
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java Mon Aug 18 23:41:53 2014
@@ -356,7 +356,8 @@ public class TestPipelinesFailover {
 
     NameNode nn0 = cluster.getNameNode(0);
     ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
-    DatanodeDescriptor expectedPrimary = getExpectedPrimaryNode(nn0, blk);
+    DatanodeDescriptor expectedPrimary =
+        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
     LOG.info("Expecting block recovery to be triggered on DN " +
         expectedPrimary);
 
@@ -506,37 +507,6 @@ public class TestPipelinesFailover {
     }
   }
 
-
-
-  /**
-   * @return the node which is expected to run the recovery of the
-   * given block, which is known to be under construction inside the
-   * given NameNOde.
-   */
-  private DatanodeDescriptor getExpectedPrimaryNode(NameNode nn,
-      ExtendedBlock blk) {
-    BlockManager bm0 = nn.getNamesystem().getBlockManager();
-    BlockInfo storedBlock = bm0.getStoredBlock(blk.getLocalBlock());
-    assertTrue("Block " + blk + " should be under construction, " +
-        "got: " + storedBlock,
-        storedBlock instanceof BlockInfoUnderConstruction);
-    BlockInfoUnderConstruction ucBlock =
-        (BlockInfoUnderConstruction)storedBlock;
-    // We expect that the replica with the most recent heart beat will be
-    // the one to be in charge of the synchronization / recovery protocol.
-    final DatanodeStorageInfo[] storages = ucBlock.getExpectedStorageLocations();
-    DatanodeStorageInfo expectedPrimary = storages[0];
-    long mostRecentLastUpdate = expectedPrimary.getDatanodeDescriptor().getLastUpdate();
-    for (int i = 1; i < storages.length; i++) {
-      final long lastUpdate = storages[i].getDatanodeDescriptor().getLastUpdate();
-      if (lastUpdate > mostRecentLastUpdate) {
-        expectedPrimary = storages[i];
-        mostRecentLastUpdate = lastUpdate;
-      }
-    }
-    return expectedPrimary.getDatanodeDescriptor();
-  }
-
   private DistributedFileSystem createFsAsOtherUser(
       final MiniDFSCluster cluster, final Configuration conf)
       throws IOException, InterruptedException {