Author: arp Date: Fri Aug 8 05:42:00 2014 New Revision: 1616681 URL: http://svn.apache.org/r1616681 Log: HDFS-6772: Merging r1616680 from trunk to branch-2.
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RegisterCommand.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1616681&r1=1616680&r2=1616681&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Aug 8 05:42:00 2014 @@ -121,6 +121,9 @@ Release 2.6.0 - UNRELEASED HDFS-6722. Display readable last contact time for dead nodes on NN webUI. (Ming Ma via wheat9) + HDFS-6772. Get DN storages out of blockContentsStale state faster after + NN restarts. (Ming Ma via Arpit Agarwal) + OPTIMIZATIONS HDFS-6690. Deduplicate xattr names in memory. (wang) Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java?rev=1616681&r1=1616680&r2=1616681&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java Fri Aug 8 05:42:00 2014 @@ -135,7 +135,10 @@ public class DatanodeManager { /** The number of stale DataNodes */ private volatile int numStaleNodes; - + + /** The number of stale storages */ + private volatile int numStaleStorages; + /** * Whether or not this cluster has ever consisted of more than 1 rack, * according to the NetworkTopology. @@ -1141,6 +1144,22 @@ public class DatanodeManager { return this.numStaleNodes; } + /** + * Get the number of content stale storages. + */ + public int getNumStaleStorages() { + return numStaleStorages; + } + + /** + * Set the number of content stale storages. + * + * @param numStaleStorages The number of content stale storages. + */ + void setNumStaleStorages(int numStaleStorages) { + this.numStaleStorages = numStaleStorages; + } + /** Fetch live and dead datanodes. */ public void fetchDatanodes(final List<DatanodeDescriptor> live, final List<DatanodeDescriptor> dead, final boolean removeDecommissionNode) { Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java?rev=1616681&r1=1616680&r2=1616681&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java Fri Aug 8 05:42:00 2014 @@ -256,6 +256,7 @@ class HeartbeatManager implements Datano DatanodeID dead = null; // check the number of stale nodes int numOfStaleNodes = 0; + int numOfStaleStorages = 0; synchronized(this) { for (DatanodeDescriptor d : datanodes) { if (dead == null && dm.isDatanodeDead(d)) { @@ -265,10 +266,17 @@ class HeartbeatManager implements Datano if (d.isStale(dm.getStaleInterval())) { numOfStaleNodes++; } + DatanodeStorageInfo[] storageInfos = d.getStorageInfos(); + for(DatanodeStorageInfo storageInfo : storageInfos) { + if (storageInfo.areBlockContentsStale()) { + numOfStaleStorages++; + } + } } // Set the number of stale nodes in the DatanodeManager dm.setNumStaleNodes(numOfStaleNodes); + dm.setNumStaleStorages(numOfStaleStorages); } allAlive = dead == null; Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java?rev=1616681&r1=1616680&r2=1616681&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java Fri Aug 8 05:42:00 2014 @@ -602,7 +602,7 @@ class BPOfferService { LOG.info("DatanodeCommand action : DNA_REGISTER from " + actor.nnAddr + " with " + actor.state + " state"); actor.reRegister(); - return true; + return false; } writeLock(); try { Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java?rev=1616681&r1=1616680&r2=1616681&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java Fri Aug 8 05:42:00 2014 @@ -222,7 +222,19 @@ class BPServiceActor implements Runnable // Second phase of the handshake with the NN. register(); } - + + // This is useful to make sure NN gets Heartbeat before Blockreport + // upon NN restart while DN keeps retrying Otherwise, + // 1. NN restarts. + // 2. Heartbeat RPC will retry and succeed. NN asks DN to reregister. + // 3. After reregistration completes, DN will send Blockreport first. + // 4. Given NN receives Blockreport after Heartbeat, it won't mark + // DatanodeStorageInfo#blockContentsStale to false until the next + // Blockreport. + void scheduleHeartbeat() { + lastHeartbeat = 0; + } + /** * This methods arranges for the data node to send the block report at * the next heartbeat. @@ -902,6 +914,7 @@ class BPServiceActor implements Runnable retrieveNamespaceInfo(); // and re-register register(); + scheduleHeartbeat(); } } Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1616681&r1=1616680&r2=1616681&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Aug 8 05:42:00 2014 @@ -6091,7 +6091,6 @@ public class FSNamesystem implements Nam blockManager.shutdown(); } } - @Override // FSNamesystemMBean public int getNumLiveDataNodes() { @@ -6139,6 +6138,15 @@ public class FSNamesystem implements Nam } /** + * Storages are marked as "content stale" after NN restart or fails over and + * before NN receives the first Heartbeat followed by the first Blockreport. + */ + @Override // FSNamesystemMBean + public int getNumStaleStorages() { + return getBlockManager().getDatanodeManager().getNumStaleStorages(); + } + + /** * Sets the current generation stamp for legacy blocks */ void setGenerationStampV1(long stamp) { Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java?rev=1616681&r1=1616680&r2=1616681&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java Fri Aug 8 05:42:00 2014 @@ -151,4 +151,11 @@ public interface FSNamesystemMBean { * @return number of blocks pending deletion */ long getPendingDeletionBlocks(); + + /** + * Number of content stale storages. + * @return number of content stale storages + */ + public int getNumStaleStorages(); + } Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RegisterCommand.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RegisterCommand.java?rev=1616681&r1=1616680&r2=1616681&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RegisterCommand.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RegisterCommand.java Fri Aug 8 05:42:00 2014 @@ -22,6 +22,9 @@ import org.apache.hadoop.classification. /** * A BlockCommand is an instruction to a datanode to register with the namenode. + * This command can't be combined with other commands in the same response. + * This is because after the datanode processes RegisterCommand, it will skip + * the rest of the DatanodeCommands in the same HeartbeatResponse. */ @InterfaceAudience.Private @InterfaceStability.Evolving Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java?rev=1616681&r1=1616680&r2=1616681&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java Fri Aug 8 05:42:00 2014 @@ -94,6 +94,8 @@ public class TestFSNamesystemMBean { "SnapshotStats")); Long MaxObjects = (Long) (mbs.getAttribute(mxbeanNameFsns, "MaxObjects")); + Integer numStaleStorages = (Integer) (mbs.getAttribute( + mxbeanNameFsns, "NumStaleStorages")); // Metrics that belong to "NameNodeInfo". // These are metrics that FSNamesystem registers directly with MBeanServer. Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java?rev=1616681&r1=1616680&r2=1616681&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java Fri Aug 8 05:42:00 2014 @@ -26,6 +26,7 @@ import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; +import java.lang.management.ManagementFactory; import java.net.InetAddress; import java.net.URI; import java.util.ArrayList; @@ -51,6 +52,7 @@ import org.apache.hadoop.hdfs.MiniDFSClu import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; @@ -66,6 +68,9 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; +import javax.management.MBeanServer; +import javax.management.ObjectName; + /** * Startup and checkpoint tests * @@ -699,4 +704,40 @@ public class TestStartup { } } } + + + /** + * Verify the following scenario. + * 1. NN restarts. + * 2. Heartbeat RPC will retry and succeed. NN asks DN to reregister. + * 3. After reregistration completes, DN will send Heartbeat, followed by + * Blockreport. + * 4. NN will mark DatanodeStorageInfo#blockContentsStale to false. + * @throws Exception + */ + @Test(timeout = 60000) + public void testStorageBlockContentsStaleAfterNNRestart() throws Exception { + MiniDFSCluster dfsCluster = null; + try { + Configuration config = new Configuration(); + dfsCluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build(); + dfsCluster.waitActive(); + dfsCluster.restartNameNode(true); + BlockManagerTestUtil.checkHeartbeat( + dfsCluster.getNamesystem().getBlockManager()); + MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + ObjectName mxbeanNameFsns = new ObjectName( + "Hadoop:service=NameNode,name=FSNamesystemState"); + Integer numStaleStorages = (Integer) (mbs.getAttribute( + mxbeanNameFsns, "NumStaleStorages")); + assertEquals(0, numStaleStorages.intValue()); + } finally { + if (dfsCluster != null) { + dfsCluster.shutdown(); + } + } + + return; + } + }