Repository: hadoop Updated Branches: refs/heads/branch-2 6d2eca081 -> 069366e1b
HDFS-8025. Addendum fix for HDFS-3087 Decommissioning on NN restart can complete without blocks being replicated. Contributed by Ming Ma. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/069366e1 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/069366e1 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/069366e1 Branch: refs/heads/branch-2 Commit: 069366e1beaf57c26e19eb63eb5bc08e8d24562f Parents: 6d2eca0 Author: Andrew Wang <w...@apache.org> Authored: Wed Apr 8 16:09:17 2015 -0700 Committer: Andrew Wang <w...@apache.org> Committed: Wed Apr 8 16:09:28 2015 -0700 ---------------------------------------------------------------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../server/blockmanagement/BlockManager.java | 5 +++ .../apache/hadoop/hdfs/TestDecommission.java | 32 ++++++++------------ 3 files changed, 20 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/069366e1/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 26117e9..d10123f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -84,6 +84,9 @@ Release 2.8.0 - UNRELEASED HDFS-8076. Code cleanup for DFSInputStream: use offset instead of LocatedBlock when possible. (Zhe Zhang via wang) + HDFS-8025. Addendum fix for HDFS-3087 Decomissioning on NN restart can + complete without blocks being replicated. (Ming Ma via wang) + OPTIMIZATIONS HDFS-8026. 
Trace FSOutputSummer#writeChecksumChunks rather than http://git-wip-us.apache.org/repos/asf/hadoop/blob/069366e1/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index b2babf9..fd0db8c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -3308,6 +3308,11 @@ public class BlockManager { * liveness. Dead nodes cannot always be safely decommissioned. */ boolean isNodeHealthyForDecommission(DatanodeDescriptor node) { + if (!node.checkBlockReportReceived()) { + LOG.info("Node {} hasn't sent its first block report.", node); + return false; + } + if (node.isAlive) { return true; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/069366e1/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java index 081e40f..1ab7427 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java @@ -882,9 +882,12 @@ public class TestDecommission { int numNamenodes = 1; int numDatanodes = 1; int replicas = 1; - + conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, + 
DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); + conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY, 5); + startCluster(numNamenodes, numDatanodes, conf); - Path file1 = new Path("testDecommission.dat"); + Path file1 = new Path("testDecommissionWithNamenodeRestart.dat"); FileSystem fileSys = cluster.getFileSystem(); writeFile(fileSys, file1, replicas); @@ -894,37 +897,26 @@ public class TestDecommission { String excludedDatanodeName = info[0].getXferAddr(); writeConfigFile(excludeFile, new ArrayList<String>(Arrays.asList(excludedDatanodeName))); - + //Add a new datanode to cluster cluster.startDataNodes(conf, 1, true, null, null, null, null); numDatanodes+=1; - + assertEquals("Number of datanodes should be 2 ", 2, cluster.getDataNodes().size()); //Restart the namenode cluster.restartNameNode(); DatanodeInfo datanodeInfo = NameNodeAdapter.getDatanode( cluster.getNamesystem(), excludedDatanodeID); waitNodeState(datanodeInfo, AdminStates.DECOMMISSIONED); - + // Ensure decommissioned datanode is not automatically shutdown assertEquals("All datanodes must be alive", numDatanodes, client.datanodeReport(DatanodeReportType.LIVE).length); - // wait for the block to be replicated - int tries = 0; - while (tries++ < 20) { - try { - Thread.sleep(1000); - if (checkFile(fileSys, file1, replicas, datanodeInfo.getXferAddr(), - numDatanodes) == null) { - break; - } - } catch (InterruptedException ie) { - } - } - assertTrue("Checked if block was replicated after decommission, tried " - + tries + " times.", tries < 20); - cleanupFile(fileSys, file1); + assertTrue("Checked if block was replicated after decommission.", + checkFile(fileSys, file1, replicas, datanodeInfo.getXferAddr(), + numDatanodes) == null); + cleanupFile(fileSys, file1); // Restart the cluster and ensure recommissioned datanodes // are allowed to register with the namenode cluster.shutdown();