Author: kihwal
Date: Mon Mar 24 15:40:47 2014
New Revision: 1580888

URL: http://svn.apache.org/r1580888
Log:
svn merge -c 1580886 merging from trunk to branch-2 to fix: HDFS-3087.
Decommissioning on NN restart can complete without blocks being replicated.
Contributed by Rushabh S Shah.
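
For context: the scenario being fixed is that, right after a NameNode
restart, a datanode that is mid-decommission re-registers before it has sent
its first block report. The NameNode then knows of no blocks on that node, so
the replication check trivially finds nothing under-replicated and the node
is marked DECOMMISSIONED without any replicas having been copied off it. A
rough standalone sketch of that pre-fix logic (hypothetical, simplified
stand-ins, not the actual Hadoop classes):

    // PreFixSketch.java -- hypothetical simplification of the old check.
    // With no block report yet received, the NameNode knows of 0 blocks on
    // the node, so the "replication in progress" test trivially fails and
    // the node is prematurely marked DECOMMISSIONED.
    public class PreFixSketch {
      // Stand-in for BlockManager.isReplicationInProgress(node).
      static boolean isReplicationInProgress(int blocksKnownOnNode) {
        return blocksKnownOnNode > 0;
      }

      public static void main(String[] args) {
        boolean decommissionInProgress = true;
        int blocksKnownOnNode = 0; // NN just restarted; no report yet

        if (decommissionInProgress
            && !isReplicationInProgress(blocksKnownOnNode)) {
          System.out.println("Decommission complete"); // premature!
        }
      }
    }

The fix below adds checkBlockReportReceived() to DatanodeDescriptor and uses
it to gate the decommission-completion check in DatanodeManager.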

Modified:
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1580888&r1=1580887&r2=1580888&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Mon Mar 24 15:40:47 2014
@@ -18,6 +18,9 @@ Release 2.5.0 - UNRELEASED
     HDFS-6112. NFS Gateway docs are incorrect for allowed hosts configuration.
     (atm)
 
+    HDFS-3087. Decommissioning on NN restart can complete without blocks being
+    replicated. (Rushabh S Shah via kihwal)
+
 Release 2.4.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java?rev=1580888&r1=1580887&r2=1580888&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java Mon Mar 24 15:40:47 2014
@@ -697,5 +697,23 @@ public class DatanodeDescriptor extends 
   public void setLastCachingDirectiveSentTimeMs(long time) {
     this.lastCachingDirectiveSentTimeMs = time;
   }
+
+  /**
+   * Checks whether at least one block report has been received from
+   * every storage on this datanode.
+   * @return true if a first block report has been received from all
+   *         storages, false otherwise
+   */
+  public boolean checkBlockReportReceived() {
+    if (this.getStorageInfos().length == 0) {
+      return false;
+    }
+    for (DatanodeStorageInfo storageInfo : this.getStorageInfos()) {
+      if (storageInfo.getBlockReportCount() == 0) {
+        return false;
+      }
+    }
+    return true;
+  }
 }
 

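As a side note, the semantics of the new method can be illustrated with a
small standalone sketch (StorageStub below is a hypothetical stand-in for
DatanodeStorageInfo, reduced to the one field the check inspects): the check
fails both when the node has registered no storages at all and when any
individual storage has yet to send its first report.

    import java.util.Arrays;
    import java.util.List;

    public class CheckSketch {
      // Hypothetical stand-in for DatanodeStorageInfo.
      static class StorageStub {
        final int blockReportCount;
        StorageStub(int blockReportCount) {
          this.blockReportCount = blockReportCount;
        }
      }

      // Mirrors DatanodeDescriptor.checkBlockReportReceived() from the patch.
      static boolean checkBlockReportReceived(List<StorageStub> storages) {
        if (storages.isEmpty()) {
          return false; // node registered, but nothing reported yet
        }
        for (StorageStub s : storages) {
          if (s.blockReportCount == 0) {
            return false; // some storage has not reported
          }
        }
        return true;
      }

      public static void main(String[] args) {
        System.out.println(checkBlockReportReceived(
            Arrays.<StorageStub>asList()));                           // false
        System.out.println(checkBlockReportReceived(
            Arrays.asList(new StorageStub(0))));                      // false
        System.out.println(checkBlockReportReceived(
            Arrays.asList(new StorageStub(1), new StorageStub(2))));  // true
      }
    }
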
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java?rev=1580888&r1=1580887&r2=1580888&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java Mon Mar 24 15:40:47 2014
@@ -710,7 +710,7 @@ public class DatanodeManager {
   boolean checkDecommissionState(DatanodeDescriptor node) {
    // Check to see if all blocks in this decommissioned
    // node have reached their target replication factor.
-    if (node.isDecommissionInProgress()) {
+    if (node.isDecommissionInProgress() && node.checkBlockReportReceived()) {
       if (!blockManager.isReplicationInProgress(node)) {
         node.setDecommissioned();
         LOG.info("Decommission complete for " + node);

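With this guard in place, checkDecommissionState() skips the completion check
entirely until every storage registered on the node has sent at least one
block report; the node stays in DECOMMISSION_IN_PROGRESS and is re-evaluated
on a later pass of the periodic decommission monitor. The guard is
conservative by construction: it can only delay the transition to
DECOMMISSIONED, never cause it.
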
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java?rev=1580888&r1=1580887&r2=1580888&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java Mon Mar 24 15:40:47 2014
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertTru
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -830,4 +831,68 @@ public class TestDecommission {
 
     fdos.close();
   }
+
+  /**
+   * Tests restart of the namenode while datanode hosts are added to the
+   * exclude file.
+   */
+  @Test(timeout=360000)
+  public void testDecommissionWithNamenodeRestart()
+      throws IOException, InterruptedException {
+    LOG.info("Starting test testDecommissionWithNamenodeRestart");
+    int numNamenodes = 1;
+    int numDatanodes = 1;
+    int replicas = 1;
+
+    startCluster(numNamenodes, numDatanodes, conf);
+    Path file1 = new Path("testDecommission.dat");
+    FileSystem fileSys = cluster.getFileSystem();
+    writeFile(fileSys, file1, replicas);
+
+    DFSClient client = getDfsClient(cluster.getNameNode(), conf);
+    DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
+    DatanodeID excludedDatanodeID = info[0];
+    String excludedDatanodeName = info[0].getXferAddr();
+
+    writeConfigFile(excludeFile,
+        new ArrayList<String>(Arrays.asList(excludedDatanodeName)));
+
+    // Add a new datanode to the cluster
+    cluster.startDataNodes(conf, 1, true, null, null, null, null);
+    numDatanodes += 1;
+
+    assertEquals("Number of datanodes should be 2", 2,
+        cluster.getDataNodes().size());
+    // Restart the namenode
+    cluster.restartNameNode();
+    DatanodeInfo datanodeInfo = NameNodeAdapter.getDatanode(
+        cluster.getNamesystem(), excludedDatanodeID);
+    waitNodeState(datanodeInfo, AdminStates.DECOMMISSIONED);
+
+    // Ensure the decommissioned datanode is not automatically shut down
+    assertEquals("All datanodes must be alive", numDatanodes,
+        client.datanodeReport(DatanodeReportType.LIVE).length);
+    // Wait for the block to be replicated off the decommissioned node
+    int tries = 0;
+    while (tries++ < 20) {
+      try {
+        Thread.sleep(1000);
+        if (checkFile(fileSys, file1, replicas, datanodeInfo.getXferAddr(),
+            numDatanodes) == null) {
+          break;
+        }
+      } catch (InterruptedException ie) {
+        // Ignore the interrupt and retry
+      }
+    }
+    assertTrue("Checked if block was replicated after decommission, tried "
+        + tries + " times.", tries < 20);
+    cleanupFile(fileSys, file1);
+
+    // Restart the cluster and ensure recommissioned datanodes
+    // are allowed to register with the namenode
+    cluster.shutdown();
+    startCluster(numNamenodes, numDatanodes, conf);
+    cluster.shutdown();
+  }
 }

