http://git-wip-us.apache.org/repos/asf/hadoop/blob/b61fb267/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
index 63617ad..c125f45 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java
@@ -18,13 +18,19 @@ package org.apache.hadoop.hdfs;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
@@ -32,6 +38,8 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.util.Time;
 import org.junit.Test;
@@ -40,13 +48,23 @@ import org.junit.Test;
  * This class tests node maintenance.
  */
 public class TestMaintenanceState extends AdminStatesBaseTest {
-  public static final Log LOG = LogFactory.getLog(TestMaintenanceState.class);
-  static private final long EXPIRATION_IN_MS = 500;
+  public static final Logger LOG =
+      LoggerFactory.getLogger(TestMaintenanceState.class);
+  static private final long EXPIRATION_IN_MS = 50;
+  private int minMaintenanceR =
+      DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_DEFAULT;
 
   public TestMaintenanceState() {
     setUseCombinedHostFileManager();
   }
 
+  void setMinMaintenanceR(int minMaintenanceR) {
+    this.minMaintenanceR = minMaintenanceR;
+    getConf().setInt(
+        DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY,
+        minMaintenanceR);
+  }
+
   /**
    * Verify a node can transition from AdminStates.ENTERING_MAINTENANCE to
    * AdminStates.NORMAL.
@@ -55,21 +73,25 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   public void testTakeNodeOutOfEnteringMaintenance() throws Exception {
     LOG.info("Starting testTakeNodeOutOfEnteringMaintenance");
     final int replicas = 1;
-    final int numNamenodes = 1;
-    final int numDatanodes = 1;
-    final Path file1 = new Path("/testTakeNodeOutOfEnteringMaintenance.dat");
+    final Path file = new Path("/testTakeNodeOutOfEnteringMaintenance.dat");
 
-    startCluster(numNamenodes, numDatanodes);
+    startCluster(1, 1);
 
-    FileSystem fileSys = getCluster().getFileSystem(0);
-    writeFile(fileSys, file1, replicas, 1);
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    final FSNamesystem ns = getCluster().getNamesystem(0);
+    writeFile(fileSys, file, replicas, 1);
 
-    DatanodeInfo nodeOutofService = takeNodeOutofService(0,
+    final DatanodeInfo nodeOutofService = takeNodeOutofService(0,
         null, Long.MAX_VALUE, null, AdminStates.ENTERING_MAINTENANCE);
 
+    // When a node is in the ENTERING_MAINTENANCE state, it can still serve
+    // read requests.
+    assertNull(checkWithRetry(ns, fileSys, file, replicas, null,
+        nodeOutofService));
+
     putNodeInService(0, nodeOutofService.getDatanodeUuid());
 
-    cleanupFile(fileSys, file1);
+    cleanupFile(fileSys, file);
   }
 
   /**
@@ -80,23 +102,21 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   public void testEnteringMaintenanceExpiration() throws Exception {
     LOG.info("Starting testEnteringMaintenanceExpiration");
     final int replicas = 1;
-    final int numNamenodes = 1;
-    final int numDatanodes = 1;
-    final Path file1 = new Path("/testTakeNodeOutOfEnteringMaintenance.dat");
+    final Path file = new Path("/testEnteringMaintenanceExpiration.dat");
 
-    startCluster(numNamenodes, numDatanodes);
+    startCluster(1, 1);
 
-    FileSystem fileSys = getCluster().getFileSystem(0);
-    writeFile(fileSys, file1, replicas, 1);
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    writeFile(fileSys, file, replicas, 1);
 
-    // expires in 500 milliseconds
-    DatanodeInfo nodeOutofService = takeNodeOutofService(0, null,
-        Time.monotonicNow() + EXPIRATION_IN_MS, null,
-        AdminStates.ENTERING_MAINTENANCE);
+    final DatanodeInfo nodeOutofService = takeNodeOutofService(0, null,
+        Long.MAX_VALUE, null, AdminStates.ENTERING_MAINTENANCE);
 
-    waitNodeState(nodeOutofService, AdminStates.NORMAL);
+    // Adjust the expiration.
+    takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(),
+        Time.monotonicNow() + EXPIRATION_IN_MS, null, AdminStates.NORMAL);
 
-    cleanupFile(fileSys, file1);
+    cleanupFile(fileSys, file);
   }
 
   /**
@@ -106,20 +126,18 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   public void testInvalidExpiration() throws Exception {
     LOG.info("Starting testInvalidExpiration");
     final int replicas = 1;
-    final int numNamenodes = 1;
-    final int numDatanodes = 1;
-    final Path file1 = new Path("/testTakeNodeOutOfEnteringMaintenance.dat");
+    final Path file = new Path("/testInvalidExpiration.dat");
 
-    startCluster(numNamenodes, numDatanodes);
+    startCluster(1, 1);
 
-    FileSystem fileSys = getCluster().getFileSystem(0);
-    writeFile(fileSys, file1, replicas, 1);
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    writeFile(fileSys, file, replicas, 1);
 
     // expiration has to be greater than Time.monotonicNow().
     takeNodeOutofService(0, null, Time.monotonicNow(), null,
         AdminStates.NORMAL);
 
-    cleanupFile(fileSys, file1);
+    cleanupFile(fileSys, file);
   }
 
   /**
@@ -129,18 +147,17 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   @Test(timeout = 360000)
   public void testPutDeadNodeToMaintenance() throws Exception {
     LOG.info("Starting testPutDeadNodeToMaintenance");
-    final int numNamenodes = 1;
-    final int numDatanodes = 1;
     final int replicas = 1;
-    final Path file1 = new Path("/testPutDeadNodeToMaintenance.dat");
+    final Path file = new Path("/testPutDeadNodeToMaintenance.dat");
 
-    startCluster(numNamenodes, numDatanodes);
+    startCluster(1, 1);
 
-    FileSystem fileSys = getCluster().getFileSystem(0);
-    FSNamesystem ns = getCluster().getNamesystem(0);
-    writeFile(fileSys, file1, replicas, 1);
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    final FSNamesystem ns = getCluster().getNamesystem(0);
+    writeFile(fileSys, file, replicas, 1);
 
-    MiniDFSCluster.DataNodeProperties dnProp = getCluster().stopDataNode(0);
+    final MiniDFSCluster.DataNodeProperties dnProp =
+        getCluster().stopDataNode(0);
     DFSTestUtil.waitForDatanodeState(
         getCluster(), dnProp.datanode.getDatanodeUuid(), false, 20000);
 
@@ -153,7 +170,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
     assertEquals(deadInMaintenance + 1, ns.getNumInMaintenanceDeadDataNodes());
     assertEquals(liveInMaintenance, ns.getNumInMaintenanceLiveDataNodes());
 
-    cleanupFile(fileSys, file1);
+    cleanupFile(fileSys, file);
   }
 
   /**
@@ -164,16 +181,14 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   @Test(timeout = 360000)
   public void testPutDeadNodeToMaintenanceWithExpiration() throws Exception {
     LOG.info("Starting testPutDeadNodeToMaintenanceWithExpiration");
-    final int numNamenodes = 1;
-    final int numDatanodes = 1;
-    final int replicas = 1;
-    final Path file1 = new Path("/testPutDeadNodeToMaintenance.dat");
+    final Path file =
+        new Path("/testPutDeadNodeToMaintenanceWithExpiration.dat");
 
-    startCluster(numNamenodes, numDatanodes);
+    startCluster(1, 1);
 
     FileSystem fileSys = getCluster().getFileSystem(0);
     FSNamesystem ns = getCluster().getNamesystem(0);
-    writeFile(fileSys, file1, replicas, 1);
+    writeFile(fileSys, file, 1, 1);
 
     MiniDFSCluster.DataNodeProperties dnProp = getCluster().stopDataNode(0);
     DFSTestUtil.waitForDatanodeState(
         getCluster(), dnProp.datanode.getDatanodeUuid(), false, 20000);
@@ -184,16 +199,17 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
 
     DatanodeInfo nodeOutofService = takeNodeOutofService(0,
         dnProp.datanode.getDatanodeUuid(),
-        Time.monotonicNow() + EXPIRATION_IN_MS, null,
-        AdminStates.IN_MAINTENANCE);
+        Long.MAX_VALUE, null, AdminStates.IN_MAINTENANCE);
 
-    waitNodeState(nodeOutofService, AdminStates.NORMAL);
+    // Adjust the expiration.
+    takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(),
+        Time.monotonicNow() + EXPIRATION_IN_MS, null, AdminStates.NORMAL);
 
     // no change
     assertEquals(deadInMaintenance, ns.getNumInMaintenanceDeadDataNodes());
     assertEquals(liveInMaintenance, ns.getNumInMaintenanceLiveDataNodes());
 
-    cleanupFile(fileSys, file1);
+    cleanupFile(fileSys, file);
   }
 
   /**
@@ -202,15 +218,12 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   @Test(timeout = 360000)
   public void testTransitionFromDecommissioned() throws IOException {
     LOG.info("Starting testTransitionFromDecommissioned");
-    final int numNamenodes = 1;
-    final int numDatanodes = 4;
-    final int replicas = 3;
-    final Path file1 = new Path("/testTransitionFromDecommissioned.dat");
+    final Path file = new Path("/testTransitionFromDecommissioned.dat");
 
-    startCluster(numNamenodes, numDatanodes);
+    startCluster(1, 4);
 
-    FileSystem fileSys = getCluster().getFileSystem(0);
-    writeFile(fileSys, file1, replicas, 1);
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    writeFile(fileSys, file, 3, 1);
 
     DatanodeInfo nodeOutofService = takeNodeOutofService(0, null, 0, null,
         AdminStates.DECOMMISSIONED);
@@ -218,7 +231,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
     takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(), Long.MAX_VALUE,
         null, AdminStates.IN_MAINTENANCE);
 
-    cleanupFile(fileSys, file1);
+    cleanupFile(fileSys, file);
   }
 
   /**
@@ -228,34 +241,33 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   @Test(timeout = 360000)
   public void testTransitionFromDecommissionedAndExpired() throws IOException {
     LOG.info("Starting testTransitionFromDecommissionedAndExpired");
-    final int numNamenodes = 1;
-    final int numDatanodes = 4;
-    final int replicas = 3;
-    final Path file1 = new Path("/testTransitionFromDecommissioned.dat");
+    final Path file =
+        new Path("/testTransitionFromDecommissionedAndExpired.dat");
 
-    startCluster(numNamenodes, numDatanodes);
+    startCluster(1, 4);
 
-    FileSystem fileSys = getCluster().getFileSystem(0);
-    writeFile(fileSys, file1, replicas, 1);
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    writeFile(fileSys, file, 3, 1);
 
-    DatanodeInfo nodeOutofService = takeNodeOutofService(0, null, 0, null,
-        AdminStates.DECOMMISSIONED);
+    final DatanodeInfo nodeOutofService = takeNodeOutofService(0, null, 0,
+        null, AdminStates.DECOMMISSIONED);
 
     takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(),
-        Time.monotonicNow() + EXPIRATION_IN_MS, null,
-        AdminStates.IN_MAINTENANCE);
+        Long.MAX_VALUE, null, AdminStates.IN_MAINTENANCE);
 
-    waitNodeState(nodeOutofService, AdminStates.NORMAL);
+    // Adjust the expiration.
+    takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(),
+        Time.monotonicNow() + EXPIRATION_IN_MS, null, AdminStates.NORMAL);
 
-    cleanupFile(fileSys, file1);
+    cleanupFile(fileSys, file);
   }
 
   /**
    * When a node is put to maintenance, it first transitions to
    * AdminStates.ENTERING_MAINTENANCE. It makes sure all blocks have minimal
    * replication before it can be transitioned to AdminStates.IN_MAINTENANCE.
-   * If node becomes dead when it is in AdminStates.ENTERING_MAINTENANCE, admin
-   * state should stay in AdminStates.ENTERING_MAINTENANCE state.
+   * If a node becomes dead when it is in AdminStates.ENTERING_MAINTENANCE, it
+   * should stay in the AdminStates.ENTERING_MAINTENANCE state.
    */
   @Test(timeout = 360000)
   public void testNodeDeadWhenInEnteringMaintenance() throws Exception {
@@ -263,16 +275,16 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
     final int numNamenodes = 1;
     final int numDatanodes = 1;
     final int replicas = 1;
-    final Path file1 = new Path("/testNodeDeadWhenInEnteringMaintenance.dat");
+    final Path file = new Path("/testNodeDeadWhenInEnteringMaintenance.dat");
 
     startCluster(numNamenodes, numDatanodes);
 
-    FileSystem fileSys = getCluster().getFileSystem(0);
-    FSNamesystem ns = getCluster().getNamesystem(0);
-    writeFile(fileSys, file1, replicas, 1);
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    final FSNamesystem ns = getCluster().getNamesystem(0);
+    writeFile(fileSys, file, replicas, 1);
 
     DatanodeInfo nodeOutofService = takeNodeOutofService(0,
-        getFirstBlockFirstReplicaUuid(fileSys, file1), Long.MAX_VALUE, null,
+        getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
         AdminStates.ENTERING_MAINTENANCE);
     assertEquals(1, ns.getNumEnteringMaintenanceDataNodes());
 
@@ -281,30 +293,627 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
     DFSTestUtil.waitForDatanodeState(
         getCluster(), nodeOutofService.getDatanodeUuid(), false, 20000);
     DFSClient client = getDfsClient(0);
-    assertEquals("maintenance node shouldn't be alive", numDatanodes - 1,
+    assertEquals("maintenance node shouldn't be live", numDatanodes - 1,
         client.datanodeReport(DatanodeReportType.LIVE).length);
+    assertEquals(1, ns.getNumEnteringMaintenanceDataNodes());
 
     getCluster().restartDataNode(dnProp, true);
     getCluster().waitActive();
     waitNodeState(nodeOutofService, AdminStates.ENTERING_MAINTENANCE);
     assertEquals(1, ns.getNumEnteringMaintenanceDataNodes());
+    assertEquals("maintenance node should be live", numDatanodes,
+        client.datanodeReport(DatanodeReportType.LIVE).length);
+
+    cleanupFile(fileSys, file);
+  }
+
+  /**
+   * When a node is put to maintenance, it first transitions to
+   * AdminStates.ENTERING_MAINTENANCE. It makes sure all blocks have
+   * been properly replicated before it can be transitioned to
+   * AdminStates.IN_MAINTENANCE. The expected replication count takes
+   * DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY and
+   * the file's replication factor into account.
+   */
+  @Test(timeout = 360000)
+  public void testExpectedReplications() throws IOException {
+    LOG.info("Starting testExpectedReplications");
+    testExpectedReplication(1);
+    testExpectedReplication(2);
+    testExpectedReplication(3);
+    testExpectedReplication(4);
+  }
+
+  private void testExpectedReplication(int replicationFactor)
+      throws IOException {
+    testExpectedReplication(replicationFactor,
+        Math.max(replicationFactor - 1, this.minMaintenanceR));
+  }
+
+  private void testExpectedReplication(int replicationFactor,
+      int expectedReplicasInRead) throws IOException {
+    startCluster(1, 5);
+
+    final Path file = new Path("/testExpectedReplication.dat");
+
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    final FSNamesystem ns = getCluster().getNamesystem(0);
+
+    writeFile(fileSys, file, replicationFactor, 1);
+
+    DatanodeInfo nodeOutofService = takeNodeOutofService(0,
+        getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE,
+        null, AdminStates.IN_MAINTENANCE);
+
+    // The block should be replicated to another datanode to meet
+    // the expected replication count.
+    assertNull(checkWithRetry(ns, fileSys, file, expectedReplicasInRead,
+        nodeOutofService));
+
+    cleanupFile(fileSys, file);
+  }
+
+  /**
+   * Verify a node can transition directly to AdminStates.IN_MAINTENANCE when
+   * DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY is set to zero.
+   */
+  @Test(timeout = 360000)
+  public void testZeroMinMaintenanceReplication() throws Exception {
+    LOG.info("Starting testZeroMinMaintenanceReplication");
+    setMinMaintenanceR(0);
+    startCluster(1, 1);
+
+    final Path file = new Path("/testZeroMinMaintenanceReplication.dat");
+    final int replicas = 1;
+
+    FileSystem fileSys = getCluster().getFileSystem(0);
+    writeFile(fileSys, file, replicas, 1);
+
+    takeNodeOutofService(0, null, Long.MAX_VALUE, null,
+        AdminStates.IN_MAINTENANCE);
+
+    cleanupFile(fileSys, file);
+  }
+
+  /**
+   * Verify a node can transition directly to AdminStates.IN_MAINTENANCE when
+   * DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY is set to zero. Then later
+   * transition to NORMAL after maintenance expiration.
+   */
+  @Test(timeout = 360000)
+  public void testZeroMinMaintenanceReplicationWithExpiration()
+      throws Exception {
+    LOG.info("Starting testZeroMinMaintenanceReplicationWithExpiration");
+    setMinMaintenanceR(0);
+    startCluster(1, 1);
+
+    final Path file =
+        new Path("/testZeroMinMaintenanceReplicationWithExpiration.dat");
+
+    FileSystem fileSys = getCluster().getFileSystem(0);
+    writeFile(fileSys, file, 1, 1);
+
+    DatanodeInfo nodeOutofService = takeNodeOutofService(0, null,
+        Long.MAX_VALUE, null, AdminStates.IN_MAINTENANCE);
+
+    // Adjust the expiration.
+    takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(),
+        Time.monotonicNow() + EXPIRATION_IN_MS, null, AdminStates.NORMAL);
+
+    cleanupFile(fileSys, file);
+  }
+
+  /**
+   * Transition from IN_MAINTENANCE to DECOMMISSIONED.
+   */
+  @Test(timeout = 360000)
+  public void testTransitionToDecommission() throws IOException {
+    LOG.info("Starting testTransitionToDecommission");
+    final int numNamenodes = 1;
+    final int numDatanodes = 4;
+    startCluster(numNamenodes, numDatanodes);
+
+    final Path file = new Path("testTransitionToDecommission.dat");
+    final int replicas = 3;
+
+    FileSystem fileSys = getCluster().getFileSystem(0);
+    FSNamesystem ns = getCluster().getNamesystem(0);
+
+    writeFile(fileSys, file, replicas, 1);
+
+    DatanodeInfo nodeOutofService = takeNodeOutofService(0,
+        getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
+        AdminStates.IN_MAINTENANCE);
+
+    DFSClient client = getDfsClient(0);
+    assertEquals("All datanodes must be alive", numDatanodes,
+        client.datanodeReport(DatanodeReportType.LIVE).length);
+
+    // test 1, verify the replica in IN_MAINTENANCE state isn't in LocatedBlock
+    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService));
+
+    takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(), 0, null,
+        AdminStates.DECOMMISSIONED);
+
+    // test 2, after decommission has completed, the replication count is
+    // replicas + 1, which includes the decommissioned node.
+    assertNull(checkWithRetry(ns, fileSys, file, replicas + 1, null));
+
+    // test 3, put the node back in service; the replication count should be
+    // restored.
+    putNodeInService(0, nodeOutofService.getDatanodeUuid());
+    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+
+    cleanupFile(fileSys, file);
+  }
+
+  /**
+   * Transition from decommissioning state to maintenance state.
+   */
+  @Test(timeout = 360000)
+  public void testTransitionFromDecommissioning() throws IOException {
+    LOG.info("Starting testTransitionFromDecommissioning");
+    startCluster(1, 3);
+
+    final Path file = new Path("/testTransitionFromDecommissioning.dat");
+    final int replicas = 3;
+
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    final FSNamesystem ns = getCluster().getNamesystem(0);
+
+    writeFile(fileSys, file, replicas);
+
+    final DatanodeInfo nodeOutofService = takeNodeOutofService(0, null, 0,
+        null, AdminStates.DECOMMISSION_INPROGRESS);
+
+    takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(), Long.MAX_VALUE,
+        null, AdminStates.IN_MAINTENANCE);
+
+    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService));
+
+    cleanupFile(fileSys, file);
+  }
+
+
+  /**
+   * First put a node in maintenance, then put a different node
+   * in decommission. Make sure the decommission process takes
+   * the maintenance replica into account.
+   */
+  @Test(timeout = 360000)
+  public void testDecommissionDifferentNodeAfterMaintenances()
+      throws Exception {
+    testDecommissionDifferentNodeAfterMaintenance(2);
+    testDecommissionDifferentNodeAfterMaintenance(3);
+    testDecommissionDifferentNodeAfterMaintenance(4);
+  }
+
+  private void testDecommissionDifferentNodeAfterMaintenance(int repl)
+      throws Exception {
+    startCluster(1, 5);
+
+    final Path file =
+        new Path("/testDecommissionDifferentNodeAfterMaintenance.dat");
+
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    final FSNamesystem ns = getCluster().getNamesystem(0);
+
+    writeFile(fileSys, file, repl, 1);
+    final DatanodeInfo[] nodes = getFirstBlockReplicasDatanodeInfos(fileSys,
+        file);
+    String maintenanceDNUuid = nodes[0].getDatanodeUuid();
+    String decommissionDNUuid = nodes[1].getDatanodeUuid();
+    DatanodeInfo maintenanceDN = takeNodeOutofService(0, maintenanceDNUuid,
+        Long.MAX_VALUE, null, null, AdminStates.IN_MAINTENANCE);
-    cleanupFile(fileSys, file1);
+    Map<DatanodeInfo, Long> maintenanceNodes = new HashMap<>();
+    maintenanceNodes.put(nodes[0], Long.MAX_VALUE);
+    takeNodeOutofService(0, decommissionDNUuid, 0, null, maintenanceNodes,
+        AdminStates.DECOMMISSIONED);
+    // Out of the replicas returned, one is the decommissioned node.
+    assertNull(checkWithRetry(ns, fileSys, file, repl, maintenanceDN));
+
+    putNodeInService(0, maintenanceDN);
+    assertNull(checkWithRetry(ns, fileSys, file, repl + 1, null));
+
+    cleanupFile(fileSys, file);
+  }
+
+
+  @Test(timeout = 360000)
+  public void testChangeReplicationFactors() throws IOException {
+    // Prior to any change, there is 1 maintenance node and 2 live nodes.
+
+    // Replication factor is adjusted from 3 to 4.
+    // After the change, given 1 maintenance + 2 live is less than the
+    // newFactor, one live node will be added.
+    testChangeReplicationFactor(3, 4, 3);
+
+    // Replication factor is adjusted from 3 to 2.
+    // After the change, given the 2 live nodes equal the newFactor,
+    // no live nodes will be invalidated.
+    testChangeReplicationFactor(3, 2, 2);
+
+    // Replication factor is adjusted from 3 to 1.
+    // After the change, given the 2 live nodes exceed the newFactor,
+    // one live node will be invalidated.
+    testChangeReplicationFactor(3, 1, 1);
+  }
+
+  /**
+   * After the change of replication factor, # of live replicas <=
+   * the new replication factor.
+   */
+  private void testChangeReplicationFactor(int oldFactor, int newFactor,
+      int expectedLiveReplicas) throws IOException {
+    LOG.info("Starting testChangeReplicationFactor {} {} {}",
+        oldFactor, newFactor, expectedLiveReplicas);
+    startCluster(1, 5);
+
+    final Path file = new Path("/testChangeReplicationFactor.dat");
+
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    final FSNamesystem ns = getCluster().getNamesystem(0);
+
+    writeFile(fileSys, file, oldFactor, 1);
+
+    final DatanodeInfo nodeOutofService = takeNodeOutofService(0,
+        getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
+        AdminStates.IN_MAINTENANCE);
+
+    // Verify that the nodeOutofService remains in blocksMap and
+    // the # of live replicas for read operations is as expected.
+    assertNull(checkWithRetry(ns, fileSys, file, oldFactor - 1,
+        nodeOutofService));
+
+    final DFSClient client = getDfsClient(0);
+    client.setReplication(file.toString(), (short)newFactor);
+
+    // Verify that the nodeOutofService remains in blocksMap and
+    // the # of live replicas for read operations.
+    assertNull(checkWithRetry(ns, fileSys, file, expectedLiveReplicas,
+        nodeOutofService));
+
+    putNodeInService(0, nodeOutofService.getDatanodeUuid());
+    assertNull(checkWithRetry(ns, fileSys, file, newFactor, null));
+
+    cleanupFile(fileSys, file);
+  }
+
+
+  /**
+   * Verify the following scenario.
+   * a. Put a live node to maintenance => 1 maintenance, 2 live.
+   * b. The maintenance node becomes dead => block map still has 1 maintenance,
+   *    2 live.
+   * c. Take the node out of maintenance => NN should schedule the replication
+   *    and end up with 3 live.
+   */
+  @Test(timeout = 360000)
+  public void testTakeDeadNodeOutOfMaintenance() throws Exception {
+    LOG.info("Starting testTakeDeadNodeOutOfMaintenance");
+    final int numNamenodes = 1;
+    final int numDatanodes = 4;
+    startCluster(numNamenodes, numDatanodes);
+
+    final Path file = new Path("/testTakeDeadNodeOutOfMaintenance.dat");
+    final int replicas = 3;
+
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    final FSNamesystem ns = getCluster().getNamesystem(0);
+    writeFile(fileSys, file, replicas, 1);
+
+    final DatanodeInfo nodeOutofService = takeNodeOutofService(0,
+        getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
+        AdminStates.IN_MAINTENANCE);
+
+    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService));
+
+    final DFSClient client = getDfsClient(0);
+    assertEquals("All datanodes must be alive", numDatanodes,
+        client.datanodeReport(DatanodeReportType.LIVE).length);
+
+    getCluster().stopDataNode(nodeOutofService.getXferAddr());
+    DFSTestUtil.waitForDatanodeState(
+        getCluster(), nodeOutofService.getDatanodeUuid(), false, 20000);
+    assertEquals("maintenance node shouldn't be alive", numDatanodes - 1,
+        client.datanodeReport(DatanodeReportType.LIVE).length);
+
+    // Dead maintenance node's blocks should remain in block map.
+    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService));
+
+    // When dead maintenance mode is transitioned to out of maintenance mode,
+    // its blocks should be removed from block map.
+    // This will then trigger replication to restore the live replicas back
+    // to replication factor.
+    putNodeInService(0, nodeOutofService.getDatanodeUuid());
+    assertNull(checkWithRetry(ns, fileSys, file, replicas, nodeOutofService,
+        null));
+
+    cleanupFile(fileSys, file);
   }
 
-  static protected String getFirstBlockFirstReplicaUuid(FileSystem fileSys,
+
+  /**
+   * Verify the following scenario.
+   * a. Put a live node to maintenance => 1 maintenance, 2 live.
+   * b. The maintenance node becomes dead => block map still has 1 maintenance,
+   *    2 live.
+   * c. Restart nn => block map only has 2 live => restore the 3 live.
+   * d. Restart the maintenance dn => 1 maintenance, 3 live.
+   * e. Take the node out of maintenance => over replication => 3 live.
+   */
+  @Test(timeout = 360000)
+  public void testWithNNAndDNRestart() throws Exception {
+    LOG.info("Starting testWithNNAndDNRestart");
+    final int numNamenodes = 1;
+    final int numDatanodes = 4;
+    startCluster(numNamenodes, numDatanodes);
+
+    final Path file = new Path("/testWithNNAndDNRestart.dat");
+    final int replicas = 3;
+
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    FSNamesystem ns = getCluster().getNamesystem(0);
+    writeFile(fileSys, file, replicas, 1);
+
+    DatanodeInfo nodeOutofService = takeNodeOutofService(0,
+        getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
+        AdminStates.IN_MAINTENANCE);
+
+    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService));
+
+    DFSClient client = getDfsClient(0);
+    assertEquals("All datanodes must be alive", numDatanodes,
+        client.datanodeReport(DatanodeReportType.LIVE).length);
+
+    MiniDFSCluster.DataNodeProperties dnProp =
+        getCluster().stopDataNode(nodeOutofService.getXferAddr());
+    DFSTestUtil.waitForDatanodeState(
+        getCluster(), nodeOutofService.getDatanodeUuid(), false, 20000);
+    assertEquals("maintenance node shouldn't be alive", numDatanodes - 1,
+        client.datanodeReport(DatanodeReportType.LIVE).length);
+
+    // Dead maintenance node's blocks should remain in block map.
+    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService));
+
+    // restart nn, nn will restore 3 live replicas given it doesn't
+    // know the maintenance node has the replica.
+    getCluster().restartNameNode(0);
+    ns = getCluster().getNamesystem(0);
+    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+
+    // restart dn, nn has 1 maintenance replica and 3 live replicas.
+    getCluster().restartDataNode(dnProp, true);
+    getCluster().waitActive();
+    assertNull(checkWithRetry(ns, fileSys, file, replicas, nodeOutofService));
+
+    // Put the node in service, a redundant replica should be removed.
+    putNodeInService(0, nodeOutofService.getDatanodeUuid());
+    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+
+    cleanupFile(fileSys, file);
+  }
+
+
+  /**
+   * A machine in maintenance state won't be chosen for new block allocation.
+   */
+  @Test(timeout = 3600000)
+  public void testWriteAfterMaintenance() throws IOException {
+    LOG.info("Starting testWriteAfterMaintenance");
+    startCluster(1, 3);
+
+    final Path file = new Path("/testWriteAfterMaintenance.dat");
+    int replicas = 3;
+
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    FSNamesystem ns = getCluster().getNamesystem(0);
+
+    final DatanodeInfo nodeOutofService = takeNodeOutofService(0, null,
+        Long.MAX_VALUE, null, AdminStates.IN_MAINTENANCE);
+
+    writeFile(fileSys, file, replicas, 2);
+
+    // Verify nodeOutofService wasn't chosen for write operation.
+    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService, null));
+
+    // Put the node back to service, live replicas should be restored.
+    putNodeInService(0, nodeOutofService.getDatanodeUuid());
+    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+
+    cleanupFile(fileSys, file);
+  }
+
+  /**
+   * A node has blocks under construction when it is put to maintenance.
+   * Given there are minReplication replicas somewhere else,
+   * it can be transitioned to AdminStates.IN_MAINTENANCE.
+   */
+  @Test(timeout = 360000)
+  public void testEnterMaintenanceWhenFileOpen() throws Exception {
+    LOG.info("Starting testEnterMaintenanceWhenFileOpen");
+    startCluster(1, 3);
+
+    final Path file = new Path("/testEnterMaintenanceWhenFileOpen.dat");
+
+    final FileSystem fileSys = getCluster().getFileSystem(0);
+    writeIncompleteFile(fileSys, file, (short)3, (short)2);
+
+    takeNodeOutofService(0, null, Long.MAX_VALUE, null,
+        AdminStates.IN_MAINTENANCE);
+
+    cleanupFile(fileSys, file);
+  }
+
+  /**
+   * A machine in maintenance state won't be chosen for invalidation.
+   */
+  @Test(timeout = 360000)
+  public void testInvalidation() throws IOException {
+    LOG.info("Starting testInvalidation");
+    int numNamenodes = 1;
+    int numDatanodes = 3;
+    startCluster(numNamenodes, numDatanodes);
+
+    Path file = new Path("/testInvalidation.dat");
+    int replicas = 3;
+
+    FileSystem fileSys = getCluster().getFileSystem(0);
+    FSNamesystem ns = getCluster().getNamesystem(0);
+
+    writeFile(fileSys, file, replicas);
+
+    DatanodeInfo nodeOutofService = takeNodeOutofService(0, null,
+        Long.MAX_VALUE, null, AdminStates.IN_MAINTENANCE);
+
+    DFSClient client = getDfsClient(0);
+    client.setReplication(file.toString(), (short) 1);
+
+    // Verify the nodeOutofService remains in blocksMap.
+    assertNull(checkWithRetry(ns, fileSys, file, 1, nodeOutofService));
+
+    // Restart NN and verify the nodeOutofService remains in blocksMap.
+    getCluster().restartNameNode(0);
+    ns = getCluster().getNamesystem(0);
+    assertNull(checkWithRetry(ns, fileSys, file, 1, nodeOutofService));
+
+    cleanupFile(fileSys, file);
+  }
+
+  static String getFirstBlockFirstReplicaUuid(FileSystem fileSys,
       Path name) throws IOException {
+    DatanodeInfo[] nodes = getFirstBlockReplicasDatanodeInfos(fileSys, name);
+    if (nodes != null && nodes.length != 0) {
+      return nodes[0].getDatanodeUuid();
+    } else {
+      return null;
+    }
+  }
+
+  /*
+   * Verify that the number of replicas is as expected for each block in
+   * the given file.
+   *
+   * @return - null if no failure found, else an error message string.
+   */
+  static String checkFile(FSNamesystem ns, FileSystem fileSys,
+      Path name, int repl, DatanodeInfo expectedExcludedNode,
+      DatanodeInfo expectedMaintenanceNode) throws IOException {
     // need a raw stream
     assertTrue("Not HDFS:"+fileSys.getUri(),
         fileSys instanceof DistributedFileSystem);
     HdfsDataInputStream dis = (HdfsDataInputStream)fileSys.open(name);
+    BlockManager bm = ns.getBlockManager();
     Collection<LocatedBlock> dinfo = dis.getAllBlocks();
+    String output;
     for (LocatedBlock blk : dinfo) { // for each block
       DatanodeInfo[] nodes = blk.getLocations();
-      if (nodes.length > 0) {
-        return nodes[0].getDatanodeUuid();
+      for (int j = 0; j < nodes.length; j++) { // for each replica
+        if (expectedExcludedNode != null &&
+            nodes[j].equals(expectedExcludedNode)) {
+          // excluded node must not be in LocatedBlock.
+          output = "For block " + blk.getBlock() + " replica on " +
+              nodes[j] + " found in LocatedBlock.";
+          LOG.info(output);
+          return output;
+        } else {
+          if (nodes[j].isInMaintenance()) {
+            // IN_MAINTENANCE node must not be in LocatedBlock.
+ output = "For block " + blk.getBlock() + " replica on " + + nodes[j] + " which is in maintenance state."; + LOG.info(output); + return output; + } + } + } + if (repl != nodes.length) { + output = "Wrong number of replicas for block " + blk.getBlock() + + ": expected " + repl + ", got " + nodes.length + " ,"; + for (int j = 0; j < nodes.length; j++) { // for each replica + output += nodes[j] + ","; + } + output += "pending block # " + ns.getPendingReplicationBlocks() + " ,"; + output += "under replicated # " + ns.getUnderReplicatedBlocks() + " ,"; + if (expectedExcludedNode != null) { + output += "excluded node " + expectedExcludedNode; + } + + LOG.info(output); + return output; + } + + // Verify it has the expected maintenance node + Iterator<DatanodeStorageInfo> storageInfoIter = + bm.getStorages(blk.getBlock().getLocalBlock()).iterator(); + List<DatanodeInfo> maintenanceNodes = new ArrayList<>(); + while (storageInfoIter.hasNext()) { + DatanodeInfo node = storageInfoIter.next().getDatanodeDescriptor(); + if (node.isMaintenance()) { + maintenanceNodes.add(node); + } + } + + if (expectedMaintenanceNode != null) { + if (!maintenanceNodes.contains(expectedMaintenanceNode)) { + output = "No maintenance replica on " + expectedMaintenanceNode; + LOG.info(output); + return output; + } + } else { + if (maintenanceNodes.size() != 0) { + output = "Has maintenance replica(s)"; + LOG.info(output); + return output; + } } } return null; } + + static String checkWithRetry(FSNamesystem ns, FileSystem fileSys, + Path name, int repl, DatanodeInfo inMaintenanceNode) + throws IOException { + return checkWithRetry(ns, fileSys, name, repl, inMaintenanceNode, + inMaintenanceNode); + } + + static String checkWithRetry(FSNamesystem ns, FileSystem fileSys, + Path name, int repl, DatanodeInfo excludedNode, + DatanodeInfo underMaintenanceNode) throws IOException { + int tries = 0; + String output = null; + while (tries++ < 200) { + try { + Thread.sleep(100); + output = checkFile(ns, fileSys, name, repl, excludedNode, + underMaintenanceNode); + if (output == null) { + break; + } + } catch (InterruptedException ie) { + } + } + return output; + } + + static private DatanodeInfo[] getFirstBlockReplicasDatanodeInfos( + FileSystem fileSys, Path name) throws IOException { + // need a raw stream + assertTrue("Not HDFS:"+fileSys.getUri(), + fileSys instanceof DistributedFileSystem); + HdfsDataInputStream dis = (HdfsDataInputStream)fileSys.open(name); + Collection<LocatedBlock> dinfo = dis.getAllBlocks(); + if (dinfo.iterator().hasNext()) { // for the first block + return dinfo.iterator().next().getLocations(); + } else { + return null; + } + } }
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b61fb267/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
index 2c7c720..00bea1c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
@@ -415,9 +415,10 @@ public class TestBlockManager {
       throws Exception {
     assertEquals(0, bm.numOfUnderReplicatedBlocks());
     BlockInfo block = addBlockOnNodes(testIndex, origNodes);
-    assertFalse(bm.isNeededReconstruction(block, bm.countLiveNodes(block)));
+    assertFalse(bm.isNeededReconstruction(block,
+        bm.countNodes(block, fsn.isInStartupSafeMode())));
   }
-  
+
   @Test(timeout = 60000)
   public void testNeededReconstructionWhileAppending() throws IOException {
     Configuration conf = new HdfsConfiguration();
@@ -458,7 +459,8 @@ public class TestBlockManager {
       namenode.updatePipeline(clientName, oldBlock, newBlock,
           newLocatedBlock.getLocations(), newLocatedBlock.getStorageIDs());
       BlockInfo bi = bm.getStoredBlock(newBlock.getLocalBlock());
-      assertFalse(bm.isNeededReconstruction(bi, bm.countLiveNodes(bi)));
+      assertFalse(bm.isNeededReconstruction(bi, bm.countNodes(bi,
+          cluster.getNamesystem().isInStartupSafeMode())));
     } finally {
       IOUtils.closeStream(out);
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b61fb267/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
index 6bb6040..7f1cc9a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDecommissioningStatus.java
@@ -26,17 +26,16 @@ import java.net.InetSocketAddress;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import java.util.Random;
 
 import org.apache.commons.io.output.ByteArrayOutputStream;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.BlockLocation;
-import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hdfs.AdminStatesBaseTest;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
@@ -111,47 +110,22 @@ public class TestDecommissioningStatus {
     if(cluster != null) cluster.shutdown();
   }
 
-  private FSDataOutputStream writeIncompleteFile(FileSystem fileSys, Path name,
-      short repl) throws IOException {
-    // create and write a file that contains three blocks of data
-    FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf()
-        .getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096), repl,
-        blockSize);
-    byte[] buffer = new byte[fileSize];
-    Random rand = new Random(seed);
-    rand.nextBytes(buffer);
-    stm.write(buffer);
-    // need to make sure that we actually write out both file blocks
-    // (see FSOutputSummer#flush)
-    stm.flush();
-    // Do not close stream, return it
-    // so that it is not garbage collected
-    return stm;
-  }
-
-  static private void cleanupFile(FileSystem fileSys, Path name)
-      throws IOException {
-    assertTrue(fileSys.exists(name));
-    fileSys.delete(name, true);
-    assertTrue(!fileSys.exists(name));
-  }
-
   /*
    * Decommissions the node at the given index
    */
-  private String decommissionNode(FSNamesystem namesystem, DFSClient client,
+  private String decommissionNode(DFSClient client,
       int nodeIndex) throws IOException {
     DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
 
     String nodename = info[nodeIndex].getXferAddr();
-    decommissionNode(namesystem, nodename);
+    decommissionNode(nodename);
 
     return nodename;
   }
 
   /*
    * Decommissions the node by name
    */
-  private void decommissionNode(FSNamesystem namesystem, String dnName)
+  private void decommissionNode(String dnName)
       throws IOException {
     System.out.println("Decommissioning node: " + dnName);
 
@@ -166,14 +140,14 @@ public class TestDecommissioningStatus {
       int expectedUnderRepInOpenFiles) {
     assertEquals("Unexpected num under-replicated blocks",
         expectedUnderRep,
-        decommNode.decommissioningStatus.getUnderReplicatedBlocks());
+        decommNode.getLeavingServiceStatus().getUnderReplicatedBlocks());
     assertEquals("Unexpected number of decom-only replicas",
         expectedDecommissionOnly,
-        decommNode.decommissioningStatus.getDecommissionOnlyReplicas());
+        decommNode.getLeavingServiceStatus().getOutOfServiceOnlyReplicas());
     assertEquals(
         "Unexpected number of replicas in under-replicated open files",
         expectedUnderRepInOpenFiles,
-        decommNode.decommissioningStatus.getUnderReplicatedInOpenFiles());
+        decommNode.getLeavingServiceStatus().getUnderReplicatedInOpenFiles());
   }
 
   private void checkDFSAdminDecommissionStatus(
@@ -237,13 +211,14 @@ public class TestDecommissioningStatus {
     short replicas = numDatanodes;
 
     //
     // Decommission one node. Verify the decommission status
-    // 
+    //
     Path file1 = new Path("decommission.dat");
     DFSTestUtil.createFile(fileSys, file1, fileSize, fileSize, blockSize,
         replicas, seed);
 
     Path file2 = new Path("decommission1.dat");
-    FSDataOutputStream st1 = writeIncompleteFile(fileSys, file2, replicas);
+    FSDataOutputStream st1 = AdminStatesBaseTest.writeIncompleteFile(fileSys,
+        file2, replicas, (short)(fileSize / blockSize));
     for (DataNode d: cluster.getDataNodes()) {
       DataNodeTestUtils.triggerBlockReport(d);
     }
@@ -251,7 +226,7 @@ public class TestDecommissioningStatus {
     FSNamesystem fsn = cluster.getNamesystem();
     final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
     for (int iteration = 0; iteration < numDatanodes; iteration++) {
-      String downnode = decommissionNode(fsn, client, iteration);
+      String downnode = decommissionNode(client, iteration);
       dm.refreshNodes(conf);
       decommissionedNodes.add(downnode);
       BlockManagerTestUtil.recheckDecommissionState(dm);
@@ -281,8 +256,8 @@ public class TestDecommissioningStatus {
     hostsFileWriter.initExcludeHost("");
     dm.refreshNodes(conf);
     st1.close();
-    cleanupFile(fileSys, file1);
-    cleanupFile(fileSys, file2);
+    AdminStatesBaseTest.cleanupFile(fileSys, file1);
+    AdminStatesBaseTest.cleanupFile(fileSys, file2);
   }
 
   /**
@@ -308,7 +283,7 @@ public class TestDecommissioningStatus {
     // Decommission the DN.
     FSNamesystem fsn = cluster.getNamesystem();
     final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
-    decommissionNode(fsn, dnName);
+    decommissionNode(dnName);
     dm.refreshNodes(conf);
 
     // Stop the DN when decommission is in progress.
@@ -343,7 +318,7 @@ public class TestDecommissioningStatus {
 
     // Delete the under-replicated file, which should let the
    // DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED
-    cleanupFile(fileSys, f);
+    AdminStatesBaseTest.cleanupFile(fileSys, f);
     BlockManagerTestUtil.recheckDecommissionState(dm);
     assertTrue("the node should be decommissioned",
         dead.get(0).isDecommissioned());
@@ -376,7 +351,7 @@ public class TestDecommissioningStatus {
     FSNamesystem fsn = cluster.getNamesystem();
     final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
     DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
-    decommissionNode(fsn, dnName);
+    decommissionNode(dnName);
     dm.refreshNodes(conf);
     BlockManagerTestUtil.recheckDecommissionState(dm);
     assertTrue(dnDescriptor.isDecommissioned());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b61fb267/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
index 38ec9f8..c9fe2c3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java
@@ -195,9 +195,17 @@ public class TestNamenodeCapacityReport {
   private static final float EPSILON = 0.0001f;
   @Test
   public void testXceiverCount() throws Exception {
+    testXceiverCountInternal(0);
+    testXceiverCountInternal(1);
+  }
+
+  public void testXceiverCountInternal(int minMaintenanceR) throws Exception {
     Configuration conf = new HdfsConfiguration();
     // retry one time, if close fails
-    conf.setInt(HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_RETRIES_KEY, 1);
+    conf.setInt(
+        HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_RETRIES_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY,
+        minMaintenanceR);
 
     MiniDFSCluster cluster = null;
     final int nodes = 8;
@@ -220,23 +228,23 @@ public class TestNamenodeCapacityReport {
     int expectedTotalLoad = nodes;  // xceiver server adds 1 to load
     int expectedInServiceNodes = nodes;
     int expectedInServiceLoad = nodes;
-    checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
-    
-    // shutdown half the nodes and force a heartbeat check to ensure
-    // counts are accurate
+    checkClusterHealth(nodes, namesystem, expectedTotalLoad,
+        expectedInServiceNodes, expectedInServiceLoad);
+
+    // Shutdown half the nodes followed by admin operations on those nodes.
+    // Ensure counts are accurate.
     for (int i=0; i < nodes/2; i++) {
       DataNode dn = datanodes.get(i);
       DatanodeDescriptor dnd = dnm.getDatanode(dn.getDatanodeId());
       dn.shutdown();
       DFSTestUtil.setDatanodeDead(dnd);
       BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
-      //Verify decommission of dead node won't impact nodesInService metrics.
-      dnm.getDecomManager().startDecommission(dnd);
+      //Admin operations on dead nodes won't impact nodesInService metrics.
+      startDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
      expectedInServiceNodes--;
       assertEquals(expectedInServiceNodes, namesystem.getNumLiveDataNodes());
       assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
-      //Verify recommission of dead node won't impact nodesInService metrics.
-      dnm.getDecomManager().stopDecommission(dnd);
+      stopDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
       assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
     }
 
@@ -247,8 +255,9 @@ public class TestNamenodeCapacityReport {
     datanodes = cluster.getDataNodes();
     expectedInServiceNodes = nodes;
     assertEquals(nodes, datanodes.size());
-    checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
-    
+    checkClusterHealth(nodes, namesystem, expectedTotalLoad,
+        expectedInServiceNodes, expectedInServiceLoad);
+
     // create streams and hsync to force datastreamers to start
     DFSOutputStream[] streams = new DFSOutputStream[fileCount];
     for (int i=0; i < fileCount; i++) {
@@ -263,30 +272,32 @@ public class TestNamenodeCapacityReport {
     }
     // force nodes to send load update
     triggerHeartbeats(datanodes);
-    checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
+    checkClusterHealth(nodes, namesystem, expectedTotalLoad,
+        expectedInServiceNodes, expectedInServiceLoad);
 
-    // decomm a few nodes, substract their load from the expected load,
-    // trigger heartbeat to force load update
+    // admin operations on a few nodes, subtract their load from the
+    // expected load, trigger heartbeat to force load update.
     for (int i=0; i < fileRepl; i++) {
       expectedInServiceNodes--;
       DatanodeDescriptor dnd =
           dnm.getDatanode(datanodes.get(i).getDatanodeId());
       expectedInServiceLoad -= dnd.getXceiverCount();
-      dnm.getDecomManager().startDecommission(dnd);
+      startDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
       DataNodeTestUtils.triggerHeartbeat(datanodes.get(i));
       Thread.sleep(100);
-      checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
+      checkClusterHealth(nodes, namesystem, expectedTotalLoad,
+          expectedInServiceNodes, expectedInServiceLoad);
     }
-    
+
     // check expected load while closing each stream.  recalc expected
     // load based on whether the nodes in the pipeline are decomm
     for (int i=0; i < fileCount; i++) {
-      int decomm = 0;
+      int adminOps = 0;
       for (DatanodeInfo dni : streams[i].getPipeline()) {
         DatanodeDescriptor dnd = dnm.getDatanode(dni);
         expectedTotalLoad -= 2;
-        if (dnd.isDecommissionInProgress() || dnd.isDecommissioned()) {
-          decomm++;
+        if (!dnd.isInService()) {
+          adminOps++;
         } else {
           expectedInServiceLoad -= 2;
         }
@@ -297,16 +308,17 @@ public class TestNamenodeCapacityReport {
         // nodes will go decommissioned even if there's a UC block whose
         // other locations are decommissioned too.  we'll ignore that
         // bug for now
-        if (decomm < fileRepl) {
+        if (adminOps < fileRepl) {
           throw ioe;
         }
       }
       triggerHeartbeats(datanodes);
       // verify node count and loads
-      checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
+      checkClusterHealth(nodes, namesystem, expectedTotalLoad,
+          expectedInServiceNodes, expectedInServiceLoad);
     }
 
-    // shutdown each node, verify node counts based on decomm state
+    // shutdown each node, verify node counts based on admin state
     for (int i=0; i < nodes; i++) {
       DataNode dn = datanodes.get(i);
       dn.shutdown();
@@ -320,13 +332,11 @@ public class TestNamenodeCapacityReport {
         expectedInServiceNodes--;
       }
       assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
-      // live nodes always report load of 1.  no nodes is load 0
       double expectedXceiverAvg = (i == nodes-1) ? 0.0 : 1.0;
       assertEquals((double)expectedXceiverAvg,
                    getInServiceXceiverAverage(namesystem), EPSILON);
     }
-    
     // final sanity check
     checkClusterHealth(0, namesystem, 0.0, 0, 0.0);
   } finally {
@@ -336,6 +346,24 @@ public class TestNamenodeCapacityReport {
     }
   }
 
+  private void startDecommissionOrMaintenance(DatanodeManager dnm,
+      DatanodeDescriptor dnd, boolean decomm) {
+    if (decomm) {
+      dnm.getDecomManager().startDecommission(dnd);
+    } else {
+      dnm.getDecomManager().startMaintenance(dnd, Long.MAX_VALUE);
+    }
+  }
+
+  private void stopDecommissionOrMaintenance(DatanodeManager dnm,
+      DatanodeDescriptor dnd, boolean decomm) {
+    if (decomm) {
+      dnm.getDecomManager().stopDecommission(dnd);
+    } else {
+      dnm.getDecomManager().stopMaintenance(dnd);
+    }
+  }
+
   private static void checkClusterHealth(
       int numOfLiveNodes, FSNamesystem namesystem, double expectedTotalLoad,

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b61fb267/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/HostsFileWriter.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/HostsFileWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/HostsFileWriter.java
index 4c8fcef..e171e2b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/HostsFileWriter.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/HostsFileWriter.java
@@ -54,6 +54,7 @@ public class HostsFileWriter {
     localFileSys = FileSystem.getLocal(conf);
     Path workingDir = new Path(MiniDFSCluster.getBaseDirectory());
     this.fullDir = new Path(workingDir, dir);
+    cleanup(); // In case there is some leftover from a previous run.
     assertTrue(localFileSys.mkdirs(this.fullDir));
 
     if (conf.getClass(
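These tests drive admin state through the combined host file format (see
setUseCombinedHostFileManager() in TestMaintenanceState and the
HostsFileWriter change above). For orientation, a combined dfs.hosts file is
a JSON list of datanode entries; the sketch below shows one entry requesting
maintenance with an expiration. The hostname and port values are illustrative
only, and the field names follow the HDFS datanode admin documentation for
CombinedHostFileManager rather than anything defined in this patch:

    [
      {
        "hostName": "datanode-1.example.com",
        "port": 9866,
        "adminState": "IN_MAINTENANCE",
        "maintenanceExpireTimeInMS": 1577836800000
      }
    ]

The NameNode is expected to pick this file up on dfsadmin -refreshNodes; once
maintenanceExpireTimeInMS passes, the node should leave maintenance, which is
the expiration behavior the tests above exercise via
Time.monotonicNow() + EXPIRATION_IN_MS.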