Author: todd
Date: Fri Dec 30 00:30:27 2011
New Revision: 1225709

URL: http://svn.apache.org/viewvc?rev=1225709&view=rev
Log:
HDFS-2692. Fix bugs related to failover from/into safe mode. Contributed by 
Todd Lipcon.

Added:
    
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
Modified:
    
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
    
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
    
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
    
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
    
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
    
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
    
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java

Modified: 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt?rev=1225709&r1=1225708&r2=1225709&view=diff
==============================================================================
--- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
 (original)
+++ 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
 Fri Dec 30 00:30:27 2011
@@ -77,3 +77,5 @@ HDFS-2693. Fix synchronization issues ar
 HDFS-1972. Fencing mechanism for block invalidations and replications (todd)
 
 HDFS-2714. Fix test cases which use standalone FSNamesystems (todd)
+
+HDFS-2692. Fix bugs related to failover from/into safe mode. (todd)

Modified: 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java?rev=1225709&r1=1225708&r2=1225709&view=diff
==============================================================================
--- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
 (original)
+++ 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
 Fri Dec 30 00:30:27 2011
@@ -2481,7 +2481,10 @@ assert storedBlock.findDatanode(dn) < 0 
 
   public void removeBlock(Block block) {
     assert namesystem.hasWriteLock();
-    block.setNumBytes(BlockCommand.NO_ACK);
+    // TODO(HA): the following causes some problems for HA:
+    // the SBN doesn't get block deletions until the next
+    // BR...
+    // block.setNumBytes(BlockCommand.NO_ACK);
     addToInvalidates(block);
     corruptReplicas.removeFromCorruptReplicasMap(block);
     blocksMap.removeBlock(block);

Modified: 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java?rev=1225709&r1=1225708&r2=1225709&view=diff
==============================================================================
--- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
 (original)
+++ 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
 Fri Dec 30 00:30:27 2011
@@ -243,11 +243,16 @@ class Checkpointer extends Daemon {
     
     long txid = bnImage.getLastAppliedTxId();
     
-    backupNode.namesystem.dir.setReady();
-    backupNode.namesystem.setBlockTotal();
-    
-    bnImage.saveFSImageInAllDirs(backupNode.getNamesystem(), txid);
-    bnStorage.writeAll();
+    backupNode.namesystem.writeLock();
+    try {
+      backupNode.namesystem.dir.setReady();
+      backupNode.namesystem.setBlockTotal();
+      
+      bnImage.saveFSImageInAllDirs(backupNode.getNamesystem(), txid);
+      bnStorage.writeAll();
+    } finally {
+      backupNode.namesystem.writeUnlock();
+    }
 
     if(cpCmd.needToReturnImage()) {
       TransferFsImage.uploadImageFromStorage(

Modified: 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java?rev=1225709&r1=1225708&r2=1225709&view=diff
==============================================================================
--- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
 (original)
+++ 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
 Fri Dec 30 00:30:27 2011
@@ -65,6 +65,7 @@ import com.google.common.base.Joiner;
 @InterfaceStability.Evolving
 public class FSEditLogLoader {
   private final FSNamesystem fsNamesys;
+  private long maxGenStamp = 0;
 
   public FSEditLogLoader(FSNamesystem fsNamesys) {
     this.fsNamesys = fsNamesys;
@@ -78,14 +79,19 @@ public class FSEditLogLoader {
   int loadFSEdits(EditLogInputStream edits, long expectedStartingTxId)
   throws IOException {
     long startTime = now();
-    int numEdits = loadFSEdits(edits, true, expectedStartingTxId);
-    FSImage.LOG.info("Edits file " + edits.getName() 
-        + " of size " + edits.length() + " edits # " + numEdits 
-        + " loaded in " + (now()-startTime)/1000 + " seconds.");
-    return numEdits;
+    fsNamesys.writeLock();
+    try {
+      int numEdits = loadFSEdits(edits, true, expectedStartingTxId);
+      FSImage.LOG.info("Edits file " + edits.getName() 
+          + " of size " + edits.length() + " edits # " + numEdits 
+          + " loaded in " + (now()-startTime)/1000 + " seconds.");
+      return numEdits;
+    } finally {
+      fsNamesys.writeUnlock();
+    }
   }
 
-  int loadFSEdits(EditLogInputStream edits, boolean closeOnExit,
+  private int loadFSEdits(EditLogInputStream edits, boolean closeOnExit,
                   long expectedStartingTxId)
       throws IOException {
     int numEdits = 0;
@@ -95,6 +101,13 @@ public class FSEditLogLoader {
       numEdits = loadEditRecords(logVersion, edits, false, 
                                  expectedStartingTxId);
     } finally {
+      fsNamesys.setBlockTotal();
+      // Delay the notification of genstamp updates until after
+      // setBlockTotal() above. Otherwise, we will mark blocks
+      // as "safe" before they've been incorporated in the expected
+      // totalBlocks and threshold for SafeMode -- triggering an
+      // assertion failure and/or exiting safemode too early!
+      fsNamesys.notifyGenStampUpdate(maxGenStamp);
       if(closeOnExit) {
         edits.close();
       }
@@ -485,9 +498,9 @@ public class FSEditLogLoader {
       }
     }
     
-    if (addCloseOp.blocks.length > 0) {
-      fsNamesys.notifyGenStampUpdate(
-          addCloseOp.blocks[addCloseOp.blocks.length - 1].getGenerationStamp());
+    // Record the max genstamp seen
+    for (Block b : addCloseOp.blocks) {
+      maxGenStamp = Math.max(maxGenStamp, b.getGenerationStamp());
     }
   }
 

Modified: 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1225709&r1=1225708&r2=1225709&view=diff
==============================================================================
--- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
 (original)
+++ 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
 Fri Dec 30 00:30:27 2011
@@ -418,6 +418,7 @@ public class FSNamesystem implements Nam
       startOpt = StartupOption.REGULAR;
     }
     boolean success = false;
+    writeLock();
     try {
       // We shouldn't be calling saveNamespace if we've come up in standby state.
       if (fsImage.recoverTransitionRead(startOpt, this) && !haEnabled) {
@@ -434,6 +435,7 @@ public class FSNamesystem implements Nam
       if (!success) {
         fsImage.close();
       }
+      writeUnlock();
     }
     dir.imageLoadComplete();
   }
@@ -3244,9 +3246,7 @@ public class FSNamesystem implements Nam
      * @return true if in safe mode
      */
     private synchronized boolean isOn() {
-      assert isConsistent() : " SafeMode: Inconsistent filesystem state: "
-        + "Total num of blocks, active blocks, or "
-        + "total safe blocks don't match.";
+      doConsistencyCheck();
       return this.reached >= 0;
     }
       
@@ -3362,6 +3362,9 @@ public class FSNamesystem implements Nam
      * Check and trigger safe mode if needed. 
      */
     private void checkMode() {
+      // Have to have write-lock since leaving safemode initializes
+      // repl queues, which requires write lock
+      assert hasWriteLock();
       if (needEnter()) {
         enter();
         // check if we are ready to initialize replication queues
@@ -3541,16 +3544,26 @@ public class FSNamesystem implements Nam
       
     /**
      * Checks consistency of the class state.
-     * This is costly and currently called only in assert.
-     * @throws IOException 
+     * This is costly so only runs if asserts are enabled.
      */
-    private boolean isConsistent() {
+    private void doConsistencyCheck() {
+      boolean assertsOn = false;
+      assert assertsOn = true; // set to true if asserts are on
+      if (!assertsOn) return;
+      
+      
+      int activeBlocks = blockManager.getActiveBlockCount();
       if (blockTotal == -1 && blockSafe == -1) {
-        return true; // manual safe mode
+        return; // manual safe mode
+      }
+      if ((blockTotal != activeBlocks) &&
+          !(blockSafe >= 0 && blockSafe <= blockTotal)) {
+        throw new AssertionError(
+            " SafeMode: Inconsistent filesystem state: "
+        + "SafeMode data: blockTotal=" + blockTotal
+        + " blockSafe=" + blockSafe + "; "
+        + "BlockManager data: active="  + activeBlocks);
       }
-      int activeBlocks = blockManager.getActiveBlockCount();
-      return (blockTotal == activeBlocks) ||
-        (blockSafe >= 0 && blockSafe <= blockTotal);
     }
   }
     
@@ -3663,7 +3676,7 @@ public class FSNamesystem implements Nam
   /**
    * Set the total number of blocks in the system. 
    */
-  void setBlockTotal() {
+  public void setBlockTotal() {
     // safeMode is volatile, and may be set to null at any time
     SafeModeInfo safeMode = this.safeMode;
     if (safeMode == null)
@@ -4822,10 +4835,15 @@ public class FSNamesystem implements Nam
   }
   
   public void notifyGenStampUpdate(long gs) {
-    LOG.info("=> notified of genstamp update for: " + gs);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Generation stamp " + gs + " has been reached. " +
+          "Processing pending messages from DataNodes...");
+    }
     DataNodeMessage msg = pendingDatanodeMessages.take(gs);
     while (msg != null) {
-      LOG.info("processing message: " + msg);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Processing previously pending message: " + msg);
+      }
       try {
         switch (msg.getType()) {
         case BLOCK_RECEIVED_DELETE:

Modified: 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java?rev=1225709&r1=1225708&r2=1225709&view=diff
==============================================================================
--- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
 (original)
+++ 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
 Fri Dec 30 00:30:27 2011
@@ -49,6 +49,8 @@ public class EditLogTailer {
   private final FSImage image;
   private final FSEditLog editLog;
   
+  private volatile Throwable lastError = null;
+  
   public EditLogTailer(FSNamesystem namesystem) {
     this.tailerThread = new EditLogTailerThread();
     this.namesystem = namesystem;
@@ -81,6 +83,11 @@ public class EditLogTailer {
     tailerThread.interrupt();
   }
   
+  @VisibleForTesting
+  public Throwable getLastError() {
+    return lastError;
+  }
+  
   public void catchupDuringFailover() throws IOException {
     Preconditions.checkState(tailerThread == null ||
         !tailerThread.isAlive(),
@@ -146,12 +153,19 @@ public class EditLogTailer {
           try {
             doTailEdits();
           } catch (IOException e) {
+            if (e.getCause() instanceof RuntimeException) {
+              throw (RuntimeException)e.getCause();
+            } else if (e.getCause() instanceof Error) {
+              throw (Error)e.getCause();
+            }
+                
             // Will try again
             LOG.info("Got error, will try again.", e);
           }
         } catch (Throwable t) {
           // TODO(HA): What should we do in this case? Shutdown the standby NN?
           LOG.error("Edit log tailer received throwable", t);
+          lastError = t;
         }
 
         try {

Modified: 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java?rev=1225709&r1=1225708&r2=1225709&view=diff
==============================================================================
--- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
 (original)
+++ 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
 Fri Dec 30 00:30:27 2011
@@ -1452,7 +1452,8 @@ public class MiniDFSCluster {
     sizes = NameNodeAdapter.getStats(nameNode.getNamesystem());
     boolean isUp = false;
     synchronized (this) {
-      isUp = ((!nameNode.isInSafeMode() || !waitSafeMode) && sizes[0] != 0);
+      isUp = ((!nameNode.isInSafeMode() || !waitSafeMode) &&
+          sizes[ClientProtocol.GET_STATS_CAPACITY_IDX] != 0);
     }
     return isUp;
   }

Modified: 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java?rev=1225709&r1=1225708&r2=1225709&view=diff
==============================================================================
--- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java
 (original)
+++ 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java
 Fri Dec 30 00:30:27 2011
@@ -410,7 +410,7 @@ public class TestDNFencing {
     return count;
   }
 
-  private void waitForDNDeletions(final MiniDFSCluster cluster)
+  static void waitForDNDeletions(final MiniDFSCluster cluster)
       throws TimeoutException, InterruptedException {
     GenericTestUtils.waitFor(new Supplier<Boolean>() {
       @Override
@@ -426,7 +426,7 @@ public class TestDNFencing {
     
   }
 
-  private void waitForNNToIssueDeletions(final NameNode nn)
+  static void waitForNNToIssueDeletions(final NameNode nn)
       throws Exception {
     GenericTestUtils.waitFor(new Supplier<Boolean>() {
       @Override

Added: 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java?rev=1225709&view=auto
==============================================================================
--- 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
 (added)
+++ 
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
 Fri Dec 30 00:30:27 2011
@@ -0,0 +1,334 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.TestDFSClientFailover;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests that exercise safemode in an HA cluster.
+ */
+public class TestHASafeMode {
+  private static final Log LOG = LogFactory.getLog(TestHASafeMode.class);
+  private static final int BLOCK_SIZE = 1024;
+  private NameNode nn0;
+  private NameNode nn1;
+  private FileSystem fs;
+  private MiniDFSCluster cluster;
+  
+  @Before
+  public void setupCluster() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
+    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
+    cluster = new MiniDFSCluster.Builder(conf)
+      .nnTopology(MiniDFSNNTopology.simpleHATopology())
+      .numDataNodes(3)
+      .waitSafeMode(false)
+      .build();
+    cluster.waitActive();
+    
+    nn0 = cluster.getNameNode(0);
+    nn1 = cluster.getNameNode(1);
+    fs = TestDFSClientFailover.configureFailoverFs(cluster, conf);
+
+    cluster.transitionToActive(0);
+  }
+  
+  @After
+  public void shutdownCluster() throws IOException {
+    if (cluster != null) {
+      assertNull(nn1.getNamesystem().getEditLogTailer().getLastError());
+      cluster.shutdown();
+    }
+  }
+  
+  private void restartStandby() throws IOException {
+    cluster.shutdownNameNode(1);
+    // Set the safemode extension to be lengthy, so that the tests
+    // can check the safemode message after the safemode conditions
+    // have been achieved, without being racy.
+    cluster.getConfiguration(1).setInt(
+        DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 30000);
+    cluster.restartNameNode(1);
+    nn1 = cluster.getNameNode(1);
+    nn1.getNamesystem().getEditLogTailer().setSleepTime(250);
+    nn1.getNamesystem().getEditLogTailer().interrupt();
+  }
+  
+  /**
+   * Tests the case where, while a standby is down, more blocks are
+   * added to the namespace, but not rolled. So, when it starts up,
+   * it receives notification about the new blocks during
+   * the safemode extension period.
+   */
+  @Test
+  public void testBlocksAddedBeforeStandbyRestart() throws Exception {
+    banner("Starting with NN0 active and NN1 standby, creating some blocks");
+    DFSTestUtil.createFile(fs, new Path("/test"), 3*BLOCK_SIZE, (short) 3, 1L);
+    // Roll edit log so that, when the SBN restarts, it will load
+    // the namespace during startup.
+    nn0.getRpcServer().rollEditLog();
+
+    banner("Creating some blocks that won't be in the edit log");
+    DFSTestUtil.createFile(fs, new Path("/test2"), 5*BLOCK_SIZE, (short) 3, 1L);
+    
+    banner("Restarting standby");
+    restartStandby();
+
+    // We expect it to be stuck in safemode (not the extension) because
+    // the block reports are delayed (since they include blocks
+    // from /test2, which have too-high genstamps).
+    String status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 0 needs additional 3 blocks to reach"));
+
+    banner("Waiting for standby to catch up to active namespace");
+    TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1);
+
+    status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 8 has reached the threshold 0.9990 of " +
+            "total blocks 8. Safe mode will be turned off automatically"));
+  }
+  
+  /**
+   * Similar to {@link #testBlocksAddedBeforeStandbyRestart()} except that
+   * the new blocks are allocated after the SBN has restarted. So, the
+   * blocks were not present in the original block reports at startup
+   * but are reported separately by blockReceived calls.
+   */
+  @Test
+  public void testBlocksAddedWhileInSafeMode() throws Exception {
+    banner("Starting with NN0 active and NN1 standby, creating some blocks");
+    DFSTestUtil.createFile(fs, new Path("/test"), 3*BLOCK_SIZE, (short) 3, 1L);
+    // Roll edit log so that, when the SBN restarts, it will load
+    // the namespace during startup.
+    nn0.getRpcServer().rollEditLog();
+    
+    banner("Restarting standby");
+    restartStandby();
+    
+    String status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 3 has reached the threshold 0.9990 of " +
+            "total blocks 3. Safe mode will be turned off automatically"));
+    
+    // Create a few blocks which will send blockReceived calls to the
+    // SBN.
+    banner("Creating some blocks while SBN is in safe mode");
+    DFSTestUtil.createFile(fs, new Path("/test2"), 5*BLOCK_SIZE, (short) 3, 1L);
+
+    
+    banner("Waiting for standby to catch up to active namespace");
+    TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1);
+
+    status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 8 has reached the threshold 0.9990 of " +
+            "total blocks 8. Safe mode will be turned off automatically"));
+  }
+
+  /**
+   * Test for the following case proposed by ATM:
+   * 1. Both NNs are up, one is active. There are 100 blocks. Both are
+   *    out of safemode.
+   * 2. 10 block deletions get processed by NN1. NN2 enqueues these DN messages
+   *     until it next reads from a checkpointed edits file.
+   * 3. NN2 gets restarted. Its queues are lost.
+   * 4. NN2 comes up, reads from all the finalized edits files. Concludes there
+   *    should still be 100 blocks.
+   * 5. NN2 receives a block report from all the DNs, which only accounts for
+   *    90 blocks. It doesn't leave safemode.
+   * 6. NN1 dies or is transitioned to standby.
+   * 7. NN2 is transitioned to active. It reads all the edits from NN1. It now
+   *    knows there should only be 90 blocks, but it's still in safemode.
+   * 8. NN2 doesn't ever recheck whether it should leave safemode.
+   * 
+   * This is essentially the inverse of {@link #testBlocksAddedBeforeStandbyRestart()}
+   */
+  @Test
+  public void testBlocksRemovedBeforeStandbyRestart() throws Exception {
+    banner("Starting with NN0 active and NN1 standby, creating some blocks");
+    DFSTestUtil.createFile(fs, new Path("/test"), 5*BLOCK_SIZE, (short) 3, 1L);
+
+    // Roll edit log so that, when the SBN restarts, it will load
+    // the namespace during startup.
+    nn0.getRpcServer().rollEditLog();
+
+    // Delete those blocks again, so they won't get reported to the SBN
+    // once it starts up
+    banner("Removing the blocks without rolling the edit log");
+    fs.delete(new Path("/test"), true);
+    BlockManagerTestUtil.computeAllPendingWork(
+        nn0.getNamesystem().getBlockManager());
+    cluster.triggerHeartbeats();
+
+    banner("Restarting standby");
+    restartStandby();
+    String status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 0 needs additional 5 blocks to reach"));
+    
+    banner("Waiting for standby to catch up to active namespace");
+    TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1);
+    status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 0 has reached the threshold 0.9990 of " +
+            "total blocks 0. Safe mode will be turned off automatically"));
+  }
+  
+  /**
+   * Similar to {@link #testBlocksRemovedBeforeStandbyRestart()} except that
+   * the blocks are removed after the SBN has restarted. So, the
+   * blocks were present in the original block reports at startup
+   * but are deleted separately later by deletion reports.
+   */
+  @Test
+  public void testBlocksRemovedWhileInSafeMode() throws Exception {
+    banner("Starting with NN0 active and NN1 standby, creating some blocks");
+    DFSTestUtil.createFile(fs, new Path("/test"), 10*BLOCK_SIZE, (short) 3, 1L);
+
+    // Roll edit log so that, when the SBN restarts, it will load
+    // the namespace during startup.
+    nn0.getRpcServer().rollEditLog();
+ 
+    banner("Restarting standby");
+    restartStandby();
+    
+    // It will initially have all of the blocks necessary.
+    String status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 10 has reached the threshold 0.9990 of " +
+            "total blocks 10. Safe mode will be turned off automatically"));
+
+    // Delete those blocks while the SBN is in safe mode - this
+    // should reduce it back below the threshold
+    banner("Removing the blocks without rolling the edit log");
+    fs.delete(new Path("/test"), true);
+    BlockManagerTestUtil.computeAllPendingWork(
+        nn0.getNamesystem().getBlockManager());
+    
+    banner("Triggering deletions on DNs and Deletion Reports");
+    cluster.triggerHeartbeats();
+    TestDNFencing.waitForDNDeletions(cluster);
+    cluster.triggerDeletionReports();
+
+    status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 0 needs additional 10 blocks"));
+
+    banner("Waiting for standby to catch up to active namespace");
+    TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1);
+
+    status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 0 has reached the threshold 0.9990 of " +
+            "total blocks 0. Safe mode will be turned off automatically"));
+  }
+  
+  /**
+   * Set up a namesystem with several edits, both deletions and
+   * additions, and failover to a new NN while that NN is in
+   * safemode. Ensure that it will exit safemode.
+   */
+  @Test
+  public void testComplexFailoverIntoSafemode() throws Exception {
+    banner("Starting with NN0 active and NN1 standby, creating some blocks");
+    DFSTestUtil.createFile(fs, new Path("/test"), 3*BLOCK_SIZE, (short) 3, 1L);
+    // Roll edit log so that, when the SBN restarts, it will load
+    // the namespace during startup and enter safemode.
+    nn0.getRpcServer().rollEditLog();
+
+    banner("Creating some blocks that won't be in the edit log");
+    DFSTestUtil.createFile(fs, new Path("/test2"), 5*BLOCK_SIZE, (short) 3, 1L);
+    
+    banner("Deleting the original blocks");
+    fs.delete(new Path("/test"), true);
+    
+    banner("Restarting standby");
+    restartStandby();
+
+    // We expect it to be stuck in safemode (not the extension) because
+    // the block reports are delayed (since they include blocks
+    // from /test2, which have too-high genstamps).
+    String status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 0 needs additional 3 blocks to reach"));
+
+    // Initiate a failover into it while it's in safemode
+    banner("Initiating a failover into NN1 in safemode");
+    NameNodeAdapter.abortEditLogs(nn0);
+    cluster.transitionToActive(1);
+
+    status = nn1.getNamesystem().getSafemode();
+    assertTrue("Bad safemode status: '" + status + "'",
+        status.startsWith(
+            "Safe mode is ON." +
+            "The reported blocks 5 has reached the threshold 0.9990 of " +
+            "total blocks 5. Safe mode will be turned off automatically"));
+  }
+  
+  /**
+   * Print a big banner in the test log to make debug easier.
+   */
+  static void banner(String string) {
+    LOG.info("\n\n\n\n================================================\n" +
+        string + "\n" +
+        "==================================================\n\n");
+  }
+
+}


Reply via email to