HDFS-9329. TestBootstrapStandby#testRateThrottling is flaky because fsimage 
size is smaller than IO buffer size. Contributed by Zhe Zhang.

Change-Id: I09896c46e9ee0718b67c64fac5acfb3f7decf0b9


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/259bea3b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/259bea3b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/259bea3b

Branch: refs/heads/HDFS-7240
Commit: 259bea3b48de7469a500831efb3306e8464a2dc9
Parents: 04d97f8
Author: Zhe Zhang <z...@apache.org>
Authored: Mon Nov 2 10:03:39 2015 -0800
Committer: Zhe Zhang <z...@apache.org>
Committed: Mon Nov 2 10:03:39 2015 -0800

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +
 .../namenode/ha/TestBootstrapStandby.java       | 79 +++++++++++++++-----
 2 files changed, 62 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/259bea3b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 3b2d997..a2e4824 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -2204,6 +2204,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-9343. Empty caller context considered invalid. (Mingliang Liu via
     Arpit Agarwal)
 
+    HDFS-9329. TestBootstrapStandby#testRateThrottling is flaky because fsimage
+    size is smaller than IO buffer size. (zhz)
+
 Release 2.7.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/259bea3b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
index fd45816..9f0d95b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
@@ -25,13 +25,16 @@ import java.io.File;
 import java.io.IOException;
 import java.net.URI;
 import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import com.google.common.base.Supplier;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSUtilClient;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature;
@@ -109,12 +112,16 @@ public class TestBootstrapStandby {
             "storage directory does not exist or is not accessible", ioe);
       }
 
+      int expectedCheckpointTxId = (int)NameNodeAdapter.getNamesystem(nn0)
+          .getFSImage().getMostRecentCheckpointTxId();
+
       int rc = BootstrapStandby.run(new String[] { "-nonInteractive" },
           cluster.getConfiguration(index));
       assertEquals(0, rc);
 
       // Should have copied over the namespace from the active
-      FSImageTestUtil.assertNNHasCheckpoints(cluster, index, ImmutableList.of(0));
+      FSImageTestUtil.assertNNHasCheckpoints(cluster, index,
+          ImmutableList.of(expectedCheckpointTxId));
     }
 
     // We should now be able to start the standbys successfully.
@@ -221,7 +228,7 @@ public class TestBootstrapStandby {
    * {@link DFSConfigKeys#DFS_IMAGE_TRANSFER_BOOTSTRAP_STANDBY_RATE_KEY}
    * created by HDFS-8808.
    */
-  @Test
+  @Test(timeout=30000)
   public void testRateThrottling() throws Exception {
     cluster.getConfiguration(0).setLong(
         DFSConfigKeys.DFS_IMAGE_TRANSFER_RATE_KEY, 1);
@@ -229,23 +236,46 @@ public class TestBootstrapStandby {
     cluster.waitActive();
     nn0 = cluster.getNameNode(0);
     cluster.transitionToActive(0);
-    // Each edit has at least 1 byte. So the lowRate definitely should cause
-    // a timeout, if enforced. If lowRate is not enforced, any reasonable test
-    // machine should at least download an image with 5 edits in 5 seconds.
-    for (int i = 0; i < 5; i++) {
+    // Any reasonable test machine should be able to transfer 1 byte per MS
+    // (which is ~1K/s)
+    final int minXferRatePerMS = 1;
+    int imageXferBufferSize = DFSUtilClient.getIoFileBufferSize(
+        new Configuration());
+    File imageFile = null;
+    int dirIdx = 0;
+    while (imageFile == null || imageFile.length() < imageXferBufferSize) {
+      for (int i = 0; i < 5; i++) {
+        cluster.getFileSystem(0).mkdirs(new Path("/foo" + dirIdx++));
+      }
       nn0.getRpcServer().rollEditLog();
+      NameNodeAdapter.enterSafeMode(nn0, false);
+      NameNodeAdapter.saveNamespace(nn0);
+      NameNodeAdapter.leaveSafeMode(nn0);
+      imageFile = FSImageTestUtil.findLatestImageFile(FSImageTestUtil
+          .getFSImage(nn0).getStorage().getStorageDir(0));
     }
+
+    final int timeOut = (int)(imageFile.length() / minXferRatePerMS) + 1;
     // A very low DFS_IMAGE_TRANSFER_RATE_KEY value won't affect bootstrapping
+    final AtomicBoolean bootStrapped = new AtomicBoolean(false);
+    new Thread(
+        new Runnable() {
+          @Override
+          public void run() {
+            try {
+              testSuccessfulBaseCase();
+              bootStrapped.set(true);
+            } catch (Exception e) {
+              fail(e.getMessage());
+            }
+          }
+        }
+    ).start();
     GenericTestUtils.waitFor(new Supplier<Boolean>() {
       public Boolean get() {
-        try {
-          testSuccessfulBaseCase();
-          return true;
-        } catch (Exception e) {
-          return false;
-        }
+        return bootStrapped.get();
       }
-    }, 500, 5000);
+    }, 50, timeOut);
 
     shutdownCluster();
     setupCluster();
@@ -257,17 +287,26 @@ public class TestBootstrapStandby {
     cluster.transitionToActive(0);
     // A very low DFS_IMAGE_TRANSFER_BOOTSTRAP_STANDBY_RATE_KEY value should
     // cause timeout
+    bootStrapped.set(false);
+    new Thread(
+        new Runnable() {
+          @Override
+          public void run() {
+            try {
+              testSuccessfulBaseCase();
+              bootStrapped.set(true);
+            } catch (Exception e) {
+              LOG.info(e.getMessage());
+            }
+          }
+        }
+    ).start();
     try {
       GenericTestUtils.waitFor(new Supplier<Boolean>() {
         public Boolean get() {
-          try {
-            testSuccessfulBaseCase();
-            return true;
-          } catch (Exception e) {
-            return false;
-          }
+          return bootStrapped.get();
         }
-      }, 500, 5000);
+      }, 50, timeOut);
       fail("Did not timeout");
     } catch (TimeoutException e) {
       LOG.info("Encountered expected timeout.");

Reply via email to