http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java
index c4a2988..62643ae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java
@@ -23,10 +23,12 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URL;
 import java.util.Collection;
+import com.google.common.base.Joiner;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
@@ -58,19 +60,23 @@ public class TestHAConfiguration {
     }
   }
 
-  private Configuration getHAConf(String nsId, String host1, String host2) {
+  private Configuration getHAConf(String nsId, String ... hosts) {
     Configuration conf = new Configuration();
     conf.set(DFSConfigKeys.DFS_NAMESERVICES, nsId);
-    conf.set(DFSUtil.addKeySuffixes(
-        DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, nsId),
-        "nn1,nn2");
     conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1");
+
+    String[] nnids = new String[hosts.length];
+    for (int i = 0; i < hosts.length; i++) {
+      String nnid = "nn" + (i + 1);
+      nnids[i] = nnid;
+      conf.set(DFSUtil.addKeySuffixes(
+          DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, nnid),
+          hosts[i] + ":12345");
+    }
+
     conf.set(DFSUtil.addKeySuffixes(
-        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, "nn1"),
-        host1 + ":12345");
-    conf.set(DFSUtil.addKeySuffixes(
-        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, "nn2"),
-        host2 + ":12345");
+        DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, nsId),
+        Joiner.on(',').join(nnids));
     return conf;
   }
 
@@ -87,11 +93,28 @@ public class TestHAConfiguration {
     // 0.0.0.0, it should substitute the address from the RPC configuration
     // above.
     StandbyCheckpointer checkpointer = new StandbyCheckpointer(conf, fsn);
-    assertEquals(new URL("http", "1.2.3.2",
-        DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, ""),
-        checkpointer.getActiveNNAddress());
+    assertAddressMatches("1.2.3.2", checkpointer.getActiveNNAddresses().get(0));
+
+    //test when there are three NNs
+    // Use non-local addresses to avoid host address matching
+    conf = getHAConf("ns1", "1.2.3.1", "1.2.3.2", "1.2.3.3");
+
+    // This is done by the NN before the StandbyCheckpointer is created
+    NameNode.initializeGenericKeys(conf, "ns1", "nn1");
+
+    checkpointer = new StandbyCheckpointer(conf, fsn);
+    assertEquals("Got an unexpected number of possible active NNs", 2, checkpointer
+        .getActiveNNAddresses().size());
+    assertEquals(new URL("http", "1.2.3.2", DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, ""),
+        checkpointer.getActiveNNAddresses().get(0));
+    assertAddressMatches("1.2.3.2", checkpointer.getActiveNNAddresses().get(0));
+    assertAddressMatches("1.2.3.3", checkpointer.getActiveNNAddresses().get(1));
   }
-
+
+  private void assertAddressMatches(String address, URL url) throws MalformedURLException {
+    assertEquals(new URL("http", address, DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, ""), url);
+  }
+
   /**
    * Tests that the namenode edits dirs and shared edits dirs are gotten with
    * duplicates removed
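For reference, the flat configuration keys that the varargs getHAConf(nsId, hosts...) helper above ends up setting for a three-NameNode nameservice look roughly like the sketch below. This snippet is illustrative only and is not part of the patch; the literal key names are simply the values behind the DFSConfigKeys constants and DFSUtil.addKeySuffixes calls used in the test.

    // Illustrative only (not part of the patch): the flat keys produced for
    // nameservice "ns1" with hosts 1.2.3.1, 1.2.3.2 and 1.2.3.3.
    import org.apache.hadoop.conf.Configuration;

    public class ThreeNNConfExample {
      public static Configuration threeNodeHAConf() {
        Configuration conf = new Configuration();
        conf.set("dfs.nameservices", "ns1");
        conf.set("dfs.ha.namenode.id", "nn1");              // the "local" NN id
        conf.set("dfs.ha.namenodes.ns1", "nn1,nn2,nn3");    // all NN ids, comma-joined
        conf.set("dfs.namenode.rpc-address.ns1.nn1", "1.2.3.1:12345");
        conf.set("dfs.namenode.rpc-address.ns1.nn2", "1.2.3.2:12345");
        conf.set("dfs.namenode.rpc-address.ns1.nn3", "1.2.3.3:12345");
        return conf;
      }
    }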
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
index 76a62ff..3da37f5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
 import java.io.IOException;
 import java.security.PrivilegedExceptionAction;
+import java.util.Random;
 import java.util.concurrent.TimeoutException;
 import org.apache.commons.logging.Log;
@@ -81,24 +82,33 @@ public class TestPipelinesFailover {
   private static final int STRESS_NUM_THREADS = 25;
   private static final int STRESS_RUNTIME = 40000;
-
+
+  private static final int NN_COUNT = 3;
+  private static final long FAILOVER_SEED = System.currentTimeMillis();
+  private static final Random failoverRandom = new Random(FAILOVER_SEED);
+  static{
+    // log the failover seed so we can reproduce the test exactly
+    LOG.info("Using random seed: " + FAILOVER_SEED
+        + " for selecting active target NN during failover");
+  }
+
   enum TestScenario {
     GRACEFUL_FAILOVER {
       @Override
-      void run(MiniDFSCluster cluster) throws IOException {
-        cluster.transitionToStandby(0);
-        cluster.transitionToActive(1);
+      void run(MiniDFSCluster cluster, int previousActive, int activeIndex) throws IOException {
+        cluster.transitionToStandby(previousActive);
+        cluster.transitionToActive(activeIndex);
       }
     },
     ORIGINAL_ACTIVE_CRASHED {
       @Override
-      void run(MiniDFSCluster cluster) throws IOException {
-        cluster.restartNameNode(0);
-        cluster.transitionToActive(1);
+      void run(MiniDFSCluster cluster, int previousActive, int activeIndex) throws IOException {
+        cluster.restartNameNode(previousActive);
+        cluster.transitionToActive(activeIndex);
       }
     };
-    abstract void run(MiniDFSCluster cluster) throws IOException;
+    abstract void run(MiniDFSCluster cluster, int previousActive, int activeIndex) throws IOException;
   }
 
   enum MethodToTestIdempotence {
@@ -135,10 +145,7 @@ public class TestPipelinesFailover {
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);
     FSDataOutputStream stm = null;
-    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
-      .nnTopology(MiniDFSNNTopology.simpleHATopology())
-      .numDataNodes(3)
-      .build();
+    MiniDFSCluster cluster = newMiniCluster(conf, 3);
     try {
       int sizeWritten = 0;
@@ -157,15 +164,15 @@ public class TestPipelinesFailover {
       // Make sure all of the blocks are written out before failover.
       stm.hflush();
-      LOG.info("Failing over to NN 1");
-      scenario.run(cluster);
+      LOG.info("Failing over to another NN");
+      int activeIndex = failover(cluster, scenario);
       // NOTE: explicitly do *not* make any further metadata calls
       // to the NN here. The next IPC call should be to allocate the next
       // block. Any other call would notice the failover and not test
       // idempotence of the operation (HDFS-3031)
-      FSNamesystem ns1 = cluster.getNameNode(1).getNamesystem();
+      FSNamesystem ns1 = cluster.getNameNode(activeIndex).getNamesystem();
       BlockManagerTestUtil.updateState(ns1.getBlockManager());
       assertEquals(0, ns1.getPendingReplicationBlocks());
       assertEquals(0, ns1.getCorruptReplicaBlocks());
@@ -213,10 +220,7 @@ public class TestPipelinesFailover {
     conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     FSDataOutputStream stm = null;
-    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
-      .nnTopology(MiniDFSNNTopology.simpleHATopology())
-      .numDataNodes(5)
-      .build();
+    MiniDFSCluster cluster = newMiniCluster(conf, 5);
     try {
       cluster.waitActive();
       cluster.transitionToActive(0);
@@ -232,8 +236,7 @@ public class TestPipelinesFailover {
       // Make sure all the blocks are written before failover
       stm.hflush();
-      LOG.info("Failing over to NN 1");
-      scenario.run(cluster);
+      int nextActive = failover(cluster, scenario);
       assertTrue(fs.exists(TEST_PATH));
@@ -242,9 +245,9 @@ public class TestPipelinesFailover {
       // write another block and a half
       AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
       stm.hflush();
-
-      LOG.info("Failing back to NN 0");
-      cluster.transitionToStandby(1);
+
+      LOG.info("Failing back from NN " + nextActive + " to NN 0");
+      cluster.transitionToStandby(nextActive);
       cluster.transitionToActive(0);
       cluster.stopDataNode(1);
@@ -262,7 +265,7 @@ public class TestPipelinesFailover {
       cluster.shutdown();
     }
   }
-
+
   /**
    * Tests lease recovery if a client crashes. This approximates the
    * use case of HBase WALs being recovered after a NN failover.
@@ -275,10 +278,7 @@ public class TestPipelinesFailover {
     conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     FSDataOutputStream stm = null;
-    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
-      .nnTopology(MiniDFSNNTopology.simpleHATopology())
-      .numDataNodes(3)
-      .build();
+    final MiniDFSCluster cluster = newMiniCluster(conf, 3);
     try {
       cluster.waitActive();
       cluster.transitionToActive(0);
@@ -329,10 +329,7 @@ public class TestPipelinesFailover {
     conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     FSDataOutputStream stm = null;
-    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
-      .nnTopology(MiniDFSNNTopology.simpleHATopology())
-      .numDataNodes(3)
-      .build();
+    final MiniDFSCluster cluster = newMiniCluster(conf, 3);
     try {
       cluster.waitActive();
       cluster.transitionToActive(0);
@@ -406,7 +403,20 @@ public class TestPipelinesFailover {
       cluster.shutdown();
     }
   }
-
+
+  /**
+   * Create a MiniCluster with the specified base configuration and the specified number of
+   * DataNodes. Helper method to ensure that the we use the same number of NNs across all the tests.
+   * @return mini cluster ready to use
+   * @throws IOException cluster cannot be started
+   */
+  private MiniDFSCluster newMiniCluster(Configuration conf, int dnCount) throws IOException {
+    return new MiniDFSCluster.Builder(conf)
+        .nnTopology(MiniDFSNNTopology.simpleHATopology(NN_COUNT))
+        .numDataNodes(dnCount)
+        .build();
+  }
+
   /**
    * Stress test for pipeline/lease recovery. Starts a number of
    * threads, each of which creates a file and has another client
@@ -485,6 +495,38 @@ public class TestPipelinesFailover {
   }
 
   /**
+   * Fail-over using the given scenario, assuming NN0 is currently active
+   * @param cluster cluster on which to run the scenario
+   * @param scenario failure scenario to run
+   * @return the index of the new active NN
+   * @throws IOException
+   */
+  private int failover(MiniDFSCluster cluster, TestScenario scenario) throws IOException {
+    return failover(cluster, scenario, 0);
+  }
+
+  /**
+   * Do a fail-over with the given scenario.
+   * @param cluster cluster on which to run the scenario
+   * @param scenario failure scenario to run
+   * @param activeIndex index of the currently active node
+   * @throws IOException on failure
+   * @return the index of the new active NN
+   */
+  private int failover(MiniDFSCluster cluster, TestScenario scenario, int activeIndex)
+      throws IOException {
+    // get index of the next node that should be active, ensuring its not the same as the currently
+    // active node
+    int nextActive = failoverRandom.nextInt(NN_COUNT);
+    if (nextActive == activeIndex) {
+      nextActive = (nextActive + 1) % NN_COUNT;
+    }
+    LOG.info("Failing over to a standby NN:" + nextActive + " from NN " + activeIndex);
+    scenario.run(cluster, activeIndex, nextActive);
+    return nextActive;
+  }
+
+  /**
    * Test thread which creates a file, has another fake user recover
    * the lease on the file, and then ensures that the file's contents
    * are properly readable. If any of these steps fails, propagates
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRemoteNameNodeInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRemoteNameNodeInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRemoteNameNodeInfo.java
new file mode 100644
index 0000000..cb2a4fc
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRemoteNameNodeInfo.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Test that we correctly obtain remote namenode information
+ */
+public class TestRemoteNameNodeInfo {
+
+  @Test
+  public void testParseMultipleNameNodes() throws Exception {
+    // start with an empty configuration
+    Configuration conf = new Configuration(false);
+
+    // add in keys for each of the NNs
+    String nameservice = "ns1";
+    MiniDFSNNTopology topology = new MiniDFSNNTopology()
+        .addNameservice(new MiniDFSNNTopology.NSConf(nameservice)
+            .addNN(new MiniDFSNNTopology.NNConf("nn1").setIpcPort(10001))
+            .addNN(new MiniDFSNNTopology.NNConf("nn2").setIpcPort(10002))
+            .addNN(new MiniDFSNNTopology.NNConf("nn3").setIpcPort(10003)));
+
+    // add the configurations of the NNs to the passed conf, so we can parse it back out
+    MiniDFSCluster.configureNameNodes(topology, false, conf);
+
+    // set the 'local' one as nn1
+    conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1");
+
+    List<RemoteNameNodeInfo> nns = RemoteNameNodeInfo.getRemoteNameNodes(conf);
+
+    // make sure it matches when we pass in the nameservice
+    List<RemoteNameNodeInfo> nns2 = RemoteNameNodeInfo.getRemoteNameNodes(conf,
+        nameservice);
+    assertEquals(nns, nns2);
+  }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java
new file mode 100644
index 0000000..dbe8070
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread;
+import org.apache.hadoop.test.MultithreadedTestUtil.TestContext;
+import org.junit.Test;
+
+/**
+ * Test that we can start several and run with namenodes on the same minicluster
+ */
+public class TestSeveralNameNodes {
+
+  private static final Log LOG = LogFactory.getLog(TestSeveralNameNodes.class);
+
+  /** ms between failovers between NNs */
+  private static final int TIME_BETWEEN_FAILOVERS = 200;
+  private static final int NUM_NAMENODES = 3;
+  private static final int NUM_THREADS = 3;
+  private static final int LIST_LENGTH = 50;
+  /** ms for length of test */
+  private static final long RUNTIME = 100000;
+
+  @Test
+  public void testCircularLinkedListWrites() throws Exception {
+    HAStressTestHarness harness = new HAStressTestHarness();
+    // setup the harness
+    harness.setNumberOfNameNodes(NUM_NAMENODES);
+    harness.addFailoverThread(TIME_BETWEEN_FAILOVERS);
+
+    final MiniDFSCluster cluster = harness.startCluster();
+    try {
+      cluster.waitActive();
+      cluster.transitionToActive(0);
+
+      // setup the a circular writer
+      FileSystem fs = harness.getFailoverFs();
+      TestContext context = harness.testCtx;
+      List<CircularWriter> writers = new ArrayList<CircularWriter>();
+      for (int i = 0; i < NUM_THREADS; i++) {
+        Path p = new Path("/test-" + i);
+        fs.mkdirs(p);
+        CircularWriter writer = new CircularWriter(context, LIST_LENGTH, fs, p);
+        writers.add(writer);
+        context.addThread(writer);
+      }
+      harness.startThreads();
+
+      // wait for all the writer threads to finish, or that we exceed the time
+      long start = System.currentTimeMillis();
+      while ((System.currentTimeMillis() - start) < RUNTIME) {
+        for (int i = 0; i < writers.size(); i++) {
+          CircularWriter writer = writers.get(i);
+          // remove the writer from the ones to check
+          if (writer.done.await(10, TimeUnit.MILLISECONDS)) {
+            writers.remove(i--);
+          }
+        }
+      }
+      assertEquals(
+          "Some writers didn't complete in expected runtime! Current writer state:" + writers, 0,
+          writers.size());
+
+      harness.stopThreads();
+    } finally {
+      System.err.println("===========================\n\n\n\n");
+      harness.shutdown();
+    }
+  }
+
+  private static class CircularWriter extends RepeatingTestThread {
+
+    private final int maxLength;
+    private final Path dir;
+    private final FileSystem fs;
+    private int currentListIndex = 0;
+    private CountDownLatch done = new CountDownLatch(1);
+
+    public CircularWriter(TestContext context, int listLength, FileSystem fs,
+        Path parentDir) {
+      super(context);
+      this.fs = fs;
+      this.maxLength = listLength;
+      this.dir = parentDir;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder builder = new StringBuilder("Circular Writer:\n");
+      builder.append("\t directory: " + dir + "\n");
+      builder.append("\t target length: " + maxLength + "\n");
+      // might be a little racy, but we just want a close count
+      builder.append("\t current item: " + currentListIndex + "\n");
+      builder.append("\t done: " + (done.getCount() == 0) + "\n");
+      return builder.toString();
+    }
+
+    @Override
+    public void doAnAction() throws Exception {
+      if (currentListIndex == maxLength) {
+        checkList();
+        this.stopTestThread();
+        done.countDown();
+      } else {
+        writeList();
+      }
+    }
+
+    /**
+     * Make sure we can traverse the entire linked list
+     */
+    private void checkList() throws IOException {
+      for (int i = 0; i < maxLength; i++) {
+        Path nextFile = getNextFile(i);
+        if (!fs.exists(nextFile)) {
+          throw new RuntimeException("Next file " + nextFile
+              + " for list does not exist!");
+        }
+        // read the next file name
+        FSDataInputStream in = fs.open(nextFile);
+        nextFile = getNextFile(in.read());
+        in.close();
+      }
+
+    }
+
+    private void cleanup() throws IOException {
+      if (!fs.delete(dir, true)) {
+        throw new RuntimeException("Didn't correctly delete " + dir);
+      }
+      if (!fs.mkdirs(dir)) {
+        throw new RuntimeException("Didn't correctly make directory " + dir);
+      }
+    }
+
+    private void writeList() throws IOException {
+      Path nextPath = getNextFile(currentListIndex++);
+      LOG.info("Writing next file: " + nextPath);
+      FSDataOutputStream file = fs.create(nextPath);
+      file.write(currentListIndex);
+      file.close();
+    }
+
+    private Path getNextFile(int i) {
+      return new Path(dir, Integer.toString(i));
+    }
+  }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
index 33af0e2..cd32502 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
@@ -63,8 +63,9 @@ import static org.junit.Assert.*;
 public class TestStandbyCheckpoints {
   private static final int NUM_DIRS_IN_LOG = 200000;
+  protected static int NUM_NNS = 3;
   protected MiniDFSCluster cluster;
-  protected NameNode nn0, nn1;
+  protected NameNode[] nns = new NameNode[NUM_NNS];
   protected FileSystem fs;
   private final Random random = new Random();
   protected File tmpOivImgDir;
@@ -88,7 +89,8 @@ public class TestStandbyCheckpoints {
     MiniDFSNNTopology topology = new MiniDFSNNTopology()
       .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
         .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(basePort))
-        .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(basePort + 1)));
+        .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(basePort + 1))
+        .addNN(new MiniDFSNNTopology.NNConf("nn3").setHttpPort(basePort + 2)));
 
     cluster = new MiniDFSCluster.Builder(conf)
       .nnTopology(topology)
@@ -96,8 +98,8 @@ public class TestStandbyCheckpoints {
       .build();
     cluster.waitActive();
-    nn0 = cluster.getNameNode(0);
-    nn1 = cluster.getNameNode(1);
+    setNNs();
+
     fs = HATestUtil.configureFailoverFs(cluster, conf);
     cluster.transitionToActive(0);
@@ -110,6 +112,12 @@ public class TestStandbyCheckpoints {
     }
   }
 
+  protected void setNNs(){
+    for (int i = 0; i < NUM_NNS; i++) {
+      nns[i] = cluster.getNameNode(i);
+    }
+  }
+
   protected Configuration setupCommonConfig() {
     tmpOivImgDir = Files.createTempDir();
@@ -136,10 +144,10 @@ public class TestStandbyCheckpoints {
   @Test(timeout = 300000)
   public void testSBNCheckpoints() throws Exception {
-    JournalSet standbyJournalSet = NameNodeAdapter.spyOnJournalSet(nn1);
-
+    JournalSet standbyJournalSet = NameNodeAdapter.spyOnJournalSet(nns[1]);
+
     doEdits(0, 10);
-    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+    HATestUtil.waitForStandbyToCatchUp(nns[0], nns[1]);
     // Once the standby catches up, it should notice that it needs to
     // do a checkpoint and save one to its local directories.
     HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
@@ -147,10 +155,9 @@ public class TestStandbyCheckpoints {
     GenericTestUtils.waitFor(new Supplier<Boolean>() {
       @Override
      public Boolean get() {
-        if(tmpOivImgDir.list().length > 0) {
+        if (tmpOivImgDir.list().length > 0) {
          return true;
-        }
-        else {
+        } else {
          return false;
        }
      }
@@ -189,9 +196,9 @@ public class TestStandbyCheckpoints {
    HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
    HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));
-    assertEquals(12, nn0.getNamesystem().getFSImage()
+    assertEquals(12, nns[0].getNamesystem().getFSImage()
        .getMostRecentCheckpointTxId());
-    assertEquals(12, nn1.getNamesystem().getFSImage()
+    assertEquals(12, nns[1].getNamesystem().getFSImage()
        .getMostRecentCheckpointTxId());
    List<File> dirs = Lists.newArrayList();
@@ -214,17 +221,17 @@ public class TestStandbyCheckpoints {
     cluster.getConfiguration(1).setInt(
        DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0);
     cluster.restartNameNode(1);
-    nn1 = cluster.getNameNode(1);
-
-    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1);
-
+    nns[1] = cluster.getNameNode(1);
+
+    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nns[1]);
+
     // We shouldn't save any checkpoints at txid=0
     Thread.sleep(1000);
     Mockito.verify(spyImage1, Mockito.never())
       .saveNamespace((FSNamesystem) Mockito.anyObject());
     // Roll the primary and wait for the standby to catch up
-    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+    HATestUtil.waitForStandbyToCatchUp(nns[0], nns[1]);
     Thread.sleep(2000);
     // We should make exactly one checkpoint at this new txid.
@@ -259,7 +266,7 @@ public class TestStandbyCheckpoints {
     cluster.getConfiguration(1).setInt(
        DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0);
     cluster.restartNameNode(1);
-    nn1 = cluster.getNameNode(1);
+    nns[1] = cluster.getNameNode(1);
     cluster.transitionToActive(0);
@@ -284,31 +291,42 @@ public class TestStandbyCheckpoints {
   @Test(timeout=60000)
   public void testCheckpointCancellationDuringUpload() throws Exception {
     // don't compress, we want a big image
-    cluster.getConfiguration(0).setBoolean(
-        DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false);
-    cluster.getConfiguration(1).setBoolean(
-        DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false);
+    for (int i = 0; i < NUM_NNS; i++) {
+      cluster.getConfiguration(i).setBoolean(
+          DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false);
+    }
+
     // Throttle SBN upload to make it hang during upload to ANN
-    cluster.getConfiguration(1).setLong(
-        DFSConfigKeys.DFS_IMAGE_TRANSFER_RATE_KEY, 100);
-    cluster.restartNameNode(0);
-    cluster.restartNameNode(1);
-    nn0 = cluster.getNameNode(0);
-    nn1 = cluster.getNameNode(1);
+    for (int i = 1; i < NUM_NNS; i++) {
+      cluster.getConfiguration(i).setLong(
+          DFSConfigKeys.DFS_IMAGE_TRANSFER_RATE_KEY, 100);
+    }
+    for (int i = 0; i < NUM_NNS; i++) {
+      cluster.restartNameNode(i);
+    }
+
+    // update references to each of the nns
+    setNNs();
     cluster.transitionToActive(0);
     doEdits(0, 100);
-    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
-    HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(104));
+
+    for (int i = 1; i < NUM_NNS; i++) {
+      HATestUtil.waitForStandbyToCatchUp(nns[0], nns[i]);
+      HATestUtil.waitForCheckpoint(cluster, i, ImmutableList.of(104));
+    }
+
     cluster.transitionToStandby(0);
     cluster.transitionToActive(1);
+
     // Wait to make sure background TransferFsImageUpload thread was cancelled.
     // This needs to be done before the next test in the suite starts, so that a
     // file descriptor is not held open during the next cluster init.
     cluster.shutdown();
     cluster = null;
+
     GenericTestUtils.waitFor(new Supplier<Boolean>() {
       @Override
       public Boolean get() {
@@ -325,7 +343,7 @@ public class TestStandbyCheckpoints {
     }, 1000, 30000);
     // Assert that former active did not accept the canceled checkpoint file.
-    assertEquals(0, nn0.getFSImage().getMostRecentCheckpointTxId());
+    assertEquals(0, nns[0].getFSImage().getMostRecentCheckpointTxId());
   }
   /**
@@ -337,7 +355,7 @@ public class TestStandbyCheckpoints {
   public void testStandbyExceptionThrownDuringCheckpoint() throws Exception {
     // Set it up so that we know when the SBN checkpoint starts and ends.
-    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1);
+    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nns[1]);
     DelayAnswer answerer = new DelayAnswer(LOG);
     Mockito.doAnswer(answerer).when(spyImage1)
       .saveNamespace(Mockito.any(FSNamesystem.class),
@@ -345,7 +363,7 @@ public class TestStandbyCheckpoints {
     // Perform some edits and wait for a checkpoint to start on the SBN.
     doEdits(0, 1000);
-    nn0.getRpcServer().rollEditLog();
+    nns[0].getRpcServer().rollEditLog();
     answerer.waitForCall();
     assertTrue("SBN is not performing checkpoint but it should be.",
        answerer.getFireCount() == 1 && answerer.getResultCount() == 0);
@@ -355,7 +373,7 @@ public class TestStandbyCheckpoints {
     ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
     try {
       // Perform an RPC to the SBN and make sure it throws a StandbyException.
-      nn1.getRpcServer().getFileInfo("/");
+      nns[1].getRpcServer().getFileInfo("/");
       fail("Should have thrown StandbyException, but instead succeeded.");
     } catch (StandbyException se) {
       GenericTestUtils.assertExceptionContains("is not supported", se);
     }
@@ -382,7 +400,7 @@ public class TestStandbyCheckpoints {
   public void testReadsAllowedDuringCheckpoint() throws Exception {
     // Set it up so that we know when the SBN checkpoint starts and ends.
-    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1);
+    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nns[1]);
     DelayAnswer answerer = new DelayAnswer(LOG);
     Mockito.doAnswer(answerer).when(spyImage1)
       .saveNamespace(Mockito.any(FSNamesystem.class),
@@ -391,7 +409,7 @@ public class TestStandbyCheckpoints {
     // Perform some edits and wait for a checkpoint to start on the SBN.
     doEdits(0, 1000);
-    nn0.getRpcServer().rollEditLog();
+    nns[0].getRpcServer().rollEditLog();
     answerer.waitForCall();
     assertTrue("SBN is not performing checkpoint but it should be.",
        answerer.getFireCount() == 1 && answerer.getResultCount() == 0);
@@ -405,7 +423,7 @@ public class TestStandbyCheckpoints {
       @Override
       public void run() {
         try {
-          nn1.getRpcServer().restoreFailedStorage("false");
+          nns[1].getRpcServer().restoreFailedStorage("false");
         } catch (IOException e) {
           e.printStackTrace();
         }
@@ -415,16 +433,16 @@ public class TestStandbyCheckpoints {
     // Make sure that our thread is waiting for the lock.
     ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
-
-    assertFalse(nn1.getNamesystem().getFsLockForTests().hasQueuedThreads());
-    assertFalse(nn1.getNamesystem().getFsLockForTests().isWriteLocked());
-    assertTrue(nn1.getNamesystem().getCpLockForTests().hasQueuedThreads());
-
+
+    assertFalse(nns[1].getNamesystem().getFsLockForTests().hasQueuedThreads());
+    assertFalse(nns[1].getNamesystem().getFsLockForTests().isWriteLocked());
+    assertTrue(nns[1].getNamesystem().getCpLockForTests().hasQueuedThreads());
+
     // Get /jmx of the standby NN web UI, which will cause the FSNS read lock to
     // be taken.
     String pageContents = DFSTestUtil.urlGet(new URL("http://" +
-        nn1.getHttpAddress().getHostName() + ":" +
-        nn1.getHttpAddress().getPort() + "/jmx"));
+        nns[1].getHttpAddress().getHostName() + ":" +
+        nns[1].getHttpAddress().getPort() + "/jmx"));
     assertTrue(pageContents.contains("NumLiveDataNodes"));
     // Make sure that the checkpoint is still going on, implying that the client
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-0.23-reserved.tgz
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-0.23-reserved.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-0.23-reserved.tgz
index 0f53f2a..abc7bbd 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-0.23-reserved.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-0.23-reserved.tgz differ
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1-reserved.tgz
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1-reserved.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1-reserved.tgz
index 737ad2d..b3f8b9d 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1-reserved.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1-reserved.tgz differ
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz
index 3cb2ee6..2256fba 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz differ
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-22-dfs-dir.tgz
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-22-dfs-dir.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-22-dfs-dir.tgz
index b69741c..c4959b4 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-22-dfs-dir.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-22-dfs-dir.tgz differ
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop1-bbw.tgz
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop1-bbw.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop1-bbw.tgz
index 2574f8b..e7d3fbd 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop1-bbw.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop1-bbw.tgz differ
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties
index ef3e249..c671ccc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties
@@ -20,4 +20,4 @@ log4j.rootLogger=info,stdout
 log4j.threshold=ALL
 log4j.appender.stdout=org.apache.log4j.ConsoleAppender
 log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n
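The log4j.properties change above only adds the thread name (%t) to the test ConversionPattern, which helps when several NameNode and failover threads log concurrently in these multi-NN tests. The sketch below is illustrative only and not part of the patch: it wires up the same layout programmatically with the log4j 1.2 API so the effect of the added [%t] field is easy to see.

    // Illustrative only (not part of the patch): the new test log pattern built in code.
    // With [%t] present, an emitted line looks roughly like:
    //   2015-05-22 10:15:01,123 [main] INFO  ThreadAwareTestLogging (ThreadAwareTestLogging.java:main(12)) - hello from main
    import org.apache.log4j.ConsoleAppender;
    import org.apache.log4j.Logger;
    import org.apache.log4j.PatternLayout;

    public class ThreadAwareTestLogging {
      public static void main(String[] args) {
        PatternLayout layout =
            new PatternLayout("%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n");
        Logger.getRootLogger().addAppender(new ConsoleAppender(layout));
        Logger.getLogger(ThreadAwareTestLogging.class)
            .info("hello from " + Thread.currentThread().getName());
      }
    }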