Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java?rev=1561381&r1=1561380&r2=1561381&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java Sat Jan 25 20:01:26 2014
@@ -1,89 +1,506 @@
 /**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hdfs.server.namenode.ha;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Collection;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
+import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster.Builder;
+import org.apache.hadoop.hdfs.qjournal.server.Journal;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.tools.DFSAdmin;
+import org.apache.hadoop.hdfs.util.PersistentLongFile;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Before;
 import org.junit.Test;
-import com.google.common.collect.Lists;
+import com.google.common.base.Joiner;
 /**
  * Tests for upgrading with HA enabled.
  */
 public class TestDFSUpgradeWithHA {
-
+
   private static final Log LOG = LogFactory.getLog(TestDFSUpgradeWithHA.class);
+
+  private Configuration conf;
+
+  @Before
+  public void createConfiguration() {
+    conf = new HdfsConfiguration();
+    // Turn off persistent IPC, so that the DFSClient can survive NN restart
+    conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
+        0);
+  }
+
+  private static void assertCTimesEqual(MiniDFSCluster cluster) {
+    long nn1CTime = cluster.getNamesystem(0).getFSImage().getStorage().getCTime();
+    long nn2CTime = cluster.getNamesystem(1).getFSImage().getStorage().getCTime();
+    assertEquals(nn1CTime, nn2CTime);
+  }
+
+  private static void checkClusterPreviousDirExistence(MiniDFSCluster cluster,
+      boolean shouldExist) {
+    for (int i = 0; i < 2; i++) {
+      checkNnPreviousDirExistence(cluster, i, shouldExist);
+    }
+  }
+
+  private static void checkNnPreviousDirExistence(MiniDFSCluster cluster,
+      int index, boolean shouldExist) {
+    Collection<URI> nameDirs = cluster.getNameDirs(index);
+    for (URI nnDir : nameDirs) {
+      checkPreviousDirExistence(new File(nnDir), shouldExist);
+    }
+  }
+  private static void checkJnPreviousDirExistence(MiniQJMHACluster jnCluster,
+      boolean shouldExist) throws IOException {
+    for (int i = 0; i < 3; i++) {
+      checkPreviousDirExistence(
+          jnCluster.getJournalCluster().getJournalDir(i, "ns1"), shouldExist);
+    }
+    if (shouldExist) {
+      assertEpochFilesCopied(jnCluster);
+    }
+  }
+
+  private static void assertEpochFilesCopied(MiniQJMHACluster jnCluster)
+      throws IOException {
+    for (int i = 0; i < 3; i++) {
+      File journalDir = jnCluster.getJournalCluster().getJournalDir(i, "ns1");
+      File currDir = new File(journalDir, "current");
+      File prevDir = new File(journalDir, "previous");
+      for (String fileName : new String[]{ Journal.LAST_PROMISED_FILENAME,
+          Journal.LAST_WRITER_EPOCH }) {
+        File prevFile = new File(prevDir, fileName);
+        // Possible the prev file doesn't exist, e.g. if there has never been a
+        // writer before the upgrade.
+        if (prevFile.exists()) {
+          PersistentLongFile prevLongFile = new PersistentLongFile(prevFile, -10);
+          PersistentLongFile currLongFile = new PersistentLongFile(new File(currDir,
+              fileName), -11);
+          assertTrue("Value in " + fileName + " has decreased on upgrade in "
+              + journalDir, prevLongFile.get() <= currLongFile.get());
+        }
+      }
+    }
+  }
+
+  private static void checkPreviousDirExistence(File rootDir,
+      boolean shouldExist) {
+    File previousDir = new File(rootDir, "previous");
+    if (shouldExist) {
+      assertTrue(previousDir + " does not exist", previousDir.exists());
+    } else {
+      assertFalse(previousDir + " does exist", previousDir.exists());
+    }
+  }
+
+  private void runFinalizeCommand(MiniDFSCluster cluster)
+      throws IOException {
+    HATestUtil.setFailoverConfigurations(cluster, conf);
+    new DFSAdmin(conf).finalizeUpgrade();
+  }
+
   /**
-   * Make sure that an HA NN refuses to start if given an upgrade-related
-   * startup option.
+   * Ensure that an admin cannot finalize an HA upgrade without at least one NN
+   * being active.
    */
   @Test
-  public void testStartingWithUpgradeOptionsFails() throws IOException {
-    for (StartupOption startOpt : Lists.newArrayList(new StartupOption[] {
-        StartupOption.UPGRADE, StartupOption.FINALIZE,
-        StartupOption.ROLLBACK })) {
-      MiniDFSCluster cluster = null;
+  public void testCannotFinalizeIfNoActive() throws IOException,
+      URISyntaxException {
+    MiniDFSCluster cluster = null;
+    FileSystem fs = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .nnTopology(MiniDFSNNTopology.simpleHATopology())
+          .numDataNodes(0)
+          .build();
+
+      File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+
+      // No upgrade is in progress at the moment.
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      checkPreviousDirExistence(sharedDir, false);
+
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkPreviousDirExistence(sharedDir, true);
+
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+
+      // Restart NN0 without the -upgrade flag, to make sure that works.
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+      cluster.restartNameNode(0, false);
+
+      // Make sure we can still do FS ops after upgrading.
+      cluster.transitionToActive(0);
+      assertTrue(fs.mkdirs(new Path("/foo3")));
+
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+
+      // Now restart NN1 and make sure that we can do ops against that as well.
+      cluster.restartNameNode(1);
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+      assertTrue(fs.mkdirs(new Path("/foo4")));
+
+      assertCTimesEqual(cluster);
+
+      // Now there's no active NN.
+      cluster.transitionToStandby(1);
+
       try {
-        cluster = new MiniDFSCluster.Builder(new Configuration())
-            .nnTopology(MiniDFSNNTopology.simpleHATopology())
-            .startupOption(startOpt)
-            .numDataNodes(0)
-            .build();
-        fail("Should not have been able to start an HA NN in upgrade mode");
-      } catch (IllegalArgumentException iae) {
+        runFinalizeCommand(cluster);
+        fail("Should not have been able to finalize upgrade with no NN active");
+      } catch (IOException ioe) {
         GenericTestUtils.assertExceptionContains(
-            "Cannot perform DFS upgrade with HA enabled.", iae);
-        LOG.info("Got expected exception", iae);
-      } finally {
-        if (cluster != null) {
-          cluster.shutdown();
-        }
+            "Cannot finalize with no NameNode active", ioe);
+      }
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
       }
     }
   }
-
+
   /**
-   * Make sure that an HA NN won't start if a previous upgrade was in progress.
+   * Make sure that an HA NN with NFS-based HA can successfully start and
+   * upgrade.
    */
   @Test
-  public void testStartingWithUpgradeInProgressFails() throws Exception {
+  public void testNfsUpgrade() throws IOException, URISyntaxException {
     MiniDFSCluster cluster = null;
+    FileSystem fs = null;
     try {
-      cluster = new MiniDFSCluster.Builder(new Configuration())
+      cluster = new MiniDFSCluster.Builder(conf)
           .nnTopology(MiniDFSNNTopology.simpleHATopology())
           .numDataNodes(0)
          .build();
+      File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+
+      // No upgrade is in progress at the moment.
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      checkPreviousDirExistence(sharedDir, false);
+
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkPreviousDirExistence(sharedDir, true);
+
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+
+      // Restart NN0 without the -upgrade flag, to make sure that works.
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+      cluster.restartNameNode(0, false);
+
+      // Make sure we can still do FS ops after upgrading.
+      cluster.transitionToActive(0);
+      assertTrue(fs.mkdirs(new Path("/foo3")));
+
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+
+      // Now restart NN1 and make sure that we can do ops against that as well.
+      cluster.restartNameNode(1);
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+      assertTrue(fs.mkdirs(new Path("/foo4")));
+
+      assertCTimesEqual(cluster);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  /**
+   * Make sure that an HA NN can successfully upgrade when configured using
+   * JournalNodes.
+   */
+  @Test
+  public void testUpgradeWithJournalNodes() throws IOException,
+      URISyntaxException {
+    MiniQJMHACluster qjCluster = null;
+    FileSystem fs = null;
+    try {
+      Builder builder = new MiniQJMHACluster.Builder(conf);
+      builder.getDfsBuilder()
+          .numDataNodes(0);
+      qjCluster = builder.build();
+
+      MiniDFSCluster cluster = qjCluster.getDfsCluster();
+
+      // No upgrade is in progress at the moment.
+      checkJnPreviousDirExistence(qjCluster, false);
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkJnPreviousDirExistence(qjCluster, true);
+
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+
+      // Restart NN0 without the -upgrade flag, to make sure that works.
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+      cluster.restartNameNode(0, false);
+
+      // Make sure we can still do FS ops after upgrading.
+      cluster.transitionToActive(0);
+      assertTrue(fs.mkdirs(new Path("/foo3")));
+
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+
+      // Now restart NN1 and make sure that we can do ops against that as well.
+      cluster.restartNameNode(1);
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+      assertTrue(fs.mkdirs(new Path("/foo4")));
+
+      assertCTimesEqual(cluster);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (qjCluster != null) {
+        qjCluster.shutdown();
+      }
+    }
+  }
+
+  @Test
+  public void testFinalizeWithJournalNodes() throws IOException,
+      URISyntaxException {
+    MiniQJMHACluster qjCluster = null;
+    FileSystem fs = null;
+    try {
+      Builder builder = new MiniQJMHACluster.Builder(conf);
+      builder.getDfsBuilder()
+          .numDataNodes(0);
+      qjCluster = builder.build();
+
+      MiniDFSCluster cluster = qjCluster.getDfsCluster();
+
+      // No upgrade is in progress at the moment.
+      checkJnPreviousDirExistence(qjCluster, false);
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkJnPreviousDirExistence(qjCluster, true);
+
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+
+      cluster.restartNameNode(1);
+
+      runFinalizeCommand(cluster);
+
+      checkClusterPreviousDirExistence(cluster, false);
+      checkJnPreviousDirExistence(qjCluster, false);
+      assertCTimesEqual(cluster);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (qjCluster != null) {
+        qjCluster.shutdown();
+      }
+    }
+  }
+
+  /**
+   * Make sure that even if the NN which initiated the upgrade is in the standby
+   * state that we're allowed to finalize.
+   */
+  @Test
+  public void testFinalizeFromSecondNameNodeWithJournalNodes()
+      throws IOException, URISyntaxException {
+    MiniQJMHACluster qjCluster = null;
+    FileSystem fs = null;
+    try {
+      Builder builder = new MiniQJMHACluster.Builder(conf);
+      builder.getDfsBuilder()
+          .numDataNodes(0);
+      qjCluster = builder.build();
+
+      MiniDFSCluster cluster = qjCluster.getDfsCluster();
+
+      // No upgrade is in progress at the moment.
+      checkJnPreviousDirExistence(qjCluster, false);
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkJnPreviousDirExistence(qjCluster, true);
+
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+
+      cluster.restartNameNode(1);
+
+      // Make the second NN (not the one that initiated the upgrade) active when
+      // the finalize command is run.
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+
+      runFinalizeCommand(cluster);
+
+      checkClusterPreviousDirExistence(cluster, false);
+      checkJnPreviousDirExistence(qjCluster, false);
+      assertCTimesEqual(cluster);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (qjCluster != null) {
+        qjCluster.shutdown();
+      }
+    }
+  }
+
+  /**
+   * Make sure that an HA NN will start if a previous upgrade was in progress.
+   */
+  @Test
+  public void testStartingWithUpgradeInProgressSucceeds() throws Exception {
+    MiniDFSCluster cluster = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .nnTopology(MiniDFSNNTopology.simpleHATopology())
+          .numDataNodes(0)
+          .build();
+
       // Simulate an upgrade having started.
       for (int i = 0; i < 2; i++) {
         for (URI uri : cluster.getNameDirs(i)) {
@@ -92,18 +509,226 @@ public class TestDFSUpgradeWithHA {
           assertTrue(prevTmp.mkdirs());
         }
       }
-
+
       cluster.restartNameNodes();
-      fail("Should not have been able to start an HA NN with an in-progress upgrade");
-    } catch (IOException ioe) {
-      GenericTestUtils.assertExceptionContains(
-          "Cannot start an HA namenode with name dirs that need recovery.",
-          ioe);
-      LOG.info("Got expected exception", ioe);
     } finally {
       if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
+
+  /**
+   * Test rollback with NFS shared dir.
+   */
+  @Test
+  public void testRollbackWithNfs() throws Exception {
+    MiniDFSCluster cluster = null;
+    FileSystem fs = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .nnTopology(MiniDFSNNTopology.simpleHATopology())
+          .numDataNodes(0)
+          .build();
+
+      File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+
+      // No upgrade is in progress at the moment.
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      checkPreviousDirExistence(sharedDir, false);
+
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkPreviousDirExistence(sharedDir, true);
+
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+
+      cluster.restartNameNode(1);
+
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkPreviousDirExistence(sharedDir, true);
+      assertCTimesEqual(cluster);
+
+      // Now shut down the cluster and do the rollback.
+      Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
+      cluster.shutdown();
+
+      conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
+      NameNode.doRollback(conf, false);
+
+      // The rollback operation should have rolled back the first NN's local
+      // dirs, and the shared dir, but not the other NN's dirs. Those have to be
+      // done by bootstrapping the standby.
+      checkNnPreviousDirExistence(cluster, 0, false);
+      checkPreviousDirExistence(sharedDir, false);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  @Test
+  public void testRollbackWithJournalNodes() throws IOException,
+      URISyntaxException {
+    MiniQJMHACluster qjCluster = null;
+    FileSystem fs = null;
+    try {
+      Builder builder = new MiniQJMHACluster.Builder(conf);
+      builder.getDfsBuilder()
+          .numDataNodes(0);
+      qjCluster = builder.build();
+
+      MiniDFSCluster cluster = qjCluster.getDfsCluster();
+
+      // No upgrade is in progress at the moment.
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      checkJnPreviousDirExistence(qjCluster, false);
+
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkJnPreviousDirExistence(qjCluster, true);
+
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+
+      cluster.restartNameNode(1);
+
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkJnPreviousDirExistence(qjCluster, true);
+      assertCTimesEqual(cluster);
+
+      // Shut down the NNs, but deliberately leave the JNs up and running.
+      Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
+      cluster.shutdown();
+
+      conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
+      NameNode.doRollback(conf, false);
+
+      // The rollback operation should have rolled back the first NN's local
+      // dirs, and the shared dir, but not the other NN's dirs. Those have to be
+      // done by bootstrapping the standby.
+      checkNnPreviousDirExistence(cluster, 0, false);
+      checkJnPreviousDirExistence(qjCluster, false);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (qjCluster != null) {
+        qjCluster.shutdown();
+      }
+    }
+  }
+
+  /**
+   * Make sure that starting a second NN with the -upgrade flag fails if the
+   * other NN has already done that.
+   */
+  @Test
+  public void testCannotUpgradeSecondNameNode() throws IOException,
+      URISyntaxException {
+    MiniDFSCluster cluster = null;
+    FileSystem fs = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .nnTopology(MiniDFSNNTopology.simpleHATopology())
+          .numDataNodes(0)
+          .build();
+
+      File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+
+      // No upgrade is in progress at the moment.
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      checkPreviousDirExistence(sharedDir, false);
+
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkPreviousDirExistence(sharedDir, true);
+
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+
+      // Restart NN0 without the -upgrade flag, to make sure that works.
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+      cluster.restartNameNode(0, false);
+
+      // Make sure we can still do FS ops after upgrading.
+      cluster.transitionToActive(0);
+      assertTrue(fs.mkdirs(new Path("/foo3")));
+
+      // Make sure that starting the second NN with the -upgrade flag fails.
+      cluster.getNameNodeInfos()[1].setStartOpt(StartupOption.UPGRADE);
+      try {
+        cluster.restartNameNode(1, false);
+        fail("Should not have been able to start second NN with -upgrade");
+      } catch (IOException ioe) {
+        GenericTestUtils.assertExceptionContains(
+            "It looks like the shared log is already being upgraded", ioe);
+      }
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }
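
For reference, the sequence the new tests above repeat is condensed below as a non-authoritative sketch. Every call is taken from the test code in this diff and assumes a MiniQJMHACluster (qjCluster) and conf set up as in testFinalizeWithJournalNodes; this is a fragment for reading the diff, not a standalone test.

    // Sketch only: the HA upgrade/finalize flow exercised by the tests above.
    MiniDFSCluster cluster = qjCluster.getDfsCluster();
    cluster.transitionToActive(0);                        // NN0 serves clients
    cluster.shutdownNameNode(1);                          // stop the standby NN
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);                    // NN0 upgrades its local dirs and the shared/JN storage
    int rc = BootstrapStandby.run(new String[]{"-force"},
        cluster.getConfiguration(1));                     // re-seed NN1 from the upgraded NN0
    cluster.restartNameNode(1);                           // bring the standby back up
    runFinalizeCommand(cluster);                          // DFSAdmin#finalizeUpgrade; needs an active NN, removes "previous" dirs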
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java?rev=1561381&r1=1561380&r2=1561381&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java Sat Jan 25 20:01:26 2014
@@ -96,7 +96,7 @@ public class TestInitializeSharedEdits {
     } catch (IOException ioe) {
       LOG.info("Got expected exception", ioe);
       GenericTestUtils.assertExceptionContains(
-          "Cannot start an HA namenode with name dirs that need recovery", ioe);
+          "storage directory does not exist or is not accessible", ioe);
     }
     try {
       cluster.restartNameNode(1, false);
@@ -104,7 +104,7 @@ public class TestInitializeSharedEdits {
     } catch (IOException ioe) {
       LOG.info("Got expected exception", ioe);
       GenericTestUtils.assertExceptionContains(
-          "Cannot start an HA namenode with name dirs that need recovery", ioe);
+          "storage directory does not exist or is not accessible", ioe);
     }
   }