Author: kihwal Date: Wed May 14 20:46:38 2014 New Revision: 1594710 URL: http://svn.apache.org/r1594710 Log: svn merge -c 1594709 merging from trunk to branch-2 to fix: HDFS-2949. Add check to active state transition to prevent operator-induced split brain. Contributed by Rushabh S Shah.
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1594710&r1=1594709&r2=1594710&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed May 14 20:46:38 2014 @@ -104,6 +104,9 @@ Release 2.5.0 - UNRELEASED HDFS-6293. Issues with OIV processing PB-based fsimages. (kihwal) + HDFS-2949. Add check to active state transition to prevent operator-induced + split brain. (Rushabh S Shah via kihwal) + OPTIMIZATIONS HDFS-6214. 
Webhdfs has poor throughput for files >2GB (daryn) Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1594710&r1=1594709&r2=1594710&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java Wed May 14 20:46:38 2014 @@ -1672,7 +1672,11 @@ public class NameNode implements NameNod public boolean isStandbyState() { return (state.equals(STANDBY_STATE)); } - + + public boolean isActiveState() { + return (state.equals(ACTIVE_STATE)); + } + /** * Check that a request to change this node's HA state is valid. 
* In particular, verifies that, if auto failover is enabled, non-forced Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java?rev=1594710&r1=1594709&r2=1594710&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java Wed May 14 20:46:38 2014 @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.tools; import java.io.PrintStream; import java.util.Arrays; +import java.util.Collection; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -27,6 +28,7 @@ import org.apache.hadoop.fs.CommonConfig import org.apache.hadoop.ha.HAAdmin; import org.apache.hadoop.ha.HAServiceTarget; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.util.ToolRunner; @@ -117,7 +119,15 @@ public class DFSHAAdmin extends HAAdmin return super.runCmd(argv); } - + + /** + * returns the list of all namenode ids for the given configuration + */ + @Override + protected Collection<String> getTargetIds(String namenodeToActivate) { + return DFSUtil.getNameNodeIds(getConf(), (nameserviceId != null)? 
nameserviceId : DFSUtil.getNamenodeNameServiceId(getConf())); + } + public static void main(String[] argv) throws Exception { int res = ToolRunner.run(new DFSHAAdmin(), argv); System.exit(res); Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java?rev=1594710&r1=1594709&r2=1594710&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java Wed May 14 20:46:38 2014 @@ -176,6 +176,7 @@ public class TestDFSHAAdmin { @Test public void testTransitionToActive() throws Exception { + Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus(); assertEquals(0, runTool("-transitionToActive", "nn1")); Mockito.verify(mockProtocol).transitionToActive( reqInfoCaptor.capture()); Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java?rev=1594710&r1=1594709&r2=1594710&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java (original) +++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java Wed May 14 20:46:38 2014 
@@ -31,6 +31,7 @@ import org.apache.commons.logging.LogFac import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HAAdmin; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; @@ -204,6 +205,70 @@ public class TestDFSHAAdminMiniCluster { assertEquals(0, runTool("-checkHealth", "nn2")); } + /** + * Test case to check whether both the name node is active or not + * @throws Exception + */ + @Test + public void testTransitionToActiveWhenOtherNamenodeisActive() + throws Exception { + NameNode nn1 = cluster.getNameNode(0); + NameNode nn2 = cluster.getNameNode(1); + if(nn1.getState() != null && !nn1.getState(). + equals(HAServiceState.STANDBY.name()) ) { + cluster.transitionToStandby(0); + } + if(nn2.getState() != null && !nn2.getState(). + equals(HAServiceState.STANDBY.name()) ) { + cluster.transitionToStandby(1); + } + //Making sure both the namenode are in standby state + assertTrue(nn1.isStandbyState()); + assertTrue(nn2.isStandbyState()); + // Triggering the transition for both namenode to Active + runTool("-transitionToActive", "nn1"); + runTool("-transitionToActive", "nn2"); + + assertFalse("Both namenodes cannot be active", nn1.isActiveState() + && nn2.isActiveState()); + + /* This test case doesn't allow nn2 to transition to Active even with + forceActive switch since nn1 is already active */ + if(nn1.getState() != null && !nn1.getState(). + equals(HAServiceState.STANDBY.name()) ) { + cluster.transitionToStandby(0); + } + if(nn2.getState() != null && !nn2.getState(). 
+ equals(HAServiceState.STANDBY.name()) ) { + cluster.transitionToStandby(1); + } + //Making sure both the namenode are in standby state + assertTrue(nn1.isStandbyState()); + assertTrue(nn2.isStandbyState()); + + runTool("-transitionToActive", "nn1"); + runTool("-transitionToActive", "nn2","--forceactive"); + + assertFalse("Both namenodes cannot be active even though with forceActive", + nn1.isActiveState() && nn2.isActiveState()); + + /* In this test case, we have deliberately shut down nn1 and this will + cause HAAAdmin#isOtherTargetNodeActive to throw an Exception + and transitionToActive for nn2 with forceActive switch will succeed + even with Exception */ + cluster.shutdownNameNode(0); + if(nn2.getState() != null && !nn2.getState(). + equals(HAServiceState.STANDBY.name()) ) { + cluster.transitionToStandby(1); + } + //Making sure both the namenode (nn2) is in standby state + assertTrue(nn2.isStandbyState()); + assertFalse(cluster.isNameNodeUp(0)); + + runTool("-transitionToActive", "nn2", "--forceactive"); + assertTrue("Namenode nn2 should be active", nn2.isActiveState()); + } + private int runTool(String ... args) throws Exception { errOutBytes.reset(); LOG.info("Running: DFSHAAdmin " + Joiner.on(" ").join(args));