Author: todd Date: Sat Apr 7 23:24:57 2012 New Revision: 1310919 URL: http://svn.apache.org/viewvc?rev=1310919&view=rev Log: HADOOP-8246. Auto-HA: automatically scope znode by nameservice ID. Contributed by Todd Lipcon.
Modified: hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java Modified: hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt?rev=1310919&r1=1310918&r2=1310919&view=diff ============================================================================== --- hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt (original) +++ hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt Sat Apr 7 23:24:57 2012 @@ -15,3 +15,5 @@ HADOOP-8245. Fix flakiness in TestZKFail HADOOP-8257. TestZKFailoverControllerStress occasionally fails with Mockito error (todd) HADOOP-8260. Replace ClientBaseWithFixes with our own modified copy of the class (todd) + +HADOOP-8246. Auto-HA: automatically scope znode by nameservice ID (todd) Modified: hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java?rev=1310919&r1=1310918&r2=1310919&view=diff ============================================================================== --- hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java (original) +++ hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java Sat Apr 7 23:24:57 2012 @@ -80,8 +80,6 @@ public abstract class ZKFailoverControll private HAServiceTarget localTarget; - private String parentZnode; - private State lastHealthState = State.INITIALIZING; /** Set if a fatal error occurs */ @@ -95,10 +93,18 @@ public abstract class ZKFailoverControll protected abstract byte[] targetToData(HAServiceTarget target); - protected abstract HAServiceTarget getLocalTarget(); + protected abstract HAServiceTarget getLocalTarget(); protected abstract HAServiceTarget dataToTarget(byte[] data); protected abstract void loginAsFCUser() throws IOException; + /** + * Return the name of a znode inside the configured parent znode in which + * the ZKFC will do all of its work. This is so that multiple federated + * nameservices can run on the same ZK quorum without having to manually + * configure them to separate subdirectories. + */ + protected abstract String getScopeInsideParentNode(); + @Override public Configuration getConf() { return conf; @@ -204,6 +210,7 @@ public abstract class ZKFailoverControll } private boolean confirmFormat() { + String parentZnode = getParentZnode(); System.err.println( "===============================================\n" + "The configured parent znode " + parentZnode + " already exists.\n" + @@ -234,9 +241,6 @@ public abstract class ZKFailoverControll String zkQuorum = conf.get(ZK_QUORUM_KEY); int zkTimeout = conf.getInt(ZK_SESSION_TIMEOUT_KEY, ZK_SESSION_TIMEOUT_DEFAULT); - parentZnode = conf.get(ZK_PARENT_ZNODE_KEY, - ZK_PARENT_ZNODE_DEFAULT); - // Parse ACLs from configuration. String zkAclConf = conf.get(ZK_ACL_KEY, ZK_ACL_DEFAULT); zkAclConf = HAZKUtil.resolveConfIndirection(zkAclConf); @@ -264,10 +268,19 @@ public abstract class ZKFailoverControll elector = new ActiveStandbyElector(zkQuorum, - zkTimeout, parentZnode, zkAcls, zkAuths, + zkTimeout, getParentZnode(), zkAcls, zkAuths, new ElectorCallbacks()); } + private String getParentZnode() { + String znode = conf.get(ZK_PARENT_ZNODE_KEY, + ZK_PARENT_ZNODE_DEFAULT); + if (!znode.endsWith("/")) { + znode += "/"; + } + return znode + getScopeInsideParentNode(); + } + private synchronized void mainLoop() throws InterruptedException { while (fatalError == null) { wait(); Modified: hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java?rev=1310919&r1=1310918&r2=1310919&view=diff ============================================================================== --- hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java (original) +++ hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/MiniZKFCCluster.java Sat Apr 7 23:24:57 2012 @@ -187,8 +187,7 @@ public class MiniZKFCCluster { throws NoNodeException { Stat stat = new Stat(); byte[] data = zks.getZKDatabase().getData( - ZKFailoverController.ZK_PARENT_ZNODE_DEFAULT + "/" + - ActiveStandbyElector.LOCK_FILENAME, stat, null); + DummyZKFC.LOCK_ZNODE, stat, null); assertArrayEquals(Ints.toByteArray(svcs[idx].index), data); long session = stat.getEphemeralOwner(); @@ -206,7 +205,7 @@ public class MiniZKFCCluster { throws Exception { DummyHAService svc = idx == null ? null : svcs[idx]; ActiveStandbyElectorTestUtil.waitForActiveLockData(ctx, zks, - ZKFailoverController.ZK_PARENT_ZNODE_DEFAULT, + DummyZKFC.SCOPED_PARENT_ZNODE, (idx == null) ? null : Ints.toByteArray(svc.index)); } @@ -255,6 +254,12 @@ public class MiniZKFCCluster { } static class DummyZKFC extends ZKFailoverController { + private static final String DUMMY_CLUSTER = "dummy-cluster"; + public static final String SCOPED_PARENT_ZNODE = + ZKFailoverController.ZK_PARENT_ZNODE_DEFAULT + "/" + + DUMMY_CLUSTER; + private static final String LOCK_ZNODE = + SCOPED_PARENT_ZNODE + "/" + ActiveStandbyElector.LOCK_FILENAME; private final DummyHAService localTarget; public DummyZKFC(DummyHAService localTarget) { @@ -280,5 +285,10 @@ public class MiniZKFCCluster { @Override protected void loginAsFCUser() throws IOException { } + + @Override + protected String getScopeInsideParentNode() { + return DUMMY_CLUSTER; + } } } \ No newline at end of file Modified: hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java?rev=1310919&r1=1310918&r2=1310919&view=diff ============================================================================== --- hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java (original) +++ hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java Sat Apr 7 23:24:57 2012 @@ -94,6 +94,40 @@ public class TestZKFailoverController ex assertEquals(0, runFC(svc, "-formatZK", "-force")); } + @Test + public void testFormatOneClusterLeavesOtherClustersAlone() throws Exception { + DummyHAService svc = cluster.getService(1); + + DummyZKFC zkfcInOtherCluster = new DummyZKFC(cluster.getService(1)) { + @Override + protected String getScopeInsideParentNode() { + return "other-scope"; + } + }; + zkfcInOtherCluster.setConf(conf); + + // Run without formatting the base dir, + // should barf + assertEquals(ZKFailoverController.ERR_CODE_NO_PARENT_ZNODE, + runFC(svc)); + + // Format the base dir, should succeed + assertEquals(0, runFC(svc, "-formatZK")); + + // Run the other cluster without formatting, should barf because + // it uses a different parent znode + assertEquals(ZKFailoverController.ERR_CODE_NO_PARENT_ZNODE, + zkfcInOtherCluster.run(new String[]{})); + + // Should succeed in formatting the second cluster + assertEquals(0, zkfcInOtherCluster.run(new String[]{"-formatZK"})); + + // But should not have deleted the original base node from the first + // cluster + assertEquals(ZKFailoverController.ERR_CODE_FORMAT_DENIED, + runFC(svc, "-formatZK", "-nonInteractive")); + } + /** * Test that, if ACLs are specified in the configuration, that * it sets the ACLs when formatting the parent node.