Repository: hbase Updated Branches: refs/heads/branch-1.1 ac95c1a0b -> 96260ab51
HBASE-13993 WALProcedureStore fencing is not effective if new WAL rolls Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/96260ab5 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/96260ab5 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/96260ab5 Branch: refs/heads/branch-1.1 Commit: 96260ab516da548a597ddb6757f7dc4235d3f5fe Parents: ac95c1a Author: Enis Soztutar <e...@apache.org> Authored: Fri Jul 10 13:36:45 2015 -0700 Committer: Enis Soztutar <e...@apache.org> Committed: Fri Jul 10 13:48:21 2015 -0700 ---------------------------------------------------------------------- .../procedure2/store/wal/WALProcedureStore.java | 17 ++++++- .../procedure2/ProcedureTestingUtility.java | 4 ++ .../TestMasterFailoverWithProcedures.java | 52 +++++++++++++++----- .../procedure/TestWALProcedureStoreOnHDFS.java | 15 ++++++ 4 files changed, 74 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/96260ab5/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java ---------------------------------------------------------------------- diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java index 0faa7aa..cf85458 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java @@ -671,7 +671,22 @@ public class WALProcedureStore implements ProcedureStore { } protected boolean rollWriter() throws IOException { - return rollWriter(flushLogId + 1); + // Create new state-log + if (!rollWriter(flushLogId + 1)) { + LOG.warn("someone else has already created log " + flushLogId); + return false; + } + + // We have the lease on the log, + // but we should check if someone else has created new files + if (getMaxLogId(getLogFiles()) > flushLogId) { + LOG.warn("Someone else created new logs. Expected maxLogId < " + flushLogId); + logs.getLast().removeFile(); + return false; + } + + // We have the lease on the log + return true; } private boolean rollWriter(final long logId) throws IOException { http://git-wip-us.apache.org/repos/asf/hbase/blob/96260ab5/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java ---------------------------------------------------------------------- diff --git a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java index 8e322a7..da590b8 100644 --- a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java +++ b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java @@ -166,6 +166,10 @@ public class ProcedureTestingUtility { public static class TestProcedure extends Procedure<Void> { public TestProcedure() {} + public TestProcedure(long procId) { + this(procId, 0); + } + public TestProcedure(long procId, long parentId) { setProcId(procId); if (parentId > 0) { http://git-wip-us.apache.org/repos/asf/hbase/blob/96260ab5/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java index 429c83b..6e967e1 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java @@ -35,9 +35,9 @@ import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.procedure2.Procedure; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure; import org.apache.hadoop.hbase.procedure2.store.ProcedureStore; import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore; -import org.apache.hadoop.hbase.procedure2.store.wal.TestWALProcedureStore.TestSequentialProcedure; import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CreateTableState; import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DeleteTableState; import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState; @@ -165,13 +165,32 @@ public class TestMasterFailoverWithProcedures { backupStore3Abort.await(); } - @Test(timeout=60000) + /** + * Tests proper fencing in case the current WAL store is fenced + */ + @Test + public void testWALfencingWithoutWALRolling() throws IOException { + testWALfencing(false); + } + + /** + * Tests proper fencing in case the current WAL store does not receive writes until after the + * new WAL does a couple of WAL rolls. + */ + @Test public void testWALfencingWithWALRolling() throws IOException { + testWALfencing(true); + } + + public void testWALfencing(boolean walRolls) throws IOException { final ProcedureStore procStore = getMasterProcedureExecutor().getStore(); assertTrue("expected WALStore for this test", procStore instanceof WALProcedureStore); HMaster firstMaster = UTIL.getHBaseCluster().getMaster(); + // cause WAL rolling after a delete in WAL: + firstMaster.getConfiguration().setLong("hbase.procedure.store.wal.roll.threshold", 1); + HMaster backupMaster3 = Mockito.mock(HMaster.class); Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration(); Mockito.doReturn(true).when(backupMaster3).isActiveMaster(); @@ -185,20 +204,27 @@ public class TestMasterFailoverWithProcedures { procStore2.start(1); procStore2.recoverLease(); - LOG.info("Inserting into second WALProcedureStore"); - // insert something to the second store then delete it, causing a WAL roll - Procedure proc2 = new TestSequentialProcedure(); - procStore2.insert(proc2, null); - procStore2.rollWriterOrDie(); + // before writing back to the WAL store, optionally do a couple of WAL rolls (which causes + // to delete the old WAL files). + if (walRolls) { + LOG.info("Inserting into second WALProcedureStore, causing WAL rolls"); + for (int i = 0; i < 512; i++) { + // insert something to the second store then delete it, causing a WAL roll(s) + Procedure proc2 = new TestProcedure(i); + procStore2.insert(proc2, null); + procStore2.delete(proc2.getProcId()); // delete the procedure so that the WAL is removed later + } + } + // Now, insert something to the first store, should fail. + // If the store does a WAL roll and continue with another logId without checking higher logIds + // it will incorrectly succeed. LOG.info("Inserting into first WALProcedureStore"); - // insert something to the first store - proc2 = new TestSequentialProcedure(); try { - procStore.insert(proc2, null); - fail("expected RuntimeException 'sync aborted'"); - } catch (RuntimeException e) { - LOG.info("got " + e.getMessage()); + procStore.insert(new TestProcedure(11), null); + fail("Inserting into Procedure Store should have failed"); + } catch (Exception ex) { + LOG.info("Received expected exception", ex); } } http://git-wip-us.apache.org/repos/asf/hbase/blob/96260ab5/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java index 89ad5d6..8dc5285 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java @@ -187,6 +187,7 @@ public class TestWALProcedureStoreOnHDFS { UTIL.getDFSCluster().restartDataNode(dnCount); for (long i = 2; i < 100; ++i) { store.insert(new TestProcedure(i, -1), null); + waitForNumReplicas(3); Thread.sleep(100); if ((i % 30) == 0) { LOG.info("Restart Data Node"); @@ -195,4 +196,18 @@ public class TestWALProcedureStoreOnHDFS { } assertTrue(store.isRunning()); } + + public void waitForNumReplicas(int numReplicas) throws Exception { + while (UTIL.getDFSCluster().getDataNodes().size() < numReplicas) { + Thread.sleep(100); + } + + for (int i = 0; i < numReplicas; ++i) { + for (DataNode dn: UTIL.getDFSCluster().getDataNodes()) { + while (!dn.isDatanodeFullyStarted()) { + Thread.sleep(100); + } + } + } + } }