Repository: hbase
Updated Branches:
  refs/heads/branch-1 a81b3c5af -> 2c076a30e


HBASE-13993 WALProcedureStore fencing is not effective if new WAL rolls


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/2c076a30
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/2c076a30
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/2c076a30

Branch: refs/heads/branch-1
Commit: 2c076a30e5b269113a6ef526d458aafae6fcf339
Parents: a81b3c5
Author: Enis Soztutar <e...@apache.org>
Authored: Fri Jul 10 13:36:45 2015 -0700
Committer: Enis Soztutar <e...@apache.org>
Committed: Fri Jul 10 13:44:32 2015 -0700

----------------------------------------------------------------------
 .../procedure2/store/wal/WALProcedureStore.java | 17 ++++++-
 .../procedure2/ProcedureTestingUtility.java     |  4 ++
 .../TestMasterFailoverWithProcedures.java       | 52 +++++++++++++++-----
 .../procedure/TestWALProcedureStoreOnHDFS.java  | 15 ++++++
 4 files changed, 74 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/2c076a30/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java
----------------------------------------------------------------------
diff --git 
a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java
 
b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java
index b0e2258..04715de 100644
--- 
a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java
+++ 
b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java
@@ -650,7 +650,22 @@ public class WALProcedureStore extends ProcedureStoreBase {
   }
 
   protected boolean rollWriter() throws IOException {
-    return rollWriter(flushLogId + 1);
+    // Create new state-log
+    if (!rollWriter(flushLogId + 1)) {
+      LOG.warn("someone else has already created log " + flushLogId);
+      return false;
+    }
+
+    // We have the lease on the log,
+    // but we should check if someone else has created new files
+    if (getMaxLogId(getLogFiles()) > flushLogId) {
+      LOG.warn("Someone else created new logs. Expected maxLogId < " + 
flushLogId);
+      logs.getLast().removeFile();
+      return false;
+    }
+
+    // We have the lease on the log
+    return true;
   }
 
   private boolean rollWriter(final long logId) throws IOException {

http://git-wip-us.apache.org/repos/asf/hbase/blob/2c076a30/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java
----------------------------------------------------------------------
diff --git 
a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java
 
b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java
index 1534a5f..34774ed 100644
--- 
a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java
+++ 
b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java
@@ -188,6 +188,10 @@ public class ProcedureTestingUtility {
   public static class TestProcedure extends Procedure<Void> {
     public TestProcedure() {}
 
+    public TestProcedure(long procId) {
+      this(procId, 0);
+    }
+
     public TestProcedure(long procId, long parentId) {
       setProcId(procId);
       if (parentId > 0) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/2c076a30/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java
index 429c83b..6e967e1 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java
@@ -35,9 +35,9 @@ import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import 
org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
 import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
-import 
org.apache.hadoop.hbase.procedure2.store.wal.TestWALProcedureStore.TestSequentialProcedure;
 import 
org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CreateTableState;
 import 
org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DeleteTableState;
 import 
org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
@@ -165,13 +165,32 @@ public class TestMasterFailoverWithProcedures {
     backupStore3Abort.await();
   }
 
-  @Test(timeout=60000)
+  /**
+   * Tests proper fencing in case the current WAL store is fenced
+   */
+  @Test
+  public void testWALfencingWithoutWALRolling() throws IOException {
+    testWALfencing(false);
+  }
+
+  /**
+   * Tests proper fencing in case the current WAL store does not receive 
writes until after the
+   * new WAL does a couple of WAL rolls.
+   */
+  @Test
   public void testWALfencingWithWALRolling() throws IOException {
+    testWALfencing(true);
+  }
+
+  public void testWALfencing(boolean walRolls) throws IOException {
     final ProcedureStore procStore = getMasterProcedureExecutor().getStore();
     assertTrue("expected WALStore for this test", procStore instanceof 
WALProcedureStore);
 
     HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
 
+    // cause WAL rolling after a delete in WAL:
+    
firstMaster.getConfiguration().setLong("hbase.procedure.store.wal.roll.threshold",
 1);
+
     HMaster backupMaster3 = Mockito.mock(HMaster.class);
     
Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
     Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
@@ -185,20 +204,27 @@ public class TestMasterFailoverWithProcedures {
     procStore2.start(1);
     procStore2.recoverLease();
 
-    LOG.info("Inserting into second WALProcedureStore");
-    // insert something to the second store then delete it, causing a WAL roll
-    Procedure proc2 = new TestSequentialProcedure();
-    procStore2.insert(proc2, null);
-    procStore2.rollWriterOrDie();
+    // before writing back to the WAL store, optionally do a couple of WAL 
rolls (which causes
+    // to delete the old WAL files).
+    if (walRolls) {
+      LOG.info("Inserting into second WALProcedureStore, causing WAL rolls");
+      for (int i = 0; i < 512; i++) {
+        // insert something to the second store then delete it, causing a WAL 
roll(s)
+        Procedure proc2 = new TestProcedure(i);
+        procStore2.insert(proc2, null);
+        procStore2.delete(proc2.getProcId()); // delete the procedure so that 
the WAL is removed later
+      }
+    }
 
+    // Now, insert something to the first store, should fail.
+    // If the store does a WAL roll and continue with another logId without 
checking higher logIds
+    // it will incorrectly succeed.
     LOG.info("Inserting into first WALProcedureStore");
-    // insert something to the first store
-    proc2 = new TestSequentialProcedure();
     try {
-      procStore.insert(proc2, null);
-      fail("expected RuntimeException 'sync aborted'");
-    } catch (RuntimeException e) {
-      LOG.info("got " + e.getMessage());
+      procStore.insert(new TestProcedure(11), null);
+      fail("Inserting into Procedure Store should have failed");
+    } catch (Exception ex) {
+      LOG.info("Received expected exception", ex);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/2c076a30/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java
index 89ad5d6..8dc5285 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java
@@ -187,6 +187,7 @@ public class TestWALProcedureStoreOnHDFS {
     UTIL.getDFSCluster().restartDataNode(dnCount);
     for (long i = 2; i < 100; ++i) {
       store.insert(new TestProcedure(i, -1), null);
+      waitForNumReplicas(3);
       Thread.sleep(100);
       if ((i % 30) == 0) {
         LOG.info("Restart Data Node");
@@ -195,4 +196,18 @@ public class TestWALProcedureStoreOnHDFS {
     }
     assertTrue(store.isRunning());
   }
+
+  public void waitForNumReplicas(int numReplicas) throws Exception {
+    while (UTIL.getDFSCluster().getDataNodes().size() < numReplicas) {
+      Thread.sleep(100);
+    }
+
+    for (int i = 0; i < numReplicas; ++i) {
+      for (DataNode dn: UTIL.getDFSCluster().getDataNodes()) {
+        while (!dn.isDatanodeFullyStarted()) {
+          Thread.sleep(100);
+        }
+      }
+    }
+  }
 }

Reply via email to