Repository: hadoop
Updated Branches:
  refs/heads/trunk 9130af3f7 -> 8a0fa0f7e


HDFS-10733. NameNode terminated after full GC thinking QJM is unresponsive. 
Contributed by Vinitha Gankidi.

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8a0fa0f7
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8a0fa0f7
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8a0fa0f7

Branch: refs/heads/trunk
Commit: 8a0fa0f7e88c45a98c6f266d6349cb426dd06495
Parents: 9130af3
Author: Vinitha Reddy Gankidi <vigank...@linkedin.com>
Authored: Tue Jan 17 17:21:12 2017 -0800
Committer: Konstantin V Shvachko <s...@apache.org>
Committed: Wed Jan 18 12:46:32 2017 -0800

----------------------------------------------------------------------
 .../hadoop/hdfs/qjournal/client/QuorumCall.java | 26 +++++++++++++++++++-
 .../hdfs/qjournal/client/TestQuorumCall.java    | 17 +++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8a0fa0f7/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
index f15e462..dc32318 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
@@ -20,8 +20,10 @@ package org.apache.hadoop.hdfs.qjournal.client;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.concurrent.TimeoutException;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.ipc.RemoteException;
+import org.apache.hadoop.util.StopWatch;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.base.Joiner;
@@ -58,6 +60,7 @@ class QuorumCall<KEY, RESULT> {
    * fraction of the configured timeout for any call.
    */
   private static final float WAIT_PROGRESS_WARN_THRESHOLD = 0.7f;
+  private final StopWatch quorumStopWatch = new StopWatch();
   
   static <KEY, RESULT> QuorumCall<KEY, RESULT> create(
       Map<KEY, ? extends ListenableFuture<RESULT>> calls) {
@@ -83,6 +86,16 @@ class QuorumCall<KEY, RESULT> {
   private QuorumCall() {
     // Only instantiated from factory method above
   }
+  /** Resets quorumStopWatch and immediately starts it again. */
+  private void restartQuorumStopWatch() {
+    quorumStopWatch.reset().start();
+  }
+  /** True if stopwatch ms + offset > millis * WAIT_PROGRESS_INFO_THRESHOLD. */
+  private boolean shouldIncreaseQuorumTimeout(long offset, int millis) {
+    long elapsed = quorumStopWatch.now(TimeUnit.MILLISECONDS);
+    return elapsed + offset > (millis * WAIT_PROGRESS_INFO_THRESHOLD);
+  }
+
   
   /**
    * Wait for the quorum to achieve a certain number of responses.
@@ -110,6 +123,7 @@ class QuorumCall<KEY, RESULT> {
     long nextLogTime = st + (long)(millis * WAIT_PROGRESS_INFO_THRESHOLD);
     long et = st + millis;
     while (true) {
+      restartQuorumStopWatch();
       checkAssertionErrors();
       if (minResponses > 0 && countResponses() >= minResponses) return;
       if (minSuccesses > 0 && countSuccesses() >= minSuccesses) return;
@@ -139,11 +153,21 @@ class QuorumCall<KEY, RESULT> {
       }
       long rem = et - now;
       if (rem <= 0) {
-        throw new TimeoutException();
+        // Increase timeout if a full GC occurred after restarting stopWatch
+        if (shouldIncreaseQuorumTimeout(0, millis)) {
+          et = et + millis;
+        } else {
+          throw new TimeoutException();
+        }
       }
+      restartQuorumStopWatch();
       rem = Math.min(rem, nextLogTime - now);
       rem = Math.max(rem, 1);
       wait(rem);
+      // Increase timeout if a full GC occurred after restarting stopWatch
+      if (shouldIncreaseQuorumTimeout(-rem, millis)) {
+        et = et + millis;
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8a0fa0f7/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java
index 2295384..506497e 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java
@@ -66,4 +66,21 @@ public class TestQuorumCall {
       // expected
     }
   }
+  @Test(timeout=10000)
+  public void testQuorumFailsWithoutResponse() throws Exception {
+    Map<String, SettableFuture<String>> futures = ImmutableMap.of(
+        "f1", SettableFuture.<String>create());
+
+    QuorumCall<String, String> q = QuorumCall.create(futures);
+    assertEquals("The number of quorum calls for which a response has been"
+            + " received should be 0", 0, q.countResponses());
+
+    try {
+      q.waitFor(0, 1, 100, 10, "test");
+      fail("Didn't time out waiting for more responses than came back");
+    } catch (TimeoutException te) {
+      // expected: the only future is never completed, so waitFor times out
+    }
+  }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to