HDFS-9305. Delayed heartbeat processing causes storm of subsequent heartbeats. 
(Contributed by Arpit Agarwal)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d8736eb9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d8736eb9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d8736eb9

Branch: refs/heads/HDFS-8966
Commit: d8736eb9ca351b82854601ea3b1fbc3c9fab44e4
Parents: e8aefdf
Author: Arpit Agarwal <a...@apache.org>
Authored: Mon Oct 26 15:45:02 2015 -0700
Committer: Arpit Agarwal <a...@apache.org>
Committed: Mon Oct 26 15:54:14 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +++
 .../hdfs/server/datanode/BPServiceActor.java    |  4 ++--
 .../datanode/TestBpServiceActorScheduler.java   | 22 ++++++++++++++++++++
 3 files changed, 27 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/d8736eb9/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 478d48b..e26abcc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -2218,6 +2218,9 @@ Release 2.7.2 - UNRELEASED
     HDFS-9290. DFSClient#callAppend() is not backward compatible for slightly
     older NameNodes. (Tony Wu via kihwal)
 
+    HDFS-9305. Delayed heartbeat processing causes storm of subsequent
+    heartbeats. (Arpit Agarwal)
+
 Release 2.7.1 - 2015-07-06
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d8736eb9/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
index 85ea6ae..575e7cc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
@@ -538,6 +538,7 @@ class BPServiceActor implements Runnable {
   
   HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease)
       throws IOException {
+    scheduler.scheduleNextHeartbeat();
     StorageReport[] reports =
         dn.getFSDataset().getStorageReports(bpos.getBlockPoolId());
     if (LOG.isDebugEnabled()) {
@@ -651,7 +652,6 @@ class BPServiceActor implements Runnable {
           //
           boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) &&
                   scheduler.isBlockReportDue(startTime);
-          scheduler.scheduleNextHeartbeat();
           if (!dn.areHeartbeatsDisabledForTests()) {
             resp = sendHeartBeat(requestBlockReportLease);
             assert resp != null;
@@ -1064,7 +1064,7 @@ class BPServiceActor implements Runnable {
 
     long scheduleNextHeartbeat() {
       // Numerical overflow is possible here and is okay.
-      nextHeartbeatTime += heartbeatIntervalMs;
+      nextHeartbeatTime = monotonicNow() + heartbeatIntervalMs;
       return nextHeartbeatTime;
     }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d8736eb9/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java
index b9b6512..efdd87c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java
@@ -144,6 +144,28 @@ public class TestBpServiceActorScheduler {
     }
   }
 
+
+  /**
+   * Regression test for HDFS-9305.
+   * Delayed processing of a heartbeat can cause a subsequent heartbeat
+   * storm.
+   */
+  @Test
+  public void testScheduleDelayedHeartbeat() {
+    for (final long now : getTimestamps()) {
+      Scheduler scheduler = makeMockScheduler(now);
+      scheduler.scheduleNextHeartbeat();
+      assertFalse(scheduler.isHeartbeatDue(now));
+
+      // Simulate a delayed heartbeat e.g. due to slow processing by NN.
+      scheduler.nextHeartbeatTime = now - (HEARTBEAT_INTERVAL_MS * 10);
+      scheduler.scheduleNextHeartbeat();
+
+      // Ensure that the next heartbeat is not due immediately.
+      assertFalse(scheduler.isHeartbeatDue(now));
+    }
+  }
+
   private Scheduler makeMockScheduler(long now) {
     LOG.info("Using now = " + now);
     Scheduler mockScheduler = spy(new Scheduler(HEARTBEAT_INTERVAL_MS, BLOCK_REPORT_INTERVAL_MS));

Reply via email to