HDFS-9145. Tracking methods that hold FSNamesytemLock for too long. Contributed 
by Mingliang Liu.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d1e1925b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d1e1925b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d1e1925b

Branch: refs/heads/HDFS-7966
Commit: d1e1925bf6c3cf7fd23ed8df5a5e18677fc299d8
Parents: 59e9980
Author: Haohui Mai <whe...@apache.org>
Authored: Tue Oct 13 13:20:11 2015 -0700
Committer: Haohui Mai <whe...@apache.org>
Committed: Tue Oct 13 13:24:15 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/test/GenericTestUtils.java    |  4 ++
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../hdfs/server/namenode/FSNamesystem.java      | 21 ++++++++
 .../hdfs/server/namenode/TestFSNamesystem.java  | 56 ++++++++++++++++++++
 4 files changed, 84 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1e1925b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
----------------------------------------------------------------------
diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
index 1e5852b..5bc1252 100644
--- 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
+++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
@@ -196,6 +196,10 @@ public abstract class GenericTestUtils {
       logger.removeAppender(appender);
 
     }
+
+    public void clearOutput() {
+      sw.getBuffer().setLength(0);
+    }
   }
   
   

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1e1925b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt 
b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 617c345..f2d5f7c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1547,6 +1547,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-9139. Enable parallel JUnit tests for HDFS Pre-commit
     (Chris Nauroth and Vinayakumar B via vinayakumarb)
 
+    HDFS-9145. Tracking methods that hold FSNamesytemLock for too long.
+    (Mingliang Liu via wheat9)
+
   BUG FIXES
 
     HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1e1925b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index a5b6c77..9d3f582 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -1458,6 +1458,11 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
     return Util.stringCollectionAsURIs(dirNames);
   }
 
+  /** Threshold (ms) for long holding write lock report. */
+  static final short WRITELOCK_REPORTING_THRESHOLD = 1000;
+  /** Last time stamp for write lock. Keep the longest one for 
multi-entrance.*/
+  private long writeLockHeldTimeStamp;
+
   @Override
   public void readLock() {
     this.fsLock.readLock().lock();
@@ -1469,14 +1474,30 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
   @Override
   public void writeLock() {
     this.fsLock.writeLock().lock();
+    if (fsLock.getWriteHoldCount() == 1) {
+      writeLockHeldTimeStamp = monotonicNow();
+    }
   }
   @Override
   public void writeLockInterruptibly() throws InterruptedException {
     this.fsLock.writeLock().lockInterruptibly();
+    if (fsLock.getWriteHoldCount() == 1) {
+      writeLockHeldTimeStamp = monotonicNow();
+    }
   }
   @Override
   public void writeUnlock() {
+    final boolean needReport = fsLock.getWriteHoldCount() == 1 &&
+        fsLock.isWriteLockedByCurrentThread();
     this.fsLock.writeLock().unlock();
+
+    if (needReport) {
+      long writeLockInterval = monotonicNow() - writeLockHeldTimeStamp;
+      if (writeLockInterval >= WRITELOCK_REPORTING_THRESHOLD) {
+        LOG.info("FSNamesystem write lock held for " + writeLockInterval +
+            " ms via\n" + StringUtils.getStackTrace(Thread.currentThread()));
+      }
+    }
   }
   @Override
   public boolean hasWriteLock() {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d1e1925b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
index d533cf6..15fc40e 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
@@ -39,6 +39,9 @@ import 
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
+import org.apache.log4j.Level;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Test;
@@ -268,6 +271,59 @@ public class TestFSNamesystem {
                         threadCount, rwLock.getQueueLength());
   }
 
+  /**
+   * Test when FSNamesystem lock is held for a long time, logger will report 
it.
+   */
+  @Test(timeout=45000)
+  public void testFSLockLongHoldingReport() throws Exception {
+    Configuration conf = new Configuration();
+    FSImage fsImage = Mockito.mock(FSImage.class);
+    FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
+    Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
+    FSNamesystem fsn = new FSNamesystem(conf, fsImage);
+
+    LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG);
+    GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.INFO);
+
+    // Don't report if the write lock is held for a short time
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
+    fsn.writeUnlock();
+    assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+
+
+    // Report if the write lock is held for a long time
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+
+    // Report if the write lock is held (interruptibly) for a long time
+    fsn.writeLockInterruptibly();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+
+    // Report if it's held for a long time when re-entering write lock
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
+    fsn.writeLockInterruptibly();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+  }
+
   @Test
   public void testSafemodeReplicationConf() throws IOException {
     Configuration conf = new Configuration();

Reply via email to