Author: atm
Date: Fri Jun 20 02:38:00 2014
New Revision: 1604074

URL: http://svn.apache.org/r1604074
Log:
HDFS-6403. Add metrics for log warnings reported by JVM pauses. Contributed by 
Yongjun Zhang.

Modified:
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetricsInfo.java
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm
    
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java

Modified: 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java?rev=1604074&r1=1604073&r2=1604074&view=diff
==============================================================================
--- 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java
 (original)
+++ 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java
 Fri Jun 20 02:38:00 2014
@@ -38,6 +38,7 @@ import org.apache.hadoop.metrics2.lib.De
 import org.apache.hadoop.metrics2.lib.Interns;
 import static org.apache.hadoop.metrics2.source.JvmMetricsInfo.*;
 import static org.apache.hadoop.metrics2.impl.MsInfo.*;
+import org.apache.hadoop.util.JvmPauseMonitor;
 
 /**
  * JVM and logging related metrics.
@@ -65,6 +66,7 @@ public class JvmMetrics implements Metri
       ManagementFactory.getGarbageCollectorMXBeans();
   final ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean();
   final String processName, sessionId;
+  private JvmPauseMonitor pauseMonitor = null;
   final ConcurrentHashMap<String, MetricsInfo[]> gcInfoCache =
       new ConcurrentHashMap<String, MetricsInfo[]>();
 
@@ -73,6 +75,10 @@ public class JvmMetrics implements Metri
     this.sessionId = sessionId;
   }
 
+  public void setPauseMonitor(final JvmPauseMonitor pauseMonitor) {
+    this.pauseMonitor = pauseMonitor;
+  }
+
   public static JvmMetrics create(String processName, String sessionId,
                                   MetricsSystem ms) {
     return ms.register(JvmMetrics.name(), JvmMetrics.description(),
@@ -120,6 +126,15 @@ public class JvmMetrics implements Metri
     }
     rb.addCounter(GcCount, count)
       .addCounter(GcTimeMillis, timeMillis);
+    
+    if (pauseMonitor != null) {
+      rb.addCounter(GcNumWarnThresholdExceeded,
+          pauseMonitor.getNumGcWarnThreadholdExceeded());
+      rb.addCounter(GcNumInfoThresholdExceeded,
+          pauseMonitor.getNumGcInfoThresholdExceeded());
+      rb.addCounter(GcTotalExtraSleepTime,
+          pauseMonitor.getTotalGcExtraSleepTime());
+    }
   }
 
   private MetricsInfo[] getGcInfo(String gcName) {

Modified: 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetricsInfo.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetricsInfo.java?rev=1604074&r1=1604073&r2=1604074&view=diff
==============================================================================
--- 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetricsInfo.java
 (original)
+++ 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetricsInfo.java
 Fri Jun 20 02:38:00 2014
@@ -48,7 +48,10 @@ public enum JvmMetricsInfo implements Me
   LogFatal("Total number of fatal log events"),
   LogError("Total number of error log events"),
   LogWarn("Total number of warning log events"),
-  LogInfo("Total number of info log events");
+  LogInfo("Total number of info log events"),
+  GcNumWarnThresholdExceeded("Number of times that the GC warn threshold is 
exceeded"),
+  GcNumInfoThresholdExceeded("Number of times that the GC info threshold is 
exceeded"),
+  GcTotalExtraSleepTime("Total GC extra sleep time in milliseconds");
 
   private final String desc;
 

Modified: 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java?rev=1604074&r1=1604073&r2=1604074&view=diff
==============================================================================
--- 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java
 (original)
+++ 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java
 Fri Jun 20 02:38:00 2014
@@ -62,10 +62,13 @@ public class JvmPauseMonitor {
       "jvm.pause.info-threshold.ms";
   private static final long INFO_THRESHOLD_DEFAULT = 1000;
 
-  
+  private long numGcWarnThresholdExceeded = 0;
+  private long numGcInfoThresholdExceeded = 0;
+  private long totalGcExtraSleepTime = 0;
+   
   private Thread monitorThread;
   private volatile boolean shouldRun = true;
-  
+
   public JvmPauseMonitor(Configuration conf) {
     this.warnThresholdMs = conf.getLong(WARN_THRESHOLD_KEY, 
WARN_THRESHOLD_DEFAULT);
     this.infoThresholdMs = conf.getLong(INFO_THRESHOLD_KEY, 
INFO_THRESHOLD_DEFAULT);
@@ -87,6 +90,22 @@ public class JvmPauseMonitor {
       Thread.currentThread().interrupt();
     }
   }
+
+  public boolean isStarted() {
+    return monitorThread != null;
+  }
+  
+  public long getNumGcWarnThreadholdExceeded() {
+    return numGcWarnThresholdExceeded;
+  }
+  
+  public long getNumGcInfoThresholdExceeded() {
+    return numGcInfoThresholdExceeded;
+  }
+  
+  public long getTotalGcExtraSleepTime() {
+    return totalGcExtraSleepTime;
+  }
   
   private String formatMessage(long extraSleepTime,
       Map<String, GcTimes> gcTimesAfterSleep,
@@ -166,13 +185,15 @@ public class JvmPauseMonitor {
         Map<String, GcTimes> gcTimesAfterSleep = getGcTimes();
 
         if (extraSleepTime > warnThresholdMs) {
+          ++numGcWarnThresholdExceeded;
           LOG.warn(formatMessage(
               extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep));
         } else if (extraSleepTime > infoThresholdMs) {
+          ++numGcInfoThresholdExceeded;
           LOG.info(formatMessage(
               extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep));
         }
-        
+        totalGcExtraSleepTime += extraSleepTime;
         gcTimesBeforeSleep = gcTimesAfterSleep;
       }
     }

Modified: 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm?rev=1604074&r1=1604073&r2=1604074&view=diff
==============================================================================
--- 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm
 (original)
+++ 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm
 Fri Jun 20 02:38:00 2014
@@ -86,6 +86,14 @@ jvm context
 *-------------------------------------+--------------------------------------+
 |<<<LogInfo>>> | Total number of INFO logs
 *-------------------------------------+--------------------------------------+
+|<<<GcNumWarnThresholdExceeded>>> | Number of times that the GC warn
+                                  | threshold is exceeded
+*-------------------------------------+--------------------------------------+
+|<<<GcNumInfoThresholdExceeded>>> | Number of times that the GC info
+                                  | threshold is exceeded
+*-------------------------------------+--------------------------------------+
+|<<<GcTotalExtraSleepTime>>> | Total GC extra sleep time in msec
+*-------------------------------------+--------------------------------------+
 
 rpc context
 

Modified: 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java?rev=1604074&r1=1604073&r2=1604074&view=diff
==============================================================================
--- 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java
 (original)
+++ 
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java
 Fri Jun 20 02:38:00 2014
@@ -19,18 +19,25 @@
 package org.apache.hadoop.metrics2.source;
 
 import org.junit.Test;
+
 import static org.mockito.Mockito.*;
 import static org.apache.hadoop.test.MetricsAsserts.*;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.metrics2.MetricsCollector;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.util.JvmPauseMonitor;
+
 import static org.apache.hadoop.metrics2.source.JvmMetricsInfo.*;
 import static org.apache.hadoop.metrics2.impl.MsInfo.*;
 
 public class TestJvmMetrics {
 
   @Test public void testPresence() {
-    MetricsRecordBuilder rb = getMetrics(new JvmMetrics("test", "test"));
+    JvmPauseMonitor pauseMonitor = new JvmPauseMonitor(new Configuration());
+    JvmMetrics jvmMetrics = new JvmMetrics("test", "test");
+    jvmMetrics.setPauseMonitor(pauseMonitor);
+    MetricsRecordBuilder rb = getMetrics(jvmMetrics);
     MetricsCollector mc = rb.parent();
 
     verify(mc).addRecord(JvmMetrics);


Reply via email to