Author: atm Date: Fri Jun 20 02:38:00 2014 New Revision: 1604074 URL: http://svn.apache.org/r1604074 Log: HDFS-6403. Add metrics for log warnings reported by JVM pauses. Contributed by Yongjun Zhang.
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetricsInfo.java hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java hadoop/common/trunk/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java?rev=1604074&r1=1604073&r2=1604074&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java (original) +++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java Fri Jun 20 02:38:00 2014 @@ -38,6 +38,7 @@ import org.apache.hadoop.metrics2.lib.De import org.apache.hadoop.metrics2.lib.Interns; import static org.apache.hadoop.metrics2.source.JvmMetricsInfo.*; import static org.apache.hadoop.metrics2.impl.MsInfo.*; +import org.apache.hadoop.util.JvmPauseMonitor; /** * JVM and logging related metrics. @@ -65,6 +66,7 @@ public class JvmMetrics implements Metri ManagementFactory.getGarbageCollectorMXBeans(); final ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean(); final String processName, sessionId; + private JvmPauseMonitor pauseMonitor = null; final ConcurrentHashMap<String, MetricsInfo[]> gcInfoCache = new ConcurrentHashMap<String, MetricsInfo[]>(); @@ -73,6 +75,10 @@ public class JvmMetrics implements Metri this.sessionId = sessionId; } + public void setPauseMonitor(final JvmPauseMonitor pauseMonitor) { + this.pauseMonitor = pauseMonitor; + } + public static JvmMetrics create(String processName, String sessionId, MetricsSystem ms) { return ms.register(JvmMetrics.name(), JvmMetrics.description(), @@ -120,6 +126,15 @@ public class JvmMetrics implements Metri } rb.addCounter(GcCount, count) .addCounter(GcTimeMillis, timeMillis); + + if (pauseMonitor != null) { + rb.addCounter(GcNumWarnThresholdExceeded, + pauseMonitor.getNumGcWarnThreadholdExceeded()); + rb.addCounter(GcNumInfoThresholdExceeded, + pauseMonitor.getNumGcInfoThresholdExceeded()); + rb.addCounter(GcTotalExtraSleepTime, + pauseMonitor.getTotalGcExtraSleepTime()); + } } private MetricsInfo[] getGcInfo(String gcName) { Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetricsInfo.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetricsInfo.java?rev=1604074&r1=1604073&r2=1604074&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetricsInfo.java (original) +++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetricsInfo.java Fri Jun 20 02:38:00 2014 @@ -48,7 +48,10 @@ public enum JvmMetricsInfo implements Me LogFatal("Total number of fatal log events"), LogError("Total number of error log events"), LogWarn("Total number of warning log events"), - LogInfo("Total number of info log events"); + LogInfo("Total number of info log events"), + GcNumWarnThresholdExceeded("Number of times that the GC warn threshold is exceeded"), + GcNumInfoThresholdExceeded("Number of times that the GC info threshold is exceeded"), + GcTotalExtraSleepTime("Total GC extra sleep time in milliseconds"); private final String desc; Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java?rev=1604074&r1=1604073&r2=1604074&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java (original) +++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java Fri Jun 20 02:38:00 2014 @@ -62,10 +62,13 @@ public class JvmPauseMonitor { "jvm.pause.info-threshold.ms"; private static final long INFO_THRESHOLD_DEFAULT = 1000; - + private long numGcWarnThresholdExceeded = 0; + private long numGcInfoThresholdExceeded = 0; + private long totalGcExtraSleepTime = 0; + private Thread monitorThread; private volatile boolean shouldRun = true; - + public JvmPauseMonitor(Configuration conf) { this.warnThresholdMs = conf.getLong(WARN_THRESHOLD_KEY, WARN_THRESHOLD_DEFAULT); this.infoThresholdMs = conf.getLong(INFO_THRESHOLD_KEY, INFO_THRESHOLD_DEFAULT); @@ -87,6 +90,22 @@ public class JvmPauseMonitor { Thread.currentThread().interrupt(); } } + + public boolean isStarted() { + return monitorThread != null; + } + + public long getNumGcWarnThreadholdExceeded() { + return numGcWarnThresholdExceeded; + } + + public long getNumGcInfoThresholdExceeded() { + return numGcInfoThresholdExceeded; + } + + public long getTotalGcExtraSleepTime() { + return totalGcExtraSleepTime; + } private String formatMessage(long extraSleepTime, Map<String, GcTimes> gcTimesAfterSleep, @@ -166,13 +185,15 @@ public class JvmPauseMonitor { Map<String, GcTimes> gcTimesAfterSleep = getGcTimes(); if (extraSleepTime > warnThresholdMs) { + ++numGcWarnThresholdExceeded; LOG.warn(formatMessage( extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep)); } else if (extraSleepTime > infoThresholdMs) { + ++numGcInfoThresholdExceeded; LOG.info(formatMessage( extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep)); } - + totalGcExtraSleepTime += extraSleepTime; gcTimesBeforeSleep = gcTimesAfterSleep; } } Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm?rev=1604074&r1=1604073&r2=1604074&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm (original) +++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm Fri Jun 20 02:38:00 2014 @@ -86,6 +86,14 @@ jvm context *-------------------------------------+--------------------------------------+ |<<<LogInfo>>> | Total number of INFO logs *-------------------------------------+--------------------------------------+ +|<<<GcNumWarnThresholdExceeded>>> | Number of times that the GC warn + | threshold is exceeded +*-------------------------------------+--------------------------------------+ +|<<<GcNumInfoThresholdExceeded>>> | Number of times that the GC info + | threshold is exceeded +*-------------------------------------+--------------------------------------+ +|<<<GcTotalExtraSleepTime>>> | Total GC extra sleep time in msec +*-------------------------------------+--------------------------------------+ rpc context Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java?rev=1604074&r1=1604073&r2=1604074&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java (original) +++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java Fri Jun 20 02:38:00 2014 @@ -19,18 +19,25 @@ package org.apache.hadoop.metrics2.source; import org.junit.Test; + import static org.mockito.Mockito.*; import static org.apache.hadoop.test.MetricsAsserts.*; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.util.JvmPauseMonitor; + import static org.apache.hadoop.metrics2.source.JvmMetricsInfo.*; import static org.apache.hadoop.metrics2.impl.MsInfo.*; public class TestJvmMetrics { @Test public void testPresence() { - MetricsRecordBuilder rb = getMetrics(new JvmMetrics("test", "test")); + JvmPauseMonitor pauseMonitor = new JvmPauseMonitor(new Configuration()); + JvmMetrics jvmMetrics = new JvmMetrics("test", "test"); + jvmMetrics.setPauseMonitor(pauseMonitor); + MetricsRecordBuilder rb = getMetrics(jvmMetrics); MetricsCollector mc = rb.parent(); verify(mc).addRecord(JvmMetrics);