YARN-4811. Generate histograms in ContainerMetrics for actual container resource usage
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0dd9bcab Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0dd9bcab Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0dd9bcab Branch: refs/heads/HDFS-1312 Commit: 0dd9bcab97ccdf24a2174636604110b74664cf80 Parents: 7a02147 Author: Jian He <jia...@apache.org> Authored: Thu Mar 31 14:28:13 2016 -0700 Committer: Jian He <jia...@apache.org> Committed: Thu Mar 31 14:28:13 2016 -0700 ---------------------------------------------------------------------- .../hadoop/metrics2/lib/MutableQuantiles.java | 7 +- .../hadoop/metrics2/util/QuantileEstimator.java | 32 +++++++++ .../hadoop/metrics2/util/SampleQuantiles.java | 2 +- .../hadoop-yarn-server-nodemanager/pom.xml | 5 ++ .../monitor/ContainerMetrics.java | 69 ++++++++++++++++++++ .../monitor/TestContainerMetrics.java | 58 +++++++++++++++- 6 files changed, 170 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/0dd9bcab/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java index 2e6053f..a4711db 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableQuantiles.java @@ -31,6 +31,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.util.Quantile; +import org.apache.hadoop.metrics2.util.QuantileEstimator; import org.apache.hadoop.metrics2.util.SampleQuantiles; import com.google.common.annotations.VisibleForTesting; @@ -54,7 +55,7 @@ public class MutableQuantiles extends MutableMetric { private final MetricsInfo[] quantileInfos; private final int interval; - private SampleQuantiles estimator; + private QuantileEstimator estimator; private long previousCount = 0; @VisibleForTesting @@ -134,6 +135,10 @@ public class MutableQuantiles extends MutableMetric { return interval; } + public synchronized void setEstimator(QuantileEstimator quantileEstimator) { + this.estimator = quantileEstimator; + } + /** * Runnable used to periodically roll over the internal * {@link SampleQuantiles} every interval. http://git-wip-us.apache.org/repos/asf/hadoop/blob/0dd9bcab/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/QuantileEstimator.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/QuantileEstimator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/QuantileEstimator.java new file mode 100644 index 0000000..075b879 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/QuantileEstimator.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics2.util; + +import java.util.Map; + +public interface QuantileEstimator { + + void insert(long value); + + Map<Quantile, Long> snapshot(); + + long getCount(); + + void clear(); +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/0dd9bcab/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleQuantiles.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleQuantiles.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleQuantiles.java index d540036..0c5d98f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleQuantiles.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleQuantiles.java @@ -47,7 +47,7 @@ import com.google.common.base.Preconditions; * */ @InterfaceAudience.Private -public class SampleQuantiles { +public class SampleQuantiles implements QuantileEstimator { /** * Total number of items in stream http://git-wip-us.apache.org/repos/asf/hadoop/blob/0dd9bcab/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml index 051bb4e..59c3332 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml @@ -117,6 +117,11 @@ <groupId>com.google.protobuf</groupId> <artifactId>protobuf-java</artifactId> </dependency> + + <dependency> + <groupId>com.codahale.metrics</groupId> + <artifactId>metrics-core</artifactId> + </dependency> <!-- junit must be before mockito-all on the classpath. mockito-all bundles its own copy of the hamcrest classes, but they don't match our junit version. http://git-wip-us.apache.org/repos/asf/hadoop/blob/0dd9bcab/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java index 48128c1..9d17db0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java @@ -18,6 +18,9 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Snapshot; +import com.codahale.metrics.UniformReservoir; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsInfo; @@ -29,13 +32,17 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; +import org.apache.hadoop.metrics2.lib.MutableQuantiles; import org.apache.hadoop.metrics2.lib.MutableStat; +import org.apache.hadoop.metrics2.util.Quantile; +import org.apache.hadoop.metrics2.util.QuantileEstimator; import org.apache.hadoop.yarn.api.records.ContainerId; import java.util.HashMap; import java.util.Map; import java.util.Timer; import java.util.TimerTask; +import java.util.TreeMap; import static org.apache.hadoop.metrics2.lib.Interns.info; @@ -47,10 +54,13 @@ public class ContainerMetrics implements MetricsSource { public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs"; public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit"; public static final String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs"; + public static final String PMEM_USAGE_QUANTILES_NAME = "pMemUsageMBHistogram"; public static final String LAUNCH_DURATION_METRIC_NAME = "launchDurationMs"; public static final String LOCALIZATION_DURATION_METRIC_NAME = "localizationDurationMs"; private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent"; + private static final String PHY_CPU_USAGE_QUANTILES_NAME = + "pCpuUsagePercentHistogram"; // Use a multiplier of 1000 to avoid losing too much precision when // converting to integers @@ -59,6 +69,9 @@ public class ContainerMetrics implements MetricsSource { @Metric public MutableStat pMemMBsStat; + @Metric + public MutableQuantiles pMemMBQuantiles; + // This tracks overall CPU percentage of the machine in terms of percentage // of 1 core similar to top // Thus if you use 2 cores completely out of 4 available cores this value @@ -67,6 +80,9 @@ public class ContainerMetrics implements MetricsSource { public MutableStat cpuCoreUsagePercent; @Metric + public MutableQuantiles cpuCoreUsagePercentQuantiles; + + @Metric public MutableStat milliVcoresUsed; @Metric @@ -127,9 +143,23 @@ public class ContainerMetrics implements MetricsSource { this.pMemMBsStat = registry.newStat( PMEM_USAGE_METRIC_NAME, "Physical memory stats", "Usage", "MBs", true); + this.pMemMBQuantiles = registry + .newQuantiles(PMEM_USAGE_QUANTILES_NAME, "Physical memory quantiles", + "Usage", "MBs", 1); + ContainerMetricsQuantiles memEstimator = + new ContainerMetricsQuantiles(MutableQuantiles.quantiles); + pMemMBQuantiles.setEstimator(memEstimator); + this.cpuCoreUsagePercent = registry.newStat( PHY_CPU_USAGE_METRIC_NAME, "Physical Cpu core percent usage stats", "Usage", "Percents", true); + this.cpuCoreUsagePercentQuantiles = registry + .newQuantiles(PHY_CPU_USAGE_QUANTILES_NAME, + "Physical Cpu core percent usage quantiles", "Usage", "Percents", + 1); + ContainerMetricsQuantiles cpuEstimator = + new ContainerMetricsQuantiles(MutableQuantiles.quantiles); + cpuCoreUsagePercentQuantiles.setEstimator(cpuEstimator); this.milliVcoresUsed = registry.newStat( VCORE_USAGE_METRIC_NAME, "1000 times Vcore usage", "Usage", "MilliVcores", true); @@ -216,6 +246,7 @@ public class ContainerMetrics implements MetricsSource { public void recordMemoryUsage(int memoryMBs) { if (memoryMBs >= 0) { this.pMemMBsStat.add(memoryMBs); + this.pMemMBQuantiles.add(memoryMBs); } } @@ -223,6 +254,7 @@ public class ContainerMetrics implements MetricsSource { int totalPhysicalCpuPercent, int milliVcoresUsed) { if (totalPhysicalCpuPercent >=0) { this.cpuCoreUsagePercent.add(totalPhysicalCpuPercent); + this.cpuCoreUsagePercentQuantiles.add(totalPhysicalCpuPercent); } if (milliVcoresUsed >= 0) { this.milliVcoresUsed.add(milliVcoresUsed); @@ -274,4 +306,41 @@ public class ContainerMetrics implements MetricsSource { }; unregisterContainerMetricsTimer.schedule(timerTask, unregisterDelayMs); } + + public static class ContainerMetricsQuantiles implements QuantileEstimator { + + private final Histogram histogram = new Histogram(new UniformReservoir()); + + private Quantile[] quantiles; + + ContainerMetricsQuantiles(Quantile[] q) { + quantiles = q; + } + + @Override + public synchronized void insert(long value) { + histogram.update(value); + } + + @Override + synchronized public long getCount() { + return histogram.getCount(); + } + + @Override + synchronized public void clear() { + // don't do anything because we want metrics over the lifetime of the + // container + } + + @Override + public synchronized Map<Quantile, Long> snapshot() { + Snapshot snapshot = histogram.getSnapshot(); + Map<Quantile, Long> values = new TreeMap<>(); + for (Quantile quantile : quantiles) { + values.put(quantile, (long) snapshot.getValue(quantile.quantile)); + } + return values; + } + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/0dd9bcab/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java index 2beb927..fb482c4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; +import org.apache.hadoop.metrics2.AbstractMetric; import org.apache.hadoop.metrics2.MetricsRecord; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl; @@ -26,10 +27,15 @@ import org.apache.hadoop.metrics2.impl.MetricsSystemImpl; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.junit.Assert; import org.junit.Test; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyString; @@ -158,4 +164,54 @@ public class TestContainerMetrics { system, containerId3, 1, 0)); system.shutdown(); } + + /** + * Run a test to submit values for actual memory usage and see if the + * histogram comes out correctly. + * @throws Exception + */ + @Test + public void testContainerMetricsHistogram() throws Exception { + + // submit 2 values - 1024 and 2048. 75th, 90th, 95th and 99th percentiles + // will be 2048. 50th percentile will be 1536((1024+2048)/2) + // if we keep recording 1024 and 2048 in a loop, the 50th percentile + // will tend closer to 2048 + Map<String, Long> expectedValues = new HashMap<>(); + expectedValues.put("PMemUsageMBHistogram50thPercentileMBs", 1536L); + expectedValues.put("PMemUsageMBHistogram75thPercentileMBs", 2048L); + expectedValues.put("PMemUsageMBHistogram90thPercentileMBs", 2048L); + expectedValues.put("PMemUsageMBHistogram95thPercentileMBs", 2048L); + expectedValues.put("PMemUsageMBHistogram99thPercentileMBs", 2048L); + expectedValues.put("PCpuUsagePercentHistogram50thPercentilePercents", 0L); + expectedValues.put("PCpuUsagePercentHistogram75thPercentilePercents", 0L); + expectedValues.put("PCpuUsagePercentHistogram90thPercentilePercents", 0L); + expectedValues.put("PCpuUsagePercentHistogram95thPercentilePercents", 0L); + expectedValues.put("PCpuUsagePercentHistogram99thPercentilePercents", 0L); + Set<String> testResults = new HashSet<>(); + int delay = 10; + int rolloverDelay = 1000; + MetricsCollectorImpl collector = new MetricsCollectorImpl(); + ContainerId containerId = mock(ContainerId.class); + ContainerMetrics metrics = + ContainerMetrics.forContainer(containerId, delay, 0); + + metrics.recordMemoryUsage(1024); + metrics.recordMemoryUsage(2048); + Thread.sleep(rolloverDelay + 10); + metrics.getMetrics(collector, true); + for (MetricsRecord record : collector.getRecords()) { + for (AbstractMetric metric : record.metrics()) { + String metricName = metric.name(); + if (expectedValues.containsKey(metricName)) { + Long expectedValue = expectedValues.get(metricName); + Assert.assertEquals( + "Metric " + metricName + " doesn't have expected value", + expectedValue, metric.value()); + testResults.add(metricName); + } + } + } + Assert.assertEquals(expectedValues.keySet(), testResults); + } }