Repository: hadoop Updated Branches: refs/heads/branch-2.8 313fcd075 -> 8f10d0209
YARN-7728: Expose container preemptions related information in Capacity Scheduler queue metrics Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8f10d020 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8f10d020 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8f10d020 Branch: refs/heads/branch-2.8 Commit: 8f10d0209569f626ce583a2e802e0bb4991e86c8 Parents: 313fcd0 Author: Eric Payne <epa...@apache.org> Authored: Wed Feb 7 09:12:17 2018 -0600 Committer: Eric Payne <epa...@apache.org> Committed: Wed Feb 7 09:12:17 2018 -0600 ---------------------------------------------------------------------- .../resourcemanager/scheduler/QueueMetrics.java | 29 ++++++++++++++++++++ .../scheduler/capacity/CapacityScheduler.java | 19 +++++++++++++ ...TestCapacitySchedulerSurgicalPreemption.java | 4 +++ 3 files changed, 52 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f10d020/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index a7f907b..e2e9101 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -68,6 +68,10 @@ public class QueueMetrics implements MetricsSource { MutableCounterLong aggregateOffSwitchContainersAllocated; @Metric("Aggregate # of preempted containers") MutableCounterLong aggregateContainersPreempted; + @Metric("Aggregate # of preempted memory seconds") MutableCounterLong + aggregateMemoryMBSecondsPreempted; + @Metric("Aggregate # of preempted vcore seconds") MutableCounterLong + aggregateVcoreSecondsPreempted; @Metric("# of active users") MutableGaugeInt activeUsers; @Metric("# of active applications") MutableGaugeInt activeApplications; @Metric("App Attempt First Container Allocation Delay") @@ -521,6 +525,27 @@ public class QueueMetrics implements MetricsSource { } } + public void preemptContainer() { + aggregateContainersPreempted.incr(); + if (parent != null) { + parent.preemptContainer(); + } + } + + public void updatePreemptedMemoryMBSeconds(long mbSeconds) { + aggregateMemoryMBSecondsPreempted.incr(mbSeconds); + if (parent != null) { + parent.updatePreemptedMemoryMBSeconds(mbSeconds); + } + } + + public void updatePreemptedVcoreSeconds(long vcoreSeconds) { + aggregateVcoreSecondsPreempted.incr(vcoreSeconds); + if (parent != null) { + parent.updatePreemptedVcoreSeconds(vcoreSeconds); + } + } + public void reserveResource(String partition, String user, Resource res) { if(partition == null || partition.equals(RMNodeLabelsManager.NO_LABEL)) { reserveResource(user, res); @@ -697,4 +722,8 @@ public class QueueMetrics implements MetricsSource { public long getAggegatedReleasedContainers() { return aggregateContainersReleased.value(); } + + public long getAggregatePreemptedContainers() { + return aggregateContainersPreempted.value(); + } } 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f10d020/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index c7254b3..cc9f93c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.time.DateUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate; @@ -35,6 +36,7 @@ import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; import 
org.apache.hadoop.yarn.api.records.NodeId; @@ -1609,6 +1611,23 @@ public class CapacityScheduler extends LeafQueue queue = (LeafQueue)application.getQueue(); queue.completedContainer(clusterResource, application, node, rmContainer, containerStatus, event, null, true); + if (ContainerExitStatus.PREEMPTED == containerStatus.getExitStatus()) { + updateQueuePreemptionMetrics(queue, rmContainer); + } + } + + private void updateQueuePreemptionMetrics( + CSQueue queue, RMContainer rmc) { + QueueMetrics qMetrics = queue.getMetrics(); + long usedMillis = rmc.getFinishTime() - rmc.getCreationTime(); + Resource containerResource = rmc.getAllocatedResource(); + qMetrics.preemptContainer(); + long mbSeconds = (containerResource.getMemorySize() * usedMillis) + / DateUtils.MILLIS_PER_SECOND; + long vcSeconds = (containerResource.getVirtualCores() * usedMillis) + / DateUtils.MILLIS_PER_SECOND; + qMetrics.updatePreemptedMemoryMBSeconds(mbSeconds); + qMetrics.updatePreemptedVcoreSeconds(vcSeconds); } @Override http://git-wip-us.apache.org/repos/asf/hadoop/blob/8f10d020/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java index 908e0e8..df0c0cf 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java @@ -150,6 +150,10 @@ public class TestCapacitySchedulerSurgicalPreemption waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()), am1.getApplicationAttemptId(), 16); + // Ensure preemption metrics were recorded. + Assert.assertEquals("Number of preempted containers incorrectly recorded:", + 4, cs.getQueue("root").getMetrics().getAggregatePreemptedContainers()); + rm1.close(); } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org