Author: tucu
Date: Thu May 9 22:17:36 2013
New Revision: 1480810
URL: http://svn.apache.org/r1480810
Log: YARN-655. Fair scheduler metrics should subtract allocated memory from available memory. (sandyr via tucu)
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1480810&r1=1480809&r2=1480810&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Thu May 9 22:17:36 2013 @@ -289,6 +289,9 @@ Release 2.0.5-beta - UNRELEASED YARN-637. FS: maxAssign is not honored. (kkambatl via tucu) + YARN-655. Fair scheduler metrics should subtract allocated memory from + available memory. 
(sandyr via tucu) + Release 2.0.4-alpha - 2013-04-25 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java?rev=1480810&r1=1480809&r2=1480810&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java (original) +++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java Thu May 9 22:17:36 2013 @@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.api.record import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -425,6 +426,10 @@ public class QueueMetrics implements Met public int getAppsFailed() { return appsFailed.value(); } + + public Resource getAllocatedResources() { + return BuilderUtils.newResource(allocatedMB.value(), 0); + } public int getAllocatedMB() { return allocatedMB.value(); Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java 
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java?rev=1480810&r1=1480809&r2=1480810&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java (original) +++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java Thu May 9 22:17:36 2013 @@ -225,10 +225,6 @@ public class FairScheduler implements Re // Recursively compute fair shares for all queues // and update metrics rootQueue.recomputeShares(); - - // Update recorded capacity of root queue (child queues are updated - // when fair share is calculated). 
- rootMetrics.setAvailableResourcesToQueue(clusterCapacity); } /** @@ -617,6 +613,7 @@ public class FairScheduler implements Re } else { application.containerCompleted(rmContainer, containerStatus, event); node.releaseContainer(container); + updateRootQueueMetrics(); } LOG.info("Application " + applicationAttemptId + @@ -628,6 +625,7 @@ public class FairScheduler implements Re private synchronized void addNode(RMNode node) { nodes.put(node.getNodeID(), new FSSchedulerNode(node)); Resources.addTo(clusterCapacity, node.getTotalCapability()); + updateRootQueueMetrics(); LOG.info("Added node " + node.getNodeAddress() + " cluster capacity: " + clusterCapacity); @@ -636,6 +634,7 @@ public class FairScheduler implements Re private synchronized void removeNode(RMNode rmNode) { FSSchedulerNode node = nodes.get(rmNode.getNodeID()); Resources.subtractFrom(clusterCapacity, rmNode.getTotalCapability()); + updateRootQueueMetrics(); // Remove running containers List<RMContainer> runningContainers = node.getRunningContainers(); @@ -840,6 +839,7 @@ public class FairScheduler implements Re if ((assignedContainers >= maxAssign) && (maxAssign > 0)) { break; } } } + updateRootQueueMetrics(); } @Override @@ -861,6 +861,18 @@ public class FairScheduler implements Re } return new SchedulerAppReport(applications.get(appAttemptId)); } + + /** + * Subqueue metrics might be a little out of date because fair shares are + * recalculated at the update interval, but the root queue metrics needs to + * be updated synchronously with allocations and completions so that cluster + * metrics will be consistent. 
+ */ + private void updateRootQueueMetrics() { + rootMetrics.setAvailableResourcesToQueue( + Resources.subtract( + clusterCapacity, rootMetrics.getAllocatedResources())); + } @Override public QueueMetrics getRootQueueMetrics() { Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java?rev=1480810&r1=1480809&r2=1480810&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java (original) +++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java Thu May 9 22:17:36 2013 @@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.server.res import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; @@ -127,6 +128,7 @@ public class TestFairScheduler { 
public void tearDown() { scheduler = null; resourceManager = null; + QueueMetrics.clearQueueMetrics(); } private Configuration createConfiguration() { @@ -336,6 +338,13 @@ public class TestFairScheduler { assertEquals(1024, scheduler.getQueueManager().getQueue("queue1"). getResourceUsage().getMemory()); + + // verify metrics + QueueMetrics queue1Metrics = scheduler.getQueueManager().getQueue("queue1") + .getMetrics(); + assertEquals(1024, queue1Metrics.getAllocatedMB()); + assertEquals(1024, scheduler.getRootQueueMetrics().getAllocatedMB()); + assertEquals(512, scheduler.getRootQueueMetrics().getAvailableMB()); } @Test (timeout = 5000) @@ -1245,6 +1254,7 @@ public class TestFairScheduler { scheduler.handle(updateEvent); assertEquals(1, app.getLiveContainers().size()); + assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB()); // Create request at higher priority createSchedulingRequestExistingApplication(1024, 1, attId); @@ -1260,6 +1270,7 @@ public class TestFairScheduler { // Complete container scheduler.allocate(attId, new ArrayList<ResourceRequest>(), Arrays.asList(containerId)); + assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB()); // Schedule at opening scheduler.update(); @@ -1271,6 +1282,7 @@ public class TestFairScheduler { for (RMContainer liveContainer : liveContainers) { Assert.assertEquals(2, liveContainer.getContainer().getPriority().getPriority()); } + assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB()); } @Test @@ -1575,4 +1587,24 @@ public class TestFairScheduler { assertEquals(1, app.getLiveContainers().size()); assertEquals(0, app.getReservedContainers().size()); } + + @Test + public void testRemoveNodeUpdatesRootQueueMetrics() { + assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB()); + + RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024)); + NodeAddedSchedulerEvent addEvent = new NodeAddedSchedulerEvent(node1); + scheduler.handle(addEvent); + + assertEquals(1024, 
scheduler.getRootQueueMetrics().getAvailableMB()); + scheduler.update(); // update shouldn't change things + assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB()); + + NodeRemovedSchedulerEvent removeEvent = new NodeRemovedSchedulerEvent(node1); + scheduler.handle(removeEvent); + + assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB()); + scheduler.update(); // update shouldn't change things + assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB()); + } }