[ https://issues.apache.org/jira/browse/YARN-10546?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17254453#comment-17254453 ]
zhuqi commented on YARN-10546:
------------------------------

[~leftnoteasy] [~tangzhankun] [~Jim_Brennan] [~Tao Yang] Our cluster currently runs the FairScheduler, and this problem happened occasionally before YARN-4270 was applied. When I wrote a test, I confirmed that CS should also handle it; I will fix it later.
{code:java}
@Test
@SuppressWarnings("unchecked")
public void testReservationLimitedForSingleApp() throws Exception {
  // Show that a single app can end up reserving on every node for a
  // non-node/rack-specific (ANY) request.
  CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
  setup(csConf);

  // Manipulate queue 'a'
  LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));

  // Users
  final String user_0 = "user_0";

  // Submit application
  final ApplicationAttemptId appAttemptId_0 =
      TestUtils.getMockApplicationAttemptId(0, 0);
  FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
      mock(ActiveUsersManager.class), spyRMContext);
  app_0 = spy(app_0);
  Mockito.doNothing().when(app_0)
      .updateAMContainerDiagnostics(any(AMState.class), any(String.class));
  rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
  a.submitApplicationAttempt(app_0, user_0);

  // Setup 5 nodes with 8 GB each
  String host_0 = "host_0";
  FiCaSchedulerNode node_0 =
      TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8 * GB);
  String host_1 = "host_1";
  FiCaSchedulerNode node_1 =
      TestUtils.getMockNode(host_1, DEFAULT_RACK1, 0, 8 * GB);
  String host_2 = "host_2";
  FiCaSchedulerNode node_2 =
      TestUtils.getMockNode(host_2, DEFAULT_RACK2, 0, 8 * GB);
  String host_3 = "host_3";
  FiCaSchedulerNode node_3 =
      TestUtils.getMockNode(host_3, DEFAULT_RACK3, 0, 8 * GB);
  String host_4 = "host_4";
  FiCaSchedulerNode node_4 =
      TestUtils.getMockNode(host_4, DEFAULT_RACK4, 0, 8 * GB);

  Map<ApplicationAttemptId, FiCaSchedulerApp> apps =
      ImmutableMap.of(app_0.getApplicationAttemptId(), app_0);
  Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(
      node_0.getNodeID(), node_0,
      node_1.getNodeID(), node_1,
      node_2.getNodeID(), node_2,
      node_3.getNodeID(), node_3,
      node_4.getNodeID(), node_4);

  when(csContext.getNode(node_0.getNodeID())).thenReturn(node_0);
  when(csContext.getNode(node_1.getNodeID())).thenReturn(node_1);
  when(csContext.getNode(node_2.getNodeID())).thenReturn(node_2);
  when(csContext.getNode(node_3.getNodeID())).thenReturn(node_3);
  when(csContext.getNode(node_4.getNodeID())).thenReturn(node_4);

  cs.getNodeTracker().addNode(node_0);
  cs.getNodeTracker().addNode(node_1);
  cs.getNodeTracker().addNode(node_2);
  cs.getNodeTracker().addNode(node_3);
  cs.getNodeTracker().addNode(node_4);

  final int numNodes = 5;
  Resource clusterResource = Resources.createResource(numNodes * (8 * GB));
  when(csContext.getNumClusterNodes()).thenReturn(numNodes);
  root.updateClusterResource(clusterResource,
      new ResourceLimits(clusterResource));

  // First request: 5 containers of 5 GB. One lands on each node,
  // leaving 3 GB free per node.
  Priority priorityAM = TestUtils.createMockPriority(1);
  app_0.updateResourceRequests(Collections.singletonList(TestUtils
      .createResourceRequest(ResourceRequest.ANY, 5 * GB, 5, true,
          priorityAM, recordFactory)));
  TestUtils.applyResourceCommitRequest(clusterResource,
      a.assignContainers(clusterResource, node_0,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
  TestUtils.applyResourceCommitRequest(clusterResource,
      a.assignContainers(clusterResource, node_1,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
  TestUtils.applyResourceCommitRequest(clusterResource,
      a.assignContainers(clusterResource, node_2,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
  TestUtils.applyResourceCommitRequest(clusterResource,
      a.assignContainers(clusterResource, node_3,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
  TestUtils.applyResourceCommitRequest(clusterResource,
      a.assignContainers(clusterResource, node_4,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);

  // Second request: 8 containers of 4 GB. No node has 4 GB free, so with
  // no reservation limit the app reserves on every one of the 5 nodes.
  app_0.updateResourceRequests(Collections.singletonList(TestUtils
      .createResourceRequest(ResourceRequest.ANY, 4 * GB, 8, true,
          priorityAM, recordFactory)));
  TestUtils.applyResourceCommitRequest(clusterResource,
      a.assignContainers(clusterResource, node_0,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
  TestUtils.applyResourceCommitRequest(clusterResource,
      a.assignContainers(clusterResource, node_1,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
  TestUtils.applyResourceCommitRequest(clusterResource,
      a.assignContainers(clusterResource, node_2,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
  TestUtils.applyResourceCommitRequest(clusterResource,
      a.assignContainers(clusterResource, node_3,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);
  TestUtils.applyResourceCommitRequest(clusterResource,
      a.assignContainers(clusterResource, node_4,
          new ResourceLimits(clusterResource),
          SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), nodes, apps);

  // Inspect the outcome: a reserved container shows up on every node.
  System.out.println(a.getUsedResources());
  System.out.println(a.getMetrics().getReservedMB());
  System.out.println(app_0.getReservedContainers());
  System.out.println(node_0.getReservedContainer());
  System.out.println(node_1.getReservedContainer());
  System.out.println(node_2.getReservedContainer());
  System.out.println(node_3.getReservedContainer());
  System.out.println(node_4.getReservedContainer());
}
{code}

> Limit application resource reservation on nodes for non-node/rack specific
> requests should be supported in CS.
> -------------------------------------------------------------------------------------------------------------
>
>                 Key: YARN-10546
>                 URL: https://issues.apache.org/jira/browse/YARN-10546
>             Project: Hadoop YARN
>          Issue Type: Bug
>          Components: capacityscheduler
>    Affects Versions: 3.3.0
>            Reporter: zhuqi
>            Assignee: zhuqi
>            Priority: Major
>
> Just as YARN-4270 fixed this for the FairScheduler, the CapacityScheduler
> should fix it as well. It is a big problem in a production cluster when it
> happens. The FairScheduler-to-CapacityScheduler configuration converter
> should also support this setting.
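For context, YARN-4270 capped FairScheduler reservations for ANY requests with a ratio of reservable nodes (the yarn.scheduler.fair.reservable-nodes property, default 0.05). Below is a minimal, self-contained sketch of that threshold logic, assuming a CapacityScheduler port would expose an equivalent knob; the class, method, and parameter names here are hypothetical illustrations, not actual YARN APIs:
{code:java}
// Sketch of a YARN-4270-style reservation cap. All names are hypothetical;
// only the ratio semantics mirror the FairScheduler fix.
public class ReservationLimitSketch {

  /**
   * An app may hold a new reservation for an ANY request only while its
   * reservation count is below ceil(numClusterNodes * reservableNodesRatio).
   * In FairScheduler the ratio comes from
   * yarn.scheduler.fair.reservable-nodes (default 0.05); a CS equivalent
   * is what this JIRA proposes.
   */
  static boolean canReserve(int currentReservations, int numClusterNodes,
      double reservableNodesRatio) {
    int maxReservableNodes =
        (int) Math.ceil(numClusterNodes * reservableNodesRatio);
    return currentReservations < maxReservableNodes;
  }

  public static void main(String[] args) {
    // With the 5-node cluster from the test above and a 0.05 ratio,
    // ceil(5 * 0.05) = 1: the first reservation is allowed, and further
    // ones are skipped instead of piling onto every node.
    System.out.println(canReserve(0, 5, 0.05)); // true
    System.out.println(canReserve(1, 5, 0.05)); // false
  }
}
{code}
With a cap like this in place, the test above would end with at most one reserved node instead of all five.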