[ https://issues.apache.org/jira/browse/YARN-1889?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13951774#comment-13951774 ]
Hong Zhiguo commented on YARN-1889: ----------------------------------- Hi, Sandy, While processing NodeUpdate events, the number of GCs and the accumulated GC time are both reduced by about half. > avoid creating new objects on each fair scheduler call to AppSchedulable > comparator > ----------------------------------------------------------------------------------- > > Key: YARN-1889 > URL: https://issues.apache.org/jira/browse/YARN-1889 > Project: Hadoop YARN > Issue Type: Improvement > Components: scheduler > Reporter: Hong Zhiguo > Priority: Minor > Attachments: YARN-1889.patch > > > In fair scheduler, in each scheduling attempt, a full sort is > performed on List of AppSchedulable, which invokes Comparator.compare > method many times. Both FairShareComparator and DRFComparator call > AppSchedulable.getWeights, and AppSchedulable.getPriority. > A new ResourceWeights object is allocated on each call of getWeights, > and the same for getPriority. This introduces a lot of pressure to > GC because these methods are called very very frequently. > The test case below shows the improvement in performance and GC behaviour. The > results show that the GC pressure during processing NodeUpdate is reduced > by half by this patch. 
> The code to show the improvement: (Add it to TestFairScheduler.java) > import java.lang.management.GarbageCollectorMXBean; > import java.lang.management.ManagementFactory; > public void printGCStats() { > long totalGarbageCollections = 0; > long garbageCollectionTime = 0; > for(GarbageCollectorMXBean gc : > ManagementFactory.getGarbageCollectorMXBeans()) { > long count = gc.getCollectionCount(); > if(count >= 0) { > totalGarbageCollections += count; > } > long time = gc.getCollectionTime(); > if(time >= 0) { > garbageCollectionTime += time; > } > } > System.out.println("Total Garbage Collections: " > + totalGarbageCollections); > System.out.println("Total Garbage Collection Time (ms): " > + garbageCollectionTime); > } > @Test > public void testImpactOnGC() throws Exception { > scheduler.reinitialize(conf, resourceManager.getRMContext()); > // Add nodes > int numNode = 10000; > for (int i = 0; i < numNode; ++i) { > String host = String.format("192.1.%d.%d", i/256, i%256); > RMNode node = > MockNodes.newNodeInfo(1, Resources.createResource(1024 * 64), i, > host); > NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); > scheduler.handle(nodeEvent); > assertEquals(1024 * 64 * (i+1), > scheduler.getClusterCapacity().getMemory()); > } > assertEquals(numNode, scheduler.getNumClusterNodes()); > assertEquals(1024 * 64 * numNode, > scheduler.getClusterCapacity().getMemory()); > // add apps, each app has 100 containers. 
> int minReqSize = > > FairSchedulerConfiguration.DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB; > int numApp = 8000; > int priority = 1; > for (int i = 1; i < numApp + 1; ++i) { > ApplicationAttemptId attemptId = createAppAttemptId(i, 1); > AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent( > attemptId.getApplicationId(), "queue1", "user1"); > scheduler.handle(appAddedEvent); > AppAttemptAddedSchedulerEvent attemptAddedEvent = > new AppAttemptAddedSchedulerEvent(attemptId, false); > scheduler.handle(attemptAddedEvent); > createSchedulingRequestExistingApplication(minReqSize * 2, 1, > priority, attemptId); > } > scheduler.update(); > assertEquals(numApp, scheduler.getQueueManager().getLeafQueue("queue1", > true) > .getRunnableAppSchedulables().size()); > System.out.println("GC stats before NodeUpdate processing:"); > printGCStats(); > int hb_num = 5000; > long start = System.nanoTime(); > for (int i = 0; i < hb_num; ++i) { > String host = String.format("192.1.%d.%d", i/256, i%256); > RMNode node = > MockNodes.newNodeInfo(1, Resources.createResource(1024 * 64), 5000, > host); > NodeUpdateSchedulerEvent nodeEvent = new NodeUpdateSchedulerEvent(node); > scheduler.handle(nodeEvent); > } > long end = System.nanoTime(); > System.out.printf("processing time for a NodeUpdate in average: %d us\n", > (end - start)/(hb_num * 1000)); > System.out.println("GC stats after NodeUpdate processing:"); > printGCStats(); > } -- This message was sent by Atlassian JIRA (v6.2#6252)