[ https://issues.apache.org/jira/browse/YARN-6901?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16108146#comment-16108146 ]
Wangda Tan commented on YARN-6901: ---------------------------------- Thanks [~jlowe], bq. Now I'm wondering how we did not have the lock when trying to get the leaf queue path... This may not be possible, ideally moving an app from one queue to another need to lock queue, and assign container need to lock queue as well. It should be safe. bq. On a related note, I found it odd that LeafQueue#assignContainers explicitly locks the app before callling assignContainers. I just checked the code, assignContainers in app grabs synchronized lock, so I think we can remove the synchronize(app) from LeafQueue safely. > A CapacityScheduler app->LeafQueue deadlock found in branch-2.8 > ---------------------------------------------------------------- > > Key: YARN-6901 > URL: https://issues.apache.org/jira/browse/YARN-6901 > Project: Hadoop YARN > Issue Type: Bug > Affects Versions: 2.8.0 > Reporter: Wangda Tan > Assignee: Wangda Tan > Priority: Blocker > Attachments: YARN-6901.branch-2.8.001.patch > > > Stacktrace: > {code} > Thread 22068: (state = BLOCKED) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AbstractCSQueue.getParent() > @bci=0, line=185 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue.getQueuePath() > @bci=8, line=262 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.AbstractContainerAllocator.getCSAssignmentFromAllocateResult(org.apache.hadoop.yarn.api.records.Resource, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocation, > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) > @bci=183, line=80 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignContainers(org.apache.hadoop.yarn.api.records.Resource, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits, > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) > @bci=204, line=747 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocator.assignContainers(org.apache.hadoop.yarn.api.records.Resource, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits, > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) > @bci=16, line=49 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.assignContainers(org.apache.hadoop.yarn.api.records.Resource, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode, > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) > @bci=61, line=468 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue.assignContainers(org.apache.hadoop.yarn.api.records.Resource, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode) > @bci=148, line=876 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainersToNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) > @bci=157, line=1149 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.handle(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent) > @bci=266, line=1277 (Compiled frame) > ================ > Thread 22124: (state = BLOCKED) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.getReservedContainers() > @bci=0, line=336 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.FifoCandidatesSelector.preemptFrom(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp, > org.apache.hadoop.yarn.api.records.Resource, java.util.Map, java.util.List, > org.apache.hadoop.yarn.api.records.Resource, java.util.Map, > org.apache.hadoop.yarn.api.records.Resource) @bci=61, line=277 (Compiled > frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.FifoCandidatesSelector.selectCandidates(java.util.Map, > org.apache.hadoop.yarn.api.records.Resource, > org.apache.hadoop.yarn.api.records.Resource) @bci=374, line=138 (Compiled > frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.containerBasedPreemptOrKill(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue, > org.apache.hadoop.yarn.api.records.Resource) @bci=264, line=342 (Compiled > frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.editSchedule() > @bci=34, line=202 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitor.invokePolicy() > @bci=4, line=81 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitor$PreemptionChecker.run() > @bci=23, line=92 (Interpreted frame) > - java.lang.Thread.run() @bci=11, line=745 (Interpreted frame) > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029) --------------------------------------------------------------------- To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org