[ https://issues.apache.org/jira/browse/YARN-6901?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16108904#comment-16108904 ]
Jason Lowe commented on YARN-6901: ---------------------------------- bq. This may not be possible; ideally, moving an app from one queue to another needs to lock the queue, and assigning a container needs to lock the queue as well. It should be safe. I'm not sure I understand. Could you elaborate on how the deadlock is happening? Looking at the branch-2.8 code, I don't see how LeafQueue is calling assignContainers on the app without holding a lock on the queue. Therefore I'm confused about why the app's assignContainers is blocking on a queue lock it should already have. > A CapacityScheduler app->LeafQueue deadlock found in branch-2.8 > ---------------------------------------------------------------- > > Key: YARN-6901 > URL: https://issues.apache.org/jira/browse/YARN-6901 > Project: Hadoop YARN > Issue Type: Bug > Affects Versions: 2.8.0 > Reporter: Wangda Tan > Assignee: Wangda Tan > Priority: Blocker > Attachments: YARN-6901.branch-2.8.001.patch > > > Stacktrace: > {code} > Thread 22068: (state = BLOCKED) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AbstractCSQueue.getParent() > @bci=0, line=185 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue.getQueuePath() > @bci=8, line=262 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.AbstractContainerAllocator.getCSAssignmentFromAllocateResult(org.apache.hadoop.yarn.api.records.Resource, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocation, > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) > @bci=183, line=80 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator.assignContainers(org.apache.hadoop.yarn.api.records.Resource, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode, > > 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits, > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) > @bci=204, line=747 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocator.assignContainers(org.apache.hadoop.yarn.api.records.Resource, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits, > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) > @bci=16, line=49 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp.assignContainers(org.apache.hadoop.yarn.api.records.Resource, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode, > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) > @bci=61, line=468 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue.assignContainers(org.apache.hadoop.yarn.api.records.Resource, > > org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits, > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode) > @bci=148, line=876 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.allocateContainersToNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) > @bci=157, line=1149 (Compiled frame) > - > 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.handle(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent) > @bci=266, line=1277 (Compiled frame) > ================ > Thread 22124: (state = BLOCKED) > - > org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.getReservedContainers() > @bci=0, line=336 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.FifoCandidatesSelector.preemptFrom(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp, > org.apache.hadoop.yarn.api.records.Resource, java.util.Map, java.util.List, > org.apache.hadoop.yarn.api.records.Resource, java.util.Map, > org.apache.hadoop.yarn.api.records.Resource) @bci=61, line=277 (Compiled > frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.FifoCandidatesSelector.selectCandidates(java.util.Map, > org.apache.hadoop.yarn.api.records.Resource, > org.apache.hadoop.yarn.api.records.Resource) @bci=374, line=138 (Compiled > frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.containerBasedPreemptOrKill(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue, > org.apache.hadoop.yarn.api.records.Resource) @bci=264, line=342 (Compiled > frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.editSchedule() > @bci=34, line=202 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitor.invokePolicy() > @bci=4, line=81 (Compiled frame) > - > org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitor$PreemptionChecker.run() > @bci=23, line=92 (Interpreted frame) > - java.lang.Thread.run() @bci=11, line=745 (Interpreted frame) > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029) 
--------------------------------------------------------------------- To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org