[ https://issues.apache.org/jira/browse/TEZ-2421?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14529679#comment-14529679 ]
Bikas Saha commented on TEZ-2421: --------------------------------- "App Shared Pool - #1" #102 daemon prio=5 os_prio=0 tid=0x0000000002426000 nid=0x8bd waiting on condition [0x00007fa2a841d000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000006f58b09c0> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199) at java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock(ReentrantReadWriteLock.java:943) at org.apache.tez.dag.app.dag.impl.VertexImpl.scheduleTasks(VertexImpl.java:1389) at org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerPluginContextImpl.scheduleVertexTasks(VertexManager.java:206) - locked <0x00000006f58d2c08> (a org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerPluginContextImpl) at org.apache.tez.dag.library.vertexmanager.InputReadyVertexManager.handleSourceTaskFinished(InputReadyVertexManager.java:277) at org.apache.tez.dag.library.vertexmanager.InputReadyVertexManager.onSourceTaskCompleted(InputReadyVertexManager.java:198) - locked <0x00000006f58d2d90> (a org.apache.tez.dag.library.vertexmanager.InputReadyVertexManager) at org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerEventSourceTaskCompleted.invoke(VertexManager.java:601) at org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerEvent$1.run(VertexManager.java:656) - locked <0x00000006f58d2d30> (a org.apache.tez.dag.app.dag.impl.VertexManager) at org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerEvent$1.run(VertexManager.java:651) at java.security.AccessController.doPrivileged(Native 
Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657) at org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerEvent.call(VertexManager.java:651) at org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerEvent.call(VertexManager.java:640) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) "TaskSchedulerAppCaller #0" #92 daemon prio=5 os_prio=0 tid=0x0000000001884800 nid=0x8af waiting on condition [0x00007fa2a9127000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000007aa165038> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283) at java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727) at org.apache.tez.dag.app.dag.impl.TaskAttemptImpl.getState(TaskAttemptImpl.java:547) at org.apache.tez.dag.app.dag.impl.TaskImpl.selectBestAttempt(TaskImpl.java:715) at org.apache.tez.dag.app.dag.impl.TaskImpl.getProgress(TaskImpl.java:473) at org.apache.tez.dag.app.dag.impl.VertexImpl.computeProgress(VertexImpl.java:1179) at org.apache.tez.dag.app.dag.impl.VertexImpl.getProgress(VertexImpl.java:1117) at org.apache.tez.dag.app.dag.impl.DAGImpl.getProgress(DAGImpl.java:767) at org.apache.tez.dag.app.DAGAppMaster.getProgress(DAGAppMaster.java:1134) at 
org.apache.tez.dag.app.rm.TaskSchedulerEventHandler.getProgress(TaskSchedulerEventHandler.java:556) at org.apache.tez.dag.app.rm.TaskSchedulerAppCallbackWrapper$GetProgressCallable.call(TaskSchedulerAppCallbackWrapper.java:291) at org.apache.tez.dag.app.rm.TaskSchedulerAppCallbackWrapper$GetProgressCallable.call(TaskSchedulerAppCallbackWrapper.java:282) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) "IPC Server handler 0 on 59316" #52 daemon prio=5 os_prio=0 tid=0x00007fa2d967b000 nid=0x887 waiting on condition [0x00007fa2abb4a000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000006f58b09c0> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283) at java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727) at org.apache.tez.dag.app.dag.impl.VertexImpl.getVertexProgress(VertexImpl.java:1126) at org.apache.tez.dag.app.dag.impl.DAGImpl.getDAGStatus(DAGImpl.java:806) at org.apache.tez.dag.app.dag.impl.DAGImpl.getDAGStatus(DAGImpl.java:867) at org.apache.tez.dag.api.client.DAGClientHandler.getDAGStatus(DAGClientHandler.java:67) at org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPBServerImpl.getDAGStatus(DAGClientAMProtocolBlockingPBServerImpl.java:99) at 
org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC$DAGClientAMProtocol$2.callBlockingMethod(DAGClientAMProtocolRPC.java:7465) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:969) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2045) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2043) "Dispatcher thread: Central" #36 prio=5 os_prio=0 tid=0x00007fa2bc653800 nid=0x878 waiting on condition [0x00007fa2ac353000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000006f58b09c0> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283) at java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727) at org.apache.tez.dag.app.dag.impl.VertexImpl.getTaskLocationHint(VertexImpl.java:1162) at org.apache.tez.dag.app.dag.impl.TaskAttemptImpl.getTaskLocationHint(TaskAttemptImpl.java:940) at org.apache.tez.dag.app.dag.impl.TaskAttemptImpl$ScheduleTaskattemptTransition.transition(TaskAttemptImpl.java:1033) at org.apache.tez.dag.app.dag.impl.TaskAttemptImpl$ScheduleTaskattemptTransition.transition(TaskAttemptImpl.java:1003) at 
org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385) at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) at org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) - locked <0x00000007aa1651b8> (a org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine) at org.apache.tez.dag.app.dag.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:676) at org.apache.tez.dag.app.dag.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:113) at org.apache.tez.dag.app.DAGAppMaster$TaskAttemptEventDispatcher.handle(DAGAppMaster.java:1920) at org.apache.tez.dag.app.DAGAppMaster$TaskAttemptEventDispatcher.handle(DAGAppMaster.java:1905) at org.apache.tez.common.AsyncDispatcher.dispatch(AsyncDispatcher.java:183) at org.apache.tez.common.AsyncDispatcher$1.run(AsyncDispatcher.java:114) at java.lang.Thread.run(Thread.java:745) > Deadlock in AM because attempt and vertex locking each other out > ---------------------------------------------------------------- > > Key: TEZ-2421 > URL: https://issues.apache.org/jira/browse/TEZ-2421 > Project: Apache Tez > Issue Type: Bug > Reporter: Bikas Saha > Assignee: Bikas Saha > > Ideally locks should be taken one way - either going down or up. Preferably not going up because most such data can be passed in during object construction. -- This message was sent by Atlassian JIRA (v6.3.4#6332)