[
https://issues.apache.org/jira/browse/TEZ-1547?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14187985#comment-14187985
]
Rajesh Balamohan commented on TEZ-1547:
---------------------------------------
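Tested Tez (commit d59b2318dd66ee1784dabca28820e9b8e65f8bf1) with the
TEZ-1547.5 patch applied. It works fine with smaller jobs. With large jobs
(a large number of tasks with very short runtimes), the DAGAppMaster gets
locked up: the dispatcher thread blocks while acquiring the write lock in
StateChangeNotifier.unregisterForVertexUpdates, and that call is made while
the same thread is still inside StateChangeNotifier.stateChanged. Pasting
the thread dump here for reference.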
{code}
Thread 25715: (state = BLOCKED)
- sun.misc.Unsafe.park(boolean, long) @bci=0 (Compiled frame; information may
be imprecise)
- java.util.concurrent.locks.LockSupport.park(java.lang.Object) @bci=14,
line=175 (Compiled frame)
-
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt()
@bci=1, line=836 (Compiled frame)
-
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(java.util.concurrent.locks.AbstractQueuedSynchronizer$Node,
int) @bci=67, line=870 (Compiled frame)
- java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(int) @bci=17,
line=1199 (Compiled frame)
- java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock() @bci=5,
line=943 (Compiled frame)
-
org.apache.tez.dag.app.dag.StateChangeNotifier.unregisterForVertexUpdates(java.lang.String,
org.apache.tez.dag.app.dag.VertexStateUpdateListener) @bci=10, line=92
(Interpreted frame)
-
org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerPluginContextImpl.unregisterForVertexStatusUpdates()
@bci=45, line=254 (Interpreted frame)
-
org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerPluginContextImpl.vertexManagerDone()
@bci=51, line=269 (Interpreted frame)
-
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.schedulePendingTasks(int)
@bci=131, line=540 (Interpreted frame)
-
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.schedulePendingTasks()
@bci=365, line=627 (Compiled frame)
-
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.onVertexStateUpdated(org.apache.tez.dag.api.event.VertexStateUpdate)
@bci=208, line=710 (Interpreted frame)
-
org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerPluginContextImpl.onStateUpdated(org.apache.tez.dag.api.event.VertexStateUpdate)
@bci=96, line=293 (Interpreted frame)
-
org.apache.tez.dag.app.dag.StateChangeNotifier$ListenerContainer.sendStateUpdate(org.apache.tez.dag.api.event.VertexStateUpdate)
@bci=21, line=138 (Interpreted frame)
-
org.apache.tez.dag.app.dag.StateChangeNotifier$ListenerContainer.access$100(org.apache.tez.dag.app.dag.StateChangeNotifier$ListenerContainer,
org.apache.tez.dag.api.event.VertexStateUpdate) @bci=2, line=122 (Interpreted
frame)
-
org.apache.tez.dag.app.dag.StateChangeNotifier.sendStateUpdate(org.apache.tez.dag.records.TezVertexID,
org.apache.tez.dag.api.event.VertexStateUpdate) @bci=39, line=116 (Interpreted
frame)
-
org.apache.tez.dag.app.dag.StateChangeNotifier.stateChanged(org.apache.tez.dag.records.TezVertexID,
org.apache.tez.dag.api.event.VertexStateUpdate) @bci=35, line=106 (Interpreted
frame)
- org.apache.tez.dag.app.dag.impl.VertexImpl.doneReconfiguringVertex()
@bci=55, line=1467 (Interpreted frame)
-
org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerPluginContextImpl.doneReconfiguringVertex()
@bci=11, line=281 (Interpreted frame)
-
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.schedulePendingTasks(int)
@bci=34, line=529 (Interpreted frame)
-
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.schedulePendingTasks()
@bci=142, line=584 (Compiled frame)
-
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.onSourceTaskCompleted(java.lang.String,
java.lang.Integer) @bci=74, line=365 (Interpreted frame)
-
org.apache.tez.dag.app.dag.impl.VertexManager.onSourceTaskCompleted(org.apache.tez.dag.records.TezTaskID)
@bci=52, line=364 (Interpreted frame)
-
org.apache.tez.dag.app.dag.impl.VertexImpl$SourceTaskAttemptCompletedEventTransition.transition(org.apache.tez.dag.app.dag.impl.VertexImpl,
org.apache.tez.dag.app.dag.event.VertexEvent) @bci=118, line=3364 (Interpreted
frame)
-
org.apache.tez.dag.app.dag.impl.VertexImpl$SourceTaskAttemptCompletedEventTransition.transition(java.lang.Object,
java.lang.Object) @bci=9, line=3345 (Interpreted frame)
-
org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(java.lang.Object,
java.lang.Enum, java.lang.Object, java.lang.Enum) @bci=6, line=385 (Compiled
frame)
-
org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(java.lang.Object,
java.lang.Enum, java.lang.Enum, java.lang.Object) @bci=45, line=302 (Compiled
frame)
-
org.apache.hadoop.yarn.state.StateMachineFactory.access$300(org.apache.hadoop.yarn.state.StateMachineFactory,
java.lang.Object, java.lang.Enum, java.lang.Enum, java.lang.Object) @bci=6,
line=46 (Compiled frame)
-
org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(java.lang.Enum,
java.lang.Object) @bci=15, line=448 (Compiled frame)
- org.apache.tez.state.StateMachineTez.doTransition(java.lang.Enum,
java.lang.Object) @bci=16, line=57 (Compiled frame)
-
org.apache.tez.dag.app.dag.impl.VertexImpl.handle(org.apache.tez.dag.app.dag.event.VertexEvent)
@bci=101, line=1489 (Compiled frame)
-
org.apache.tez.dag.app.dag.impl.VertexImpl.handle(org.apache.hadoop.yarn.event.Event)
@bci=5, line=175 (Compiled frame)
-
org.apache.tez.dag.app.DAGAppMaster$VertexEventDispatcher.handle(org.apache.tez.dag.app.dag.event.VertexEvent)
@bci=60, line=1734 (Compiled frame)
-
org.apache.tez.dag.app.DAGAppMaster$VertexEventDispatcher.handle(org.apache.hadoop.yarn.event.Event)
@bci=5, line=1720 (Compiled frame)
-
org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(org.apache.hadoop.yarn.event.Event)
@bci=86, line=173 (Compiled frame)
- org.apache.hadoop.yarn.event.AsyncDispatcher$1.run() @bci=140, line=106
(Interpreted frame)
- java.lang.Thread.run() @bci=11, line=745 (Interpreted frame)
{code}
> Make use of state change notifier in VertexManagerPlugins
> ---------------------------------------------------------
>
> Key: TEZ-1547
> URL: https://issues.apache.org/jira/browse/TEZ-1547
> Project: Apache Tez
> Issue Type: Improvement
> Reporter: Siddharth Seth
> Assignee: Bikas Saha
> Attachments: TEZ-1547.1.patch, TEZ-1547.3.patch, TEZ-1547.4.patch,
> TEZ-1547.5.patch
>
>
> Instead of the various APIs like onVertexStarted, simple notifications could
> be sent.
> Some existing APIs could end up being deprecated.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)