[
https://issues.apache.org/jira/browse/TEZ-1238?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14052164#comment-14052164
]
Jeff Zhang commented on TEZ-1238:
---------------------------------
Attached the patch.
After the patch, the message on the client side is as follows:
*Missing jar in LocalResource of Vertex*
{code}
DAG diagnostics:[Vertex failed, vertexName=tokenizer,
vertexId=vertex_1404448568107_0001_1_00, diagnostics=[Task failed,
taskId=task_1404448568107_0001_1_00_000000, diagnostics=[TaskAttempt 0 failed,
info=[Error: Failure while running
task:org.apache.tez.dag.api.TezUncheckedException: Unable to load class:
com.zjffdu.tutorial.tez.WordCount$TokenProcessor
at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:44)
at
org.apache.tez.common.RuntimeUtils.createClazzInstance(RuntimeUtils.java:66)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.createProcessor(LogicalIOProcessorRuntimeTask.java:534)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.initialize(LogicalIOProcessorRuntimeTask.java:170)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:177)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException:
com.zjffdu.tutorial.tez.WordCount$TokenProcessor
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:270)
at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:42)
... 14 more
], TaskAttempt 1 failed, info=[Error: Failure while running
task:org.apache.tez.dag.api.TezUncheckedException: Unable to load class:
com.zjffdu.tutorial.tez.WordCount$TokenProcessor
at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:44)
at
org.apache.tez.common.RuntimeUtils.createClazzInstance(RuntimeUtils.java:66)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.createProcessor(LogicalIOProcessorRuntimeTask.java:534)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.initialize(LogicalIOProcessorRuntimeTask.java:170)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:177)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException:
com.zjffdu.tutorial.tez.WordCount$TokenProcessor
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:270)
at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:42)
... 14 more
], TaskAttempt 2 failed, info=[Error: Failure while running
task:org.apache.tez.dag.api.TezUncheckedException: Unable to load class:
com.zjffdu.tutorial.tez.WordCount$TokenProcessor
at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:44)
at
org.apache.tez.common.RuntimeUtils.createClazzInstance(RuntimeUtils.java:66)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.createProcessor(LogicalIOProcessorRuntimeTask.java:534)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.initialize(LogicalIOProcessorRuntimeTask.java:170)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:177)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException:
com.zjffdu.tutorial.tez.WordCount$TokenProcessor
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:270)
at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:42)
... 14 more
], TaskAttempt 3 failed, info=[Error: Failure while running
task:org.apache.tez.dag.api.TezUncheckedException: Unable to load class:
com.zjffdu.tutorial.tez.WordCount$TokenProcessor
at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:44)
at
org.apache.tez.common.RuntimeUtils.createClazzInstance(RuntimeUtils.java:66)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.createProcessor(LogicalIOProcessorRuntimeTask.java:534)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.initialize(LogicalIOProcessorRuntimeTask.java:170)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:177)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException:
com.zjffdu.tutorial.tez.WordCount$TokenProcessor
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:270)
at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:42)
... 14 more
]], Vertex failed as one or more tasks failed. failedTasks:1], Vertex killed,
vertexName=summer, vertexId=vertex_1404448568107_0001_1_01, diagnostics=[Vertex
received Kill while in RUNNING state., Vertex killed as other vertex failed.
failedTasks:0], DAG failed due to vertex failure. failedVertices:1
killedVertices:1]
{code}
*Exception happens in Processor*
{code}
DAG diagnostics:[Vertex re-running, vertexName=tokenizer,
vertexId=vertex_1404445909469_0005_1_00, Vertex failed, vertexName=summer,
vertexId=vertex_1404445909469_0005_1_01, diagnostics=[Task failed,
taskId=task_1404445909469_0005_1_01_000000, diagnostics=[TaskAttempt 0 failed,
info=[Error: Failure while running task:java.lang.ClassCastException:
org.apache.tez.runtime.library.input.ShuffledMergedInput$ShuffledMergedKeyValuesReader
cannot be cast to org.apache.tez.runtime.library.api.KeyValueReader
at com.zjffdu.tutorial.tez.WordCount$SumProcessor.run(WordCount.java:94)
at
org.apache.tez.runtime.library.processor.SimpleProcessor.run(SimpleProcessor.java:37)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:309)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:180)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
], TaskAttempt 1 failed, info=[Error: Failure while running
task:java.lang.ClassCastException:
org.apache.tez.runtime.library.input.ShuffledMergedInput$ShuffledMergedKeyValuesReader
cannot be cast to org.apache.tez.runtime.library.api.KeyValueReader
at com.zjffdu.tutorial.tez.WordCount$SumProcessor.run(WordCount.java:94)
at
org.apache.tez.runtime.library.processor.SimpleProcessor.run(SimpleProcessor.java:37)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:309)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:180)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
], TaskAttempt 2 failed, info=[Error: Failure while running
task:java.lang.ClassCastException:
org.apache.tez.runtime.library.input.ShuffledMergedInput$ShuffledMergedKeyValuesReader
cannot be cast to org.apache.tez.runtime.library.api.KeyValueReader
at com.zjffdu.tutorial.tez.WordCount$SumProcessor.run(WordCount.java:94)
at
org.apache.tez.runtime.library.processor.SimpleProcessor.run(SimpleProcessor.java:37)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:309)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:180)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
], TaskAttempt 3 failed, info=[Error: Failure while running
task:java.lang.ClassCastException:
org.apache.tez.runtime.library.input.ShuffledMergedInput$ShuffledMergedKeyValuesReader
cannot be cast to org.apache.tez.runtime.library.api.KeyValueReader
at com.zjffdu.tutorial.tez.WordCount$SumProcessor.run(WordCount.java:94)
at
org.apache.tez.runtime.library.processor.SimpleProcessor.run(SimpleProcessor.java:37)
at
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:309)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:180)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172)
at
org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
]], Vertex failed as one or more tasks failed. failedTasks:1], Vertex killed,
vertexName=tokenizer, vertexId=vertex_1404445909469_0005_1_00,
diagnostics=[Vertex received Kill while in RUNNING state., Vertex killed as
other vertex failed. failedTasks:0], DAG failed due to vertex failure.
failedVertices:1 killedVertices:1]
14/07/04 12:30:27 INFO client.TezSession: Shutting down Tez Session,
sessionName=tez-session, applicationId=application_1404445909469_0005
{code}
> Display more clear diagnostics info on client side if missing jar in
> LocalResource or Exception happen in Processor
> -------------------------------------------------------------------------------------------------------------------
>
> Key: TEZ-1238
> URL: https://issues.apache.org/jira/browse/TEZ-1238
> Project: Apache Tez
> Issue Type: Sub-task
> Affects Versions: 0.4.0
> Reporter: Jeff Zhang
> Assignee: Jeff Zhang
> Attachments: Tez-1238.patch
>
>
> I have a Tez job that failed because I didn't put my jar in the local
> resources. But on the client side, the exception is not clear enough for me to
> figure out what's wrong with it. The real reason is that it couldn't load the
> Processor class. I had to run the command "yarn logs" to find the real exception
> in the container logs.
> I also have another case where an exception is thrown in my Processor; the message
> on the client side is still not clear to me. I think we should pass the
> real exception to the diagnostics and display it on the client side; this should
> help users find out what's wrong with their program.
> *Exception on client side*
> {code}
> 14/06/26 14:57:15 INFO rpc.DAGClientRPCImpl: VertexStatus: VertexName:
> summer Progress: 0% TotalTasks: 1 Succeeded: 0 Running: 0 Failed: 0 Killed: 1
> 14/06/26 14:57:15 INFO rpc.DAGClientRPCImpl: VertexStatus: VertexName:
> tokenizer Progress: 0% TotalTasks: 1 Succeeded: 0 Running: 0 Failed: 1
> Killed: 0
> 14/06/26 14:57:15 INFO rpc.DAGClientRPCImpl: DAG completed.
> FinalState=FAILED
> DAG diagnostics:[Vertex failed, vertexName=tokenizer,
> vertexId=vertex_1403765612557_0004_1_00, diagnostics=[Task failed,
> taskId=task_1403765612557_0004_1_00_000000, diagnostics=[TaskAttempt 0
> failed, info=[Container container_1403765612557_0004_01_000002 COMPLETED
> with diagnostics set to [Exception from container-launch:
> org.apache.hadoop.util.Shell$ExitCodeException:
> org.apache.hadoop.util.Shell$ExitCodeException: at
> org.apache.hadoop.util.Shell.runCommand(Shell.java:505)
> at org.apache.hadoop.util.Shell.run(Shell.java:418)
> at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650)
> at
> org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(
> DefaultContainerExecutor.java:195)
> at
> org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(
> ContainerLaunch.java:300)
> at
> org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(
> ContainerLaunch.java:81)
> at java.util.concurrent.FutureTask.run(FutureTask.java:262)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(
> ThreadPoolExecutor.java:1145)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(
> ThreadPoolExecutor.java:615)
> at java.lang.Thread.run(Thread.java:745)
> Container exited with a non-zero exit code 1
> {code}
> *The real exception in container log:*
> {code}
> 2014-06-26 14:57:02,146 ERROR [main]
> org.apache.hadoop.yarn.YarnUncaughtExceptionHandler: Thread
> Thread[main,5,main] threw an Exception.
> org.apache.tez.dag.api.TezUncheckedException: Unable to load class:
> com.zjffdu.tutorial.tez.WordCount$TokenProcessor
> at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:44)
> at
> org.apache.tez.common.RuntimeUtils.createClazzInstance(RuntimeUtils.java:66)
> at
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.createProcessor(LogicalIOProcessorRuntimeTask.java:533)
> at
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.<init>(LogicalIOProcessorRuntimeTask.java:146)
> at
> org.apache.tez.runtime.task.TezTaskRunner.<init>(TezTaskRunner.java:78)
> at org.apache.tez.runtime.task.TezChild.run(TezChild.java:208)
> at org.apache.tez.runtime.task.TezChild.main(TezChild.java:363)
> {code}
--
This message was sent by Atlassian JIRA
(v6.2#6252)