[ https://issues.apache.org/jira/browse/TEZ-1621?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14147296#comment-14147296 ]
Jeff Zhang edited comment on TEZ-1621 at 9/25/14 5:38 AM: ---------------------------------------------------------- This is one kind of exception that causes the TezChild container to shut down. We should report the error to the AM before shutting down TezChild. {code} } else if (cause instanceof Error) { LOG.error("Exception of type Error. Exiting now", cause); ExitUtil.terminate(-1, cause); } else { {code} was (Author: zjffdu): It is one kind of Exception that cause the TezChild Container shutdown. We should report the error to task before shutting down TezChild {code} } else if (cause instanceof Error) { LOG.error("Exception of type Error. Exiting now", cause); ExitUtil.terminate(-1, cause); } else { {code} > Actual error message not thrown on console, does appear in the YARN > application log > ----------------------------------------------------------------------------------- > > Key: TEZ-1621 > URL: https://issues.apache.org/jira/browse/TEZ-1621 > Project: Apache Tez > Issue Type: Sub-task > Reporter: Deepesh Khandelwal > Attachments: app_logs.txt, console.txt > > > While running an in session testorderedwordcount example the DAG failed with > the following error on the console: > {noformat} > 14/09/25 01:55:53 INFO examples.TestOrderedWordCount: DAG 1 diagnostics: > [Vertex failed, vertexName=initialmap, > vertexId=vertex_1411586515507_0110_1_00, diagnostics=[Task failed, > taskId=task_1411586515507_0110_1_00_000000, diagnostics=[TaskAttempt 0 > failed, info=[Container container_1411586515507_0110_01_000002 finished with > diagnostics set to [Container failed. Exception from container-launch. 
> Container id: container_1411586515507_0110_01_000002 > Exit code: 255 > Stack trace: ExitCodeException exitCode=255: > at org.apache.hadoop.util.Shell.runCommand(Shell.java:538) > at org.apache.hadoop.util.Shell.run(Shell.java:455) > at > org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:702) > at > org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor.launchContainer(LinuxContainerExecutor.java:290) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:299) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:81) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > {noformat} > This wasn't very helpful; the root cause is in the application log: > {noformat} > 2014-09-25 01:55:41,246 ERROR [TezChild] > org.apache.tez.runtime.task.TezTaskRunner: Exception of type Error. 
Exiting > now > java.lang.UnsatisfiedLinkError: > org.apache.hadoop.util.NativeCrc32.nativeVerifyChunkedSums(IILjava/nio/ByteBuffer;ILjava/nio/ByteBuffer;IILjava/lang/String;J)V > at org.apache.hadoop.util.NativeCrc32.nativeVerifyChunkedSums(Native > Method) > at > org.apache.hadoop.util.NativeCrc32.verifyChunkedSums(NativeCrc32.java:57) > at > org.apache.hadoop.util.DataChecksum.verifyChunkedSums(DataChecksum.java:291) > at > org.apache.hadoop.hdfs.BlockReaderLocal.fillBuffer(BlockReaderLocal.java:344) > at > org.apache.hadoop.hdfs.BlockReaderLocal.fillDataBuf(BlockReaderLocal.java:444) > at > org.apache.hadoop.hdfs.BlockReaderLocal.readWithBounceBuffer(BlockReaderLocal.java:575) > at > org.apache.hadoop.hdfs.BlockReaderLocal.read(BlockReaderLocal.java:539) > at > org.apache.hadoop.hdfs.DFSInputStream$ByteArrayStrategy.doRead(DFSInputStream.java:683) > at > org.apache.hadoop.hdfs.DFSInputStream.readBuffer(DFSInputStream.java:739) > at > org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:796) > at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:837) > at java.io.DataInputStream.read(DataInputStream.java:100) > at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180) > at > org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216) > at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174) > at > org.apache.hadoop.mapreduce.lib.input.LineRecordReader.nextKeyValue(LineRecordReader.java:149) > at > org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.nextKeyValue(TezGroupedSplitsInputFormat.java:167) > at > org.apache.tez.mapreduce.lib.MRReaderMapReduce.next(MRReaderMapReduce.java:116) > at > org.apache.tez.mapreduce.processor.map.MapProcessor$NewRecordReader.nextKeyValue(MapProcessor.java:266) > at > org.apache.tez.mapreduce.hadoop.mapreduce.MapContextImpl.nextKeyValue(MapContextImpl.java:81) > at > 
org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.nextKeyValue(WrappedMapper.java:91) > at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144) > at > org.apache.tez.mapreduce.processor.map.MapProcessor.runNewMapper(MapProcessor.java:237) > at > org.apache.tez.mapreduce.processor.map.MapProcessor.run(MapProcessor.java:124) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:324) > at > org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:180) > at > org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) > at > org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172) > at > org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > 2014-09-25 01:55:41,250 INFO [TezChild] org.apache.hadoop.util.ExitUtil: > Exiting with status -1 > {noformat} > Attached are the complete console.log and application log. -- This message was sent by Atlassian JIRA (v6.3.4#6332)