Hi,

I have several Spark jobs, including both batch jobs and streaming jobs, that
process and analyze system logs. We are using Kafka as the pipeline
that connects the jobs.

After upgrading to Spark 2.1.0 + Spark Kafka Streaming 010, I found that some
of the jobs (both batch and streaming) throw the exceptions below at random
times (sometimes after running for several hours, sometimes after only
20 minutes). Can anyone give me some suggestions on how to find the real
root cause? (Searching Google did not turn up anything very useful...)

Thanks,
Martin

00:30:04,510 WARN  - 17/07/22 00:30:04 WARN TaskSetManager: Lost task 60.0
in stage 1518490.0 (TID 338070, 10.133.96.21, executor 0):
java.io.FileNotFoundException:
/mnt/mesos/work_dir/slaves/20160924-021501-274760970-5050-7646-S2/frameworks/40aeb8e5-e82a-4df9-b034-8815a7a7564b-2543/executors/0/runs/fd15c15d-2511-4f37-a106-27431f583153/blockmgr-a0e0e673-f88b-4d12-a802-c35643e6c6b2/33/shuffle_2090_60_0.index.b66235be-79be-4455-9759-1c7ba70f91f6
(No such file or directory)
00:30:04,510 WARN  -     at java.io.FileOutputStream.open0(Native Method)
00:30:04,510 WARN  -     at
java.io.FileOutputStream.open(FileOutputStream.java:270)
00:30:04,510 WARN  -     at
java.io.FileOutputStream.<init>(FileOutputStream.java:213)
00:30:04,510 WARN  -     at
java.io.FileOutputStream.<init>(FileOutputStream.java:162)
00:30:04,510 WARN  -     at
org.apache.spark.shuffle.IndexShuffleBlockResolver.writeIndexFileAndCommit(IndexShuffleBlockResolver.scala:144)
00:30:04,510 WARN  -     at
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:128)
00:30:04,510 WARN  -     at
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
00:30:04,510 WARN  -     at
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
00:30:04,510 WARN  -     at
org.apache.spark.scheduler.Task.run(Task.scala:99)
00:30:04,510 WARN  -     at
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
00:30:04,510 WARN  -     at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
00:30:04,510 WARN  -     at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
00:30:04,510 WARN  -     at java.lang.Thread.run(Thread.java:748)

00:30:04,580 INFO  - Driver stacktrace:
00:30:04,580 INFO  - org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
00:30:04,580 INFO  -
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
00:30:04,580 INFO  -
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
00:30:04,580 INFO  -
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
00:30:04,580 INFO  -
scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
00:30:04,580 INFO  -
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
00:30:04,580 INFO  -
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
00:30:04,580 INFO  -
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
00:30:04,580 INFO  - scala.Option.foreach(Option.scala:257)
00:30:04,580 INFO  -
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
00:30:04,580 INFO  -
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
00:30:04,580 INFO  -
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
00:30:04,580 INFO  -
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
00:30:04,580 INFO  -
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
00:30:04,580 INFO  -
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
00:30:04,580 INFO  -
org.apache.spark.SparkContext.runJob(SparkContext.scala:1918)
00:30:04,580 INFO  -
org.apache.spark.SparkContext.runJob(SparkContext.scala:1931)
00:30:04,580 INFO  -
org.apache.spark.SparkContext.runJob(SparkContext.scala:1944)
00:30:04,580 INFO  -
org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1353)
00:30:04,580 INFO  -
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
00:30:04,580 INFO  -
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
00:30:04,580 INFO  - org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
00:30:04,580 INFO  - org.apache.spark.rdd.RDD.take(RDD.scala:1326)
00:30:04,580 INFO  -
org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply$mcZ$sp(RDD.scala:1461)
00:30:04,580 INFO  -
org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply(RDD.scala:1461)
00:30:04,580 INFO  -
org.apache.spark.rdd.RDD$$anonfun$isEmpty$1.apply(RDD.scala:1461)
00:30:04,580 INFO  -
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
00:30:04,580 INFO  -
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
00:30:04,580 INFO  - org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
00:30:04,580 INFO  - org.apache.spark.rdd.RDD.isEmpty(RDD.scala:1460)
00:30:04,580 INFO  -
com.ericsson.mediafirst.spark.clientlogsenrichment.ClientLogsEnrichmentJob$.executeIteration(ClientLogsEnrichmentJob.scala:133)
00:30:04,580 INFO  -
com.ericsson.mediafirst.spark.clientlogsenrichment.ClientLogsEnrichmentJob$.runIteration(ClientLogsEnrichmentJob.scala:76)
00:30:04,581 INFO  -
com.ericsson.mediafirst.spark.clientlogsenrichment.ClientLogsEnrichmentJob$.runBatch(ClientLogsEnrichmentJob.scala:59)
00:30:04,581 INFO  -
com.ericsson.mediafirst.sparkutils.jobtemplates.BatchJob.main(BatchJob.scala:35)
00:30:04,581 INFO  -
com.ericsson.mediafirst.spark.clientlogsenrichment.ClientLogsEnrichmentJob.main(ClientLogsEnrichmentJob.scala)
00:30:04,581 INFO  - sun.reflect.NativeMethodAccessorImpl.invoke0(Native
Method)
00:30:04,581 INFO  -
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
00:30:04,581 INFO  -
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
00:30:04,581 INFO  - java.lang.reflect.Method.invoke(Method.java:498)
00:30:04,581 INFO  -
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:738)
00:30:04,581 INFO  -
org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
00:30:04,581 INFO  -
org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
00:30:04,581 INFO  -
org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
00:30:04,581 INFO  -
org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
00:30:04,581 WARN  - 17/07/22 00:30:04 WARN JobProgressListener: Task start
for unknown stage 1518491
00:30:04,670 WARN  - 17/07/22 00:30:04 ERROR LiveListenerBus:
SparkListenerBus has already stopped! Dropping event
SparkListenerBlockUpdated(BlockUpdatedInfo(BlockManagerId(0, 10.133.96.21,
45377, None),rdd_15721_0,StorageLevel(memory, deserialized, 1
replicas),12024,0))
00:30:04,673 WARN  - 17/07/22 00:30:04 ERROR LiveListenerBus:
SparkListenerBus has already stopped! Dropping event
SparkListenerBlockUpdated(BlockUpdatedInfo(BlockManagerId(0, 10.133.96.21,
45377, None),rdd_15721_1,StorageLevel(memory, deserialized, 1
replicas),13736,0))
00:30:04,679 WARN  - 17/07/22 00:30:04 ERROR TransportRequestHandler: Error
while invoking RpcHandler#receive() for one-way message.
00:30:04,679 WARN  - org.apache.spark.SparkException: Could not find
CoarseGrainedScheduler.
00:30:04,679 WARN  -     at
org.apache.spark.rpc.netty.Dispatcher.postMessage(Dispatcher.scala:154)
00:30:04,679 WARN  -     at
org.apache.spark.rpc.netty.Dispatcher.postOneWayMessage(Dispatcher.scala:134)
00:30:04,679 WARN  -     at
org.apache.spark.rpc.netty.NettyRpcHandler.receive(NettyRpcEnv.scala:570)
00:30:04,679 WARN  -     at
org.apache.spark.network.server.TransportRequestHandler.processOneWayMessage(TransportRequestHandler.java:180)
00:30:04,679 WARN  -     at
org.apache.spark.network.server.TransportRequestHandler.handle(TransportRequestHandler.java:109)
00:30:04,679 WARN  -     at
org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:119)
00:30:04,679 WARN  -     at
org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:51)
00:30:04,679 WARN  -     at
io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)
00:30:04,679 WARN  -     at
io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:266)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)
00:30:04,679 WARN  -     at
io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)
00:30:04,679 WARN  -     at
org.apache.spark.network.util.TransportFrameDecoder.channelRead(TransportFrameDecoder.java:85)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)
00:30:04,679 WARN  -     at
io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)
00:30:04,679 WARN  -     at
io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911)
00:30:04,679 WARN  -     at
io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)
00:30:04,679 WARN  -     at
io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:652)
00:30:04,679 WARN  -     at
io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:575)
00:30:04,679 WARN  -     at
io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:489)
00:30:04,679 WARN  -     at
io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:451)
00:30:04,679 WARN  -     at
io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:140)
00:30:04,679 WARN  -     at
io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
00:30:04,679 WARN  -     at java.lang.Thread.run(Thread.java:748)
00:30:04,679 WARN  - 17/07/22 00:30:04 ERROR TransportRequestHandler: Error
while invoking RpcHandler#receive() for one-way message.
00:30:04,679 WARN  - org.apache.spark.SparkException: Could not find
CoarseGrainedScheduler.
00:30:04,679 WARN  -     at
org.apache.spark.rpc.netty.Dispatcher.postMessage(Dispatcher.scala:154)
00:30:04,679 WARN  -     at
org.apache.spark.rpc.netty.Dispatcher.postOneWayMessage(Dispatcher.scala:134)
00:30:04,679 WARN  -     at
org.apache.spark.rpc.netty.NettyRpcHandler.receive(NettyRpcEnv.scala:570)
00:30:04,679 WARN  -     at
org.apache.spark.network.server.TransportRequestHandler.processOneWayMessage(TransportRequestHandler.java:180)
00:30:04,679 WARN  -     at
org.apache.spark.network.server.TransportRequestHandler.handle(TransportRequestHandler.java:109)
00:30:04,679 WARN  -     at
org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:119)
00:30:04,679 WARN  -     at
org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:51)
00:30:04,679 WARN  -     at
io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)
00:30:04,679 WARN  -     at
io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:266)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)
00:30:04,679 WARN  -     at
io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)
00:30:04,679 WARN  -     at
org.apache.spark.network.util.TransportFrameDecoder.channelRead(TransportFrameDecoder.java:85)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346)
00:30:04,679 WARN  -     at
io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367)
00:30:04,679 WARN  -     at
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353)
00:30:04,679 WARN  -     at
io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911)
00:30:04,679 WARN  -     at
io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)
00:30:04,679 WARN  -     at
io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:652)
00:30:04,679 WARN  -     at
io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:575)
00:30:04,679 WARN  -     at
io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:489)
00:30:04,679 WARN  -     at
io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:451)
00:30:04,679 WARN  -     at
io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:140)
00:30:04,679 WARN  -     at
io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
00:30:04,679 WARN  -     at java.lang.Thread.run(Thread.java:748)
00:30:11,318 WARN  - I0722 00:30:11.318724 2921 sched.cpp:2021] Asked to
stop the driver
00:30:11,318 WARN  - I0722 00:30:11.318838 2988 sched.cpp:1203] Stopping
framework 40aeb8e5-e82a-4df9-b034-8815a7a7564b-2543

Reply via email to