[ https://issues.apache.org/jira/browse/SPARK-16330?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15362678#comment-15362678 ]
Cheng Lian commented on SPARK-16330: ------------------------------------ Please find the root cause analysis in the comment area: https://github.com/databricks/spark-avro/issues/139 > Null pointer getting count from avro file in mesos distributed > -------------------------------------------------------------- > > Key: SPARK-16330 > URL: https://issues.apache.org/jira/browse/SPARK-16330 > Project: Spark > Issue Type: Question > Affects Versions: 2.0.0 > Reporter: John Erwin > Assignee: Cheng Lian > > I have a need to utilize jersey 2 and process an avro file. I am trying to > utilize spark 2.0.0-preview with com.databricks:spark-avro_2.10:3.0.0-preview > for avro support. We are running the process within mesos. I have found that > when the master is set to the mesos master the process will fail with a > null pointer exception; however, when the master is set to local the process > will succeed. I am creating a shadow jar that has > org.scala-lang:scala-library::2.10.5, org.apache.hadoop:hadoop-aws:2.7.1 and > com.databricks:spark-avro_2.10:3.0.0-preview packaged with the test class. 
> The sample code I am testing with: > import org.apache.spark.sql.{DataFrame, SQLContext} > import org.apache.spark.sql.types.StructType > import org.apache.spark.{SparkConf, SparkContext} > import org.slf4j.{Logger, LoggerFactory} > object Test { > var schema: StructType = null > val logger: Logger = LoggerFactory.getLogger(this.getClass) > def main(args: Array[String]): Unit = { > var fileType = args(0).toLowerCase > var fileLocation = args(1).toLowerCase > val (sqlContext) = setup() > run(sqlContext, fileType, fileLocation) > } > def setup() = { > val conf = new SparkConf().setAppName("TestSpark") > conf.set( "spark.serializer", > "org.apache.spark.serializer.KryoSerializer" ) > conf.set("spark.sql.shuffle.partitions", "1") > conf.set("fs.s3a.connection.maximum","1500") > conf.set("fs.s3a.threads.max","2048") > conf.set("fs.s3a.threads.core","1500") > conf.set("spark.sql.avro.compression.codec","uncompressed") > val sparkContext = new SparkContext(conf) > val sqlContext = new SQLContext(sparkContext) > sqlContext > } > def run(sqlContext: SQLContext, fileType: String, fileLocation: String) = { > if (fileType.equalsIgnoreCase("json")) { > logger.info("Processing JSON File") > countJson(sqlContext, fileLocation) > } > else if(fileType.equalsIgnoreCase("avro")) { > logger.info("Processing AVRO File") > countAvro(sqlContext, fileLocation) > } > else { > throw new IllegalArgumentException("Unkown File Format"); > } > } > def countAvro(sqlContext: SQLContext, fileLocation: String) { > var dataframe = > sqlContext.read.format("com.databricks.spark.avro").load(fileLocation).cache > logger.info("AVRO COUNT = " + dataframe.count()) > } > def countJson(sqlContext: SQLContext, fileLocation: String) { > var dataframe = sqlContext.read.json(fileLocation) > logger.info("JSON COUNT = " + dataframe.count()) > } > } > Other Pertinent Details: > Mesos Version: 0.28.1 > SPARK_HOME = /path/to/spark-2.0.0-preview-bin-hadoop2.7 > Using chronos to submit the jobs. 
> I am unclear if this is a problem in the preview of spark or the preview of > the avro support. Here are the findings of running avro and json counts in > the mesos cluster: > $SPARK_HOME/bin/spark-submit --conf > spark.executor.uri=http://hosted/path/of/spark-2.0.0-preview-bin-hadoop2.7.tgz > --conf spark.app.name=TestSpark --conf spark.executor.cores=1 --conf > spark.cleaner.ttl=86400 --conf spark.worker.cleanup.enabled=true --conf > spark.worker.cleanup.appDataTtl=259200 --conf spark.local.dir=/tmp/ > --deploy-mode client --master mesos://zk://master.mesos:2181/mesos --class > Test testSpark.jar avro s3a://path/to//file.avro > FAILED: > Exception in thread "main" org.apache.spark.SparkException: Job aborted due > to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: > Lost task 0.3 in stage 0.0 (TID 3, ip-10-0-11-60.ec2.internal): > java.lang.NullPointerException > at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:367) > at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295) > at org.apache.avro.mapred.FsInput.<init>(FsInput.java:37) > at > com.databricks.spark.avro.DefaultSource$$anonfun$buildReader$1.apply(DefaultSource.scala:146) > at > com.databricks.spark.avro.DefaultSource$$anonfun$buildReader$1.apply(DefaultSource.scala:143) > at > org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(fileSourceInterfaces.scala:278) > at > org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(fileSourceInterfaces.scala:262) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:114) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:91) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown > Source) > at > org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) > at > 
org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$7$$anon$1.hasNext(WholeStageCodegenExec.scala:357) > at > org.apache.spark.sql.execution.columnar.InMemoryRelation$$anonfun$3$$anon$1.hasNext(InMemoryTableScanExec.scala:178) > at > org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:213) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:911) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:902) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:858) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:902) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:660) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:329) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:280) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:318) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:282) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:318) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:282) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:318) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:282) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:47) > at org.apache.spark.scheduler.Task.run(Task.scala:85) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at 
java.lang.Thread.run(Thread.java:745) > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1450) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1438) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1437) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1437) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811) > at scala.Option.foreach(Option.scala:257) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1659) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1618) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1607) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1863) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1876) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1889) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1903) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:883) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:357) 
> at org.apache.spark.rdd.RDD.collect(RDD.scala:882) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:290) > at > org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2122) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57) > at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2436) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2121) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2128) > at > org.apache.spark.sql.Dataset$$anonfun$count$1.apply(Dataset.scala:2156) > at > org.apache.spark.sql.Dataset$$anonfun$count$1.apply(Dataset.scala:2155) > at org.apache.spark.sql.Dataset.withCallback(Dataset.scala:2449) > at org.apache.spark.sql.Dataset.count(Dataset.scala:2155) > at Test$.countAvro(Test.scala:50) > at Test$.run(Test.scala:40) > at Test$.main(Test.scala:16) > at Test.main(Test.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:724) > at > org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: java.lang.NullPointerException > at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:367) > at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295) > at org.apache.avro.mapred.FsInput.<init>(FsInput.java:37) > at > 
com.databricks.spark.avro.DefaultSource$$anonfun$buildReader$1.apply(DefaultSource.scala:146) > at > com.databricks.spark.avro.DefaultSource$$anonfun$buildReader$1.apply(DefaultSource.scala:143) > at > org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(fileSourceInterfaces.scala:278) > at > org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(fileSourceInterfaces.scala:262) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:114) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:91) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown > Source) > at > org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) > at > org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$7$$anon$1.hasNext(WholeStageCodegenExec.scala:357) > at > org.apache.spark.sql.execution.columnar.InMemoryRelation$$anonfun$3$$anon$1.hasNext(InMemoryTableScanExec.scala:178) > at > org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:213) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:911) > at > org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:902) > at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:858) > at > org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:902) > at > org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:660) > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:329) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:280) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:318) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:282) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > 
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:318) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:282) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:318) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:282) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:47) > at org.apache.spark.scheduler.Task.run(Task.scala:85) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > ================================================================================ > $SPARK_HOME/bin/spark-submit --conf > spark.executor.uri=http://hosted/path/of/spark-2.0.0-preview-bin-hadoop2.7.tgz > --conf spark.app.name=TestSpark --conf spark.executor.cores=1 --conf > spark.cleaner.ttl=86400 --conf spark.worker.cleanup.enabled=true --conf > spark.worker.cleanup.appDataTtl=259200 --conf spark.local.dir=/tmp/ > --deploy-mode client --master local --class Test testSpark.jar avro > s3a://path/to//file.avro > SUCCEEDED > ================================================================================ > $SPARK_HOME/bin/spark-submit --conf > spark.executor.uri=http://hosted/path/of/spark-2.0.0-preview-bin-hadoop2.7.tgz > --conf spark.app.name=TestSpark --conf spark.executor.cores=1 --conf > spark.cleaner.ttl=86400 --conf spark.worker.cleanup.enabled=true --conf > spark.worker.cleanup.appDataTtl=259200 --conf spark.local.dir=/tmp/ > --deploy-mode client --master mesos://zk://master.mesos:2181/mesos --class > Test testSpark.jar json s3a://path/to//file.json > SUCCEEDED But had exception : > org.apache.spark.SparkException: Exception thrown in 
awaitResult > at > org.apache.spark.rpc.RpcTimeout$$anonfun$1.applyOrElse(RpcTimeout.scala:77) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$1.applyOrElse(RpcTimeout.scala:75) > at > scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:59) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:59) > at scala.PartialFunction$OrElse.apply(PartialFunction.scala:167) > at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:83) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:102) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:78) > at > org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend.removeExecutor(CoarseGrainedSchedulerBackend.scala:414) > at > org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend.executorTerminated(CoarseMesosSchedulerBackend.scala:553) > at > org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend.statusUpdate(CoarseMesosSchedulerBackend.scala:494) > Caused by: org.apache.spark.SparkException: Could not find > CoarseGrainedScheduler or it has been stopped. > at > org.apache.spark.rpc.netty.Dispatcher.postMessage(Dispatcher.scala:162) > at > org.apache.spark.rpc.netty.Dispatcher.postLocalMessage(Dispatcher.scala:127) > at org.apache.spark.rpc.netty.NettyRpcEnv.ask(NettyRpcEnv.scala:225) > at > org.apache.spark.rpc.netty.NettyRpcEndpointRef.ask(NettyRpcEnv.scala:508) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:101) > ... 
4 more > 16/06/30 14:05:19 WARN NettyRpcEndpointRef: Error sending message [message = > RemoveExecutor(15,Executor finished with state FINISHED)] in 2 attempts > org.apache.spark.SparkException: Exception thrown in awaitResult > at > org.apache.spark.rpc.RpcTimeout$$anonfun$1.applyOrElse(RpcTimeout.scala:77) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$1.applyOrElse(RpcTimeout.scala:75) > at > scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:59) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:59) > at scala.PartialFunction$OrElse.apply(PartialFunction.scala:167) > at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:83) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:102) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:78) > at > org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend.removeExecutor(CoarseGrainedSchedulerBackend.scala:414) > at > org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend.executorTerminated(CoarseMesosSchedulerBackend.scala:553) > at > org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend.statusUpdate(CoarseMesosSchedulerBackend.scala:494) > Caused by: org.apache.spark.SparkException: Could not find > CoarseGrainedScheduler or it has been stopped. > at > org.apache.spark.rpc.netty.Dispatcher.postMessage(Dispatcher.scala:162) > at > org.apache.spark.rpc.netty.Dispatcher.postLocalMessage(Dispatcher.scala:127) > at org.apache.spark.rpc.netty.NettyRpcEnv.ask(NettyRpcEnv.scala:225) > at > org.apache.spark.rpc.netty.NettyRpcEndpointRef.ask(NettyRpcEnv.scala:508) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:101) > ... 
4 more > 16/06/30 14:05:22 WARN NettyRpcEndpointRef: Error sending message [message = > RemoveExecutor(15,Executor finished with state FINISHED)] in 3 attempts > org.apache.spark.SparkException: Exception thrown in awaitResult > at > org.apache.spark.rpc.RpcTimeout$$anonfun$1.applyOrElse(RpcTimeout.scala:77) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$1.applyOrElse(RpcTimeout.scala:75) > at > scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:59) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:59) > at scala.PartialFunction$OrElse.apply(PartialFunction.scala:167) > at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:83) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:102) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:78) > at > org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend.removeExecutor(CoarseGrainedSchedulerBackend.scala:414) > at > org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend.executorTerminated(CoarseMesosSchedulerBackend.scala:553) > at > org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend.statusUpdate(CoarseMesosSchedulerBackend.scala:494) > Caused by: org.apache.spark.SparkException: Could not find > CoarseGrainedScheduler or it has been stopped. > at > org.apache.spark.rpc.netty.Dispatcher.postMessage(Dispatcher.scala:162) > at > org.apache.spark.rpc.netty.Dispatcher.postLocalMessage(Dispatcher.scala:127) > at org.apache.spark.rpc.netty.NettyRpcEnv.ask(NettyRpcEnv.scala:225) > at > org.apache.spark.rpc.netty.NettyRpcEndpointRef.ask(NettyRpcEnv.scala:508) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:101) > ... 
4 more > Exception in thread "Thread-71" org.apache.spark.SparkException: Error > notifying standalone scheduler's driver endpoint > at > org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend.removeExecutor(CoarseGrainedSchedulerBackend.scala:417) > at > org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend.executorTerminated(CoarseMesosSchedulerBackend.scala:553) > at > org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend.statusUpdate(CoarseMesosSchedulerBackend.scala:494) > Caused by: org.apache.spark.SparkException: Error sending message [message = > RemoveExecutor(15,Executor finished with state FINISHED)] > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:119) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:78) > at > org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend.removeExecutor(CoarseGrainedSchedulerBackend.scala:414) > ... 2 more > Caused by: org.apache.spark.SparkException: Exception thrown in awaitResult > at > org.apache.spark.rpc.RpcTimeout$$anonfun$1.applyOrElse(RpcTimeout.scala:77) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$1.applyOrElse(RpcTimeout.scala:75) > at > scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:59) > at > org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:59) > at scala.PartialFunction$OrElse.apply(PartialFunction.scala:167) > at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:83) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:102) > ... 4 more > Caused by: org.apache.spark.SparkException: Could not find > CoarseGrainedScheduler or it has been stopped. 
> at > org.apache.spark.rpc.netty.Dispatcher.postMessage(Dispatcher.scala:162) > at > org.apache.spark.rpc.netty.Dispatcher.postLocalMessage(Dispatcher.scala:127) > at org.apache.spark.rpc.netty.NettyRpcEnv.ask(NettyRpcEnv.scala:225) > at > org.apache.spark.rpc.netty.NettyRpcEndpointRef.ask(NettyRpcEnv.scala:508) > at > org.apache.spark.rpc.RpcEndpointRef.askWithRetry(RpcEndpointRef.scala:101) > ... 4 more > ================================================================================ > $SPARK_HOME/bin/spark-submit --conf > spark.executor.uri=http://hosted/path/of/spark-2.0.0-preview-bin-hadoop2.7.tgz > --conf spark.app.name=TestSpark --conf spark.executor.cores=1 --conf > spark.cleaner.ttl=86400 --conf spark.worker.cleanup.enabled=true --conf > spark.worker.cleanup.appDataTtl=259200 --conf spark.local.dir=/tmp/ > --deploy-mode client --master local --class Test testSpark.jar json > s3a://path/to//file.json > SUCCEEDED -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org