[ 
https://issues.apache.org/jira/browse/MESOS-1535?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ajay Viswanathan updated MESOS-1535:
------------------------------------

    Description: 
This is an error that I get while running fine-grained PySpark on the Mesos 
cluster. This comes after running some 200-1000 tasks generally.

Pyspark code:

while True:
    sc.parallelize(range(10)).map(lambda n : n*2).collect()

Error log:
(In console)

ERROR DAGSchedulerActorSupervisor: eventProcesserActor failed due to the error 
EOF reached before Python server acknowledged; shutting down SparkContext

Traceback (most recent call last):
  File "<stdin>", line 2, in <module>
  File ".../spark-1.0.0/python/pyspark/rdd.py", line 583, in collect
    bytesInJava = self._jrdd.collect().iterator()
  File ".../spark-1.0.0/python/lib/py4j-0.8.1-src.zip/py4j/java_gateway.py", 
line 537,
  File ".../spark-1.0.0/python/lib/py4j-0.8.1-src.zip/py4j/protocol.py", line 
300, in
py4j.protocol.Py4JJavaError: An error occurred while calling o847.collect.

org.apache.spark.SparkException: Job 75 cancelled as part of cancellation of 
all jobs
        at 
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$fail
        at 
org.apache.spark.scheduler.DAGScheduler.handleJobCancellation(DAGScheduler.scala:998)
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply$mcVI$sp(DAGS
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
        at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
        at 
org.apache.spark.scheduler.DAGScheduler.doCancelAllJobs(DAGScheduler.scala:499)
        at 
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
        at 
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
        at akka.actor.SupervisorStrategy.handleFailure(FaultHandling.scala:295)
        at 
akka.actor.dungeon.FaultHandling$class.handleFailure(FaultHandling.scala:253)
        at akka.actor.ActorCell.handleFailure(ActorCell.scala:338)
        at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:423)
        at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
        at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
        at akka.dispatch.Mailbox.run(Mailbox.scala:218)
        at 
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.s
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
        at 
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
        at 
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)

>>> 14/06/24 02:58:19 ERROR OneForOneStrategy:
java.lang.UnsupportedOperationException
        at 
org.apache.spark.scheduler.SchedulerBackend$class.killTask(SchedulerBackend.scala:32)
        at 
org.apache.spark.scheduler.cluster.mesos.MesosSchedulerBackend.killTask(MesosSchedule
        at 
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
        at 
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
        at 
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
        at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
        at 
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3.apply(TaskSchedul
        at 
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3.apply(TaskSchedul
        at scala.Option.foreach(Option.scala:236)
        at 
org.apache.spark.scheduler.TaskSchedulerImpl.cancelTasks(TaskSchedulerImpl.scala:176)
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
        at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
        at 
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$fail
        at 
org.apache.spark.scheduler.DAGScheduler.handleJobCancellation(DAGScheduler.scala:998)
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply$mcVI$sp(DAGS
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
        at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
        at 
org.apache.spark.scheduler.DAGScheduler.doCancelAllJobs(DAGScheduler.scala:499)
        at 
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
        at 
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
        at akka.actor.SupervisorStrategy.handleFailure(FaultHandling.scala:295)
        at 
akka.actor.dungeon.FaultHandling$class.handleFailure(FaultHandling.scala:253)
        at akka.actor.ActorCell.handleFailure(ActorCell.scala:338)
        at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:423)
        at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
        at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
        at akka.dispatch.Mailbox.run(Mailbox.scala:218)
        at 
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.s
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
        at 
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
        at 
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)


  was:
This is an error that I get while running fine-grained PySpark on the Mesos 
cluster. This comes after running some 200-1000 tasks generally.

Pyspark code:

while True:
    sc.parallelize(range(10)).map(lambda n : n*2).collect()

Error log:
(In console)

ERROR DAGSchedulerActorSupervisor: eventProcesserActor failed due to the error 
EOF reached before Python server acknowledged; shutting down SparkContext

org.apache.spark.SparkException: Job 75 cancelled as part of cancellation of 
all jobs
        at 
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$fail
        at 
org.apache.spark.scheduler.DAGScheduler.handleJobCancellation(DAGScheduler.scala:998)
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply$mcVI$sp(DAGS
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
        at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
        at 
org.apache.spark.scheduler.DAGScheduler.doCancelAllJobs(DAGScheduler.scala:499)
        at 
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
        at 
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
        at akka.actor.SupervisorStrategy.handleFailure(FaultHandling.scala:295)
        at 
akka.actor.dungeon.FaultHandling$class.handleFailure(FaultHandling.scala:253)
        at akka.actor.ActorCell.handleFailure(ActorCell.scala:338)
        at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:423)
        at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
        at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
        at akka.dispatch.Mailbox.run(Mailbox.scala:218)
        at 
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.s
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
        at 
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
        at 
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)

>>> 14/06/24 02:58:19 ERROR OneForOneStrategy:
java.lang.UnsupportedOperationException
        at 
org.apache.spark.scheduler.SchedulerBackend$class.killTask(SchedulerBackend.scala:32)
        at 
org.apache.spark.scheduler.cluster.mesos.MesosSchedulerBackend.killTask(MesosSchedule
        at 
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
        at 
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
        at 
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
        at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
        at 
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3.apply(TaskSchedul
        at 
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3.apply(TaskSchedul
        at scala.Option.foreach(Option.scala:236)
        at 
org.apache.spark.scheduler.TaskSchedulerImpl.cancelTasks(TaskSchedulerImpl.scala:176)
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
        at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
        at 
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$fail
        at 
org.apache.spark.scheduler.DAGScheduler.handleJobCancellation(DAGScheduler.scala:998)
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply$mcVI$sp(DAGS
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
        at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
        at 
org.apache.spark.scheduler.DAGScheduler.doCancelAllJobs(DAGScheduler.scala:499)
        at 
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
        at 
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
        at akka.actor.SupervisorStrategy.handleFailure(FaultHandling.scala:295)
        at 
akka.actor.dungeon.FaultHandling$class.handleFailure(FaultHandling.scala:253)
        at akka.actor.ActorCell.handleFailure(ActorCell.scala:338)
        at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:423)
        at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
        at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
        at akka.dispatch.Mailbox.run(Mailbox.scala:218)
        at 
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.s
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
        at 
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
        at 
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at 
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)



> Pyspark on Mesos scheduler error
> --------------------------------
>
>                 Key: MESOS-1535
>                 URL: https://issues.apache.org/jira/browse/MESOS-1535
>             Project: Mesos
>          Issue Type: Bug
>    Affects Versions: 0.18.0, 0.18.1
>         Environment: Running a Mesos on a cluster of Centos 6.5 machines. 180 
> GB memory.
>            Reporter: Ajay Viswanathan
>              Labels: pyspark
>
> This is an error that I get while running fine-grained PySpark on the Mesos 
> cluster. This comes after running some 200-1000 tasks generally.
> Pyspark code:
> while True:
>     sc.parallelize(range(10)).map(lambda n : n*2).collect()
> Error log:
> (In console)
> ERROR DAGSchedulerActorSupervisor: eventProcesserActor failed due to the 
> error EOF reached before Python server acknowledged; shutting down 
> SparkContext
> Traceback (most recent call last):
>   File "<stdin>", line 2, in <module>
>   File ".../spark-1.0.0/python/pyspark/rdd.py", line 583, in collect
>     bytesInJava = self._jrdd.collect().iterator()
>   File ".../spark-1.0.0/python/lib/py4j-0.8.1-src.zip/py4j/java_gateway.py", 
> line 537,
>   File ".../spark-1.0.0/python/lib/py4j-0.8.1-src.zip/py4j/protocol.py", line 
> 300, in
> py4j.protocol.Py4JJavaError: An error occurred while calling o847.collect.
> org.apache.spark.SparkException: Job 75 cancelled as part of cancellation of 
> all jobs
>         at 
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$fail
>         at 
> org.apache.spark.scheduler.DAGScheduler.handleJobCancellation(DAGScheduler.scala:998)
>         at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply$mcVI$sp(DAGS
>         at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
>         at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
>         at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
>         at 
> org.apache.spark.scheduler.DAGScheduler.doCancelAllJobs(DAGScheduler.scala:499)
>         at 
> org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
>         at 
> org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
>         at 
> akka.actor.SupervisorStrategy.handleFailure(FaultHandling.scala:295)
>         at 
> akka.actor.dungeon.FaultHandling$class.handleFailure(FaultHandling.scala:253)
>         at akka.actor.ActorCell.handleFailure(ActorCell.scala:338)
>         at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:423)
>         at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
>         at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
>         at akka.dispatch.Mailbox.run(Mailbox.scala:218)
>         at 
> akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.s
>         at 
> scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
>         at 
> scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
>         at 
> scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
>         at 
> scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> >>> 14/06/24 02:58:19 ERROR OneForOneStrategy:
> java.lang.UnsupportedOperationException
>         at 
> org.apache.spark.scheduler.SchedulerBackend$class.killTask(SchedulerBackend.scala:32)
>         at 
> org.apache.spark.scheduler.cluster.mesos.MesosSchedulerBackend.killTask(MesosSchedule
>         at 
> org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
>         at 
> org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
>         at 
> org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
>         at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
>         at 
> org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3.apply(TaskSchedul
>         at 
> org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3.apply(TaskSchedul
>         at scala.Option.foreach(Option.scala:236)
>         at 
> org.apache.spark.scheduler.TaskSchedulerImpl.cancelTasks(TaskSchedulerImpl.scala:176)
>         at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
>         at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
>         at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
>         at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
>         at 
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$fail
>         at 
> org.apache.spark.scheduler.DAGScheduler.handleJobCancellation(DAGScheduler.scala:998)
>         at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply$mcVI$sp(DAGS
>         at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
>         at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
>         at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
>         at 
> org.apache.spark.scheduler.DAGScheduler.doCancelAllJobs(DAGScheduler.scala:499)
>         at 
> org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
>         at 
> org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
>         at 
> akka.actor.SupervisorStrategy.handleFailure(FaultHandling.scala:295)
>         at 
> akka.actor.dungeon.FaultHandling$class.handleFailure(FaultHandling.scala:253)
>         at akka.actor.ActorCell.handleFailure(ActorCell.scala:338)
>         at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:423)
>         at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
>         at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
>         at akka.dispatch.Mailbox.run(Mailbox.scala:218)
>         at 
> akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.s
>         at 
> scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
>         at 
> scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
>         at 
> scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
>         at 
> scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to