Spark shuffle service does not work in stand alone
Has anyone tried shuffle service in Stand Alone cluster mode? I want to enable it for d.a. but my jobs never start when I submit them. This happens with all my jobs. 15/10/13 08:29:45 INFO DAGScheduler: Job 0 failed: json at DataLoader.scala:86, took 16.318615 s Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 7, 162.101.194.47): ExecutorLostFailure (executor 4 lost) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1822) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1942) at 
org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1003) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) at org.apache.spark.rdd.RDD.reduce(RDD.scala:985) at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1.apply(RDD.scala:1114) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) at org.apache.spark.rdd.RDD.treeAggregate(RDD.scala:1091) at org.apache.spark.sql.execution.datasources.json.InferSchema$.apply(InferSchema.scala:58) at org.apache.spark.sql.execution.datasources.json.JSONRelation$$anonfun$6.apply(JSONRelation.scala:105) at org.apache.spark.sql.execution.datasources.json.JSONRelation$$anonfun$6.apply(JSONRelation.scala:100) at scala.Option.getOrElse(Option.scala:120) at org.apache.spark.sql.execution.datasources.json.JSONRelation.dataSchema$lzycompute(JSONRelation.scala:100) at org.apache.spark.sql.execution.datasources.json.JSONRelation.dataSchema(JSONRelation.scala:99) at org.apache.spark.sql.sources.HadoopFsRelation.schema$lzycompute(interfaces.scala:561) at org.apache.spark.sql.sources.HadoopFsRelation.schema(interfaces.scala:560) at org.apache.spark.sql.execution.datasources.LogicalRelation.(LogicalRelation.scala:31) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:120) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:104) at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:219) at org.apache.saif.loaders.DataLoader$.load_json(DataLoader.scala:86)
Re: Spark shuffle service does not work in stand alone
Hi, AFAIK, the shuffle service makes sense only to delegate the shuffle to mapreduce (as mapreduce shuffle is most of the time faster than the spark shuffle). As you run in standalone mode, shuffle service will use the spark shuffle. Not 100% sure, though. Regards JB On 10/13/2015 04:23 PM, saif.a.ell...@wellsfargo.com wrote: Has anyone tried shuffle service in Stand Alone cluster mode? I want to enable it for d.a. but my jobs never start when I submit them. This happens with all my jobs. 15/10/13 08:29:45 INFO DAGScheduler: Job 0 failed: json at DataLoader.scala:86, took 16.318615 s Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 7, 162.101.194.47): ExecutorLostFailure (executor 4 lost) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458) at 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1822) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1942) at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1003) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) at org.apache.spark.rdd.RDD.reduce(RDD.scala:985) at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1.apply(RDD.scala:1114) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) at org.apache.spark.rdd.RDD.treeAggregate(RDD.scala:1091) at org.apache.spark.sql.execution.datasources.json.InferSchema$.apply(InferSchema.scala:58) at org.apache.spark.sql.execution.datasources.json.JSONRelation$$anonfun$6.apply(JSONRelation.scala:105) at org.apache.spark.sql.execution.datasources.json.JSONRelation$$anonfun$6.apply(JSONRelation.scala:100) at scala.Option.getOrElse(Option.scala:120) at org.apache.spark.sql.execution.datasources.json.JSONRelation.dataSchema$lzycompute(JSONRelation.scala:100) at org.apache.spark.sql.execution.datasources.json.JSONRelation.dataSchema(JSONRelation.scala:99) at org.apache.spark.sql.sources.HadoopFsRelation.schema$lzycompute(interfaces.scala:561) at org.apache.spark.sql.sources.HadoopFsRelation.schema(interfaces.scala:560) at org.apache.spark.sql.execution.datasources.LogicalRelation.(LogicalRelation.scala:31) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:120) at 
org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:104) at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:219) at org.apache.saif.loaders.DataLoader$.load_json(DataLoader.scala:86) -- Jean-Baptiste Onofré jbono...@apache.org http://blog.nanthrax.net Talend - http://www.talend.com - To unsubscribe, e-mail: user-unsubscr...@spark.apache.org For additional commands, e-mail: user-h...@spark.apache.org
Re: Spark shuffle service does not work in stand alone
It would probably be more helpful if you looked for the executor error and posted it. The screenshot you posted is the driver exception caused by the task failure, which is not terribly useful. On Tue, Oct 13, 2015 at 7:23 AM, <saif.a.ell...@wellsfargo.com> wrote: > Has anyone tried shuffle service in Stand Alone cluster mode? I want to > enable it for d.a. but my jobs never start when I submit them. > This happens with all my jobs. > > > 15/10/13 08:29:45 INFO DAGScheduler: Job 0 failed: json at > DataLoader.scala:86, took 16.318615 s > Exception in thread "main" org.apache.spark.SparkException: Job aborted > due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent > failure: Lost task 0.3 in stage 0.0 (TID 7, 162.101.194.47): > ExecutorLostFailure (executor 4 lost) > Driver stacktrace: > at org.apache.spark.scheduler.DAGScheduler.org > $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) > at scala.Option.foreach(Option.scala:236) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458) > at > 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1822) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1942) > at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1003) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) > at org.apache.spark.rdd.RDD.reduce(RDD.scala:985) > at > org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1.apply(RDD.scala:1114) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) > at org.apache.spark.rdd.RDD.treeAggregate(RDD.scala:1091) > at > org.apache.spark.sql.execution.datasources.json.InferSchema$.apply(InferSchema.scala:58) > at > org.apache.spark.sql.execution.datasources.json.JSONRelation$$anonfun$6.apply(JSONRelation.scala:105) > at > org.apache.spark.sql.execution.datasources.json.JSONRelation$$anonfun$6.apply(JSONRelation.scala:100) > at scala.Option.getOrElse(Option.scala:120) > at > org.apache.spark.sql.execution.datasources.json.JSONRelation.dataSchema$lzycompute(JSONRelation.scala:100) > at > org.apache.spark.sql.execution.datasources.json.JSONRelation.dataSchema(JSONRelation.scala:99) > at > org.apache.spark.sql.sources.HadoopFsRelation.schema$lzycompute(interfaces.scala:561) > at > org.apache.spark.sql.sources.HadoopFsRelation.schema(interfaces.scala:560) > at > org.apache.spark.sql.execution.datasources.LogicalRelation.(LogicalRelation.scala:31) > at > 
org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:120) > at > org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:104) > at > org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:219) > at > org.apache.saif.loaders.DataLoader$.load_json(DataLoader.scala:86) > > > -- Marcelo
RE: Spark shuffle service does not work in stand alone
Hi, thanks Executors are simply failing to connect to a shuffle server: 15/10/13 08:29:34 INFO BlockManagerMaster: Registered BlockManager 15/10/13 08:29:34 INFO BlockManager: Registering executor with local external shuffle service. 15/10/13 08:29:34 ERROR BlockManager: Failed to connect to external shuffle server, will retry 2 more times after waiting 5 seconds... java.io.IOException: Failed to connect to /162.xxx.zzz.yy:port at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:193) at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:156) at org.apache.spark.network.shuffle.ExternalShuffleClient.registerWithShuffleServer(ExternalShuffleClient.java:140) at org.apache.spark.storage.BlockManager$$anonfun$registerWithExternalShuffleServer$1.apply$mcVI$sp(BlockManager.scala:220) at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141) at org.apache.spark.storage.BlockManager.registerWithExternalShuffleServer(BlockManager.scala:217) at org.apache.spark.storage.BlockManager.initialize(BlockManager.scala:203) at org.apache.spark.executor.Executor.(Executor.scala:85) at org.apache.spark.executor.CoarseGrainedExecutorBackend$$anonfun$receive$1.applyOrElse(CoarseGrainedExecutorBackend.scala:86) at org.apache.spark.rpc.akka.AkkaRpcEnv.org$apache$spark$rpc$akka$AkkaRpcEnv$$processMessage(AkkaRpcEnv.scala:177) at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1$$anonfun$receiveWithLogging$1$$anonfun$applyOrElse$4.apply$mcV$sp(AkkaRpcEnv.scala:126) at org.apache.spark.rpc.akka.AkkaRpcEnv.org$apache$spark$rpc$akka$AkkaRpcEnv$$safelyCall(AkkaRpcEnv.scala:197) at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1$$anonfun$receiveWithLogging$1.applyOrElse(AkkaRpcEnv.scala:125) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at 
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at org.apache.spark.util.ActorLogReceive$$anon$1.apply(ActorLogReceive.scala:59) at org.apache.spark.util.ActorLogReceive$$anon$1.apply(ActorLogReceive.scala:42) at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:118) at org.apache.spark.util.ActorLogReceive$$anon$1.applyOrElse(ActorLogReceive.scala:42) at akka.actor.Actor$class.aroundReceive(Actor.scala:467) at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1.aroundReceive(AkkaRpcEnv.scala:92) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) at akka.actor.ActorCell.invoke(ActorCell.scala:487) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) at akka.dispatch.Mailbox.run(Mailbox.scala:220) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) Caused by: java.net.ConnectException: Connection refused: /162.xxx.zzz.yy:port at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method) at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717) at io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:224) at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:289) at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:528) at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:468) at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:382) at 
io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:354) at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111) at java.lang.Thread.run(Thread.java:745) From: Marcelo Vanzin [mailto:van...@cloudera.com] Sent: Tuesday, October 13, 2015 1:13 PM To: Ellafi, Saif A. Cc: user@spark.apache.org Subject: Re: Spark shuffle service does not work in stand alone It would probably be more helpful if you looked for the executor error and posted it. The screenshot you posted is the driver exception caused by the task failure, which is not terribly useful. On Tue, Oct 13, 2015 at 7:23 AM, <saif.a.ell...@wellsfargo.com<mailto:saif.a.ell...@wellsfargo.com>> wrote: Has
RE: Spark shuffle service does not work in stand alone
I believe the confusion here is self-answered. The thing is that in the documentation, the spark shuffle service runs only under YARN, while here we are speaking about a stand alone cluster. The proper question is, how to launch a shuffle service for stand alone? Saif From: saif.a.ell...@wellsfargo.com [mailto:saif.a.ell...@wellsfargo.com] Sent: Tuesday, October 13, 2015 2:25 PM To: van...@cloudera.com Cc: user@spark.apache.org Subject: RE: Spark shuffle service does not work in stand alone Hi, thanks Executors are simply failing to connect to a shuffle server: 15/10/13 08:29:34 INFO BlockManagerMaster: Registered BlockManager 15/10/13 08:29:34 INFO BlockManager: Registering executor with local external shuffle service. 15/10/13 08:29:34 ERROR BlockManager: Failed to connect to external shuffle server, will retry 2 more times after waiting 5 seconds... java.io.IOException: Failed to connect to /162.xxx.zzz.yy:port at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:193) at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:156) at org.apache.spark.network.shuffle.ExternalShuffleClient.registerWithShuffleServer(ExternalShuffleClient.java:140) at org.apache.spark.storage.BlockManager$$anonfun$registerWithExternalShuffleServer$1.apply$mcVI$sp(BlockManager.scala:220) at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141) at org.apache.spark.storage.BlockManager.registerWithExternalShuffleServer(BlockManager.scala:217) at org.apache.spark.storage.BlockManager.initialize(BlockManager.scala:203) at org.apache.spark.executor.Executor.(Executor.scala:85) at org.apache.spark.executor.CoarseGrainedExecutorBackend$$anonfun$receive$1.applyOrElse(CoarseGrainedExecutorBackend.scala:86) at org.apache.spark.rpc.akka.AkkaRpcEnv.org$apache$spark$rpc$akka$AkkaRpcEnv$$processMessage(AkkaRpcEnv.scala:177) at 
org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1$$anonfun$receiveWithLogging$1$$anonfun$applyOrElse$4.apply$mcV$sp(AkkaRpcEnv.scala:126) at org.apache.spark.rpc.akka.AkkaRpcEnv.org$apache$spark$rpc$akka$AkkaRpcEnv$$safelyCall(AkkaRpcEnv.scala:197) at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1$$anonfun$receiveWithLogging$1.applyOrElse(AkkaRpcEnv.scala:125) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at org.apache.spark.util.ActorLogReceive$$anon$1.apply(ActorLogReceive.scala:59) at org.apache.spark.util.ActorLogReceive$$anon$1.apply(ActorLogReceive.scala:42) at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:118) at org.apache.spark.util.ActorLogReceive$$anon$1.applyOrElse(ActorLogReceive.scala:42) at akka.actor.Actor$class.aroundReceive(Actor.scala:467) at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1.aroundReceive(AkkaRpcEnv.scala:92) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) at akka.actor.ActorCell.invoke(ActorCell.scala:487) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) at akka.dispatch.Mailbox.run(Mailbox.scala:220) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) Caused by: java.net.ConnectException: Connection refused: /162.xxx.zzz.yy:port at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method) at 
sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717) at io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:224) at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:289) at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:528) at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:468) at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:382) at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:354) at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111) at java.lang.Thread.run(Thread.java:745) From: Marcelo Vanzin
Re: Spark shuffle service does not work in stand alone
You have to manually start the shuffle service if you're not running YARN. See the "sbin/start-shuffle-service.sh" script. On Tue, Oct 13, 2015 at 10:29 AM, <saif.a.ell...@wellsfargo.com> wrote: > I believe the confusion here is self-answered. > > The thing is that in the documentation, the spark shuffle service runs > only under YARN, while here we are speaking about a stand alone cluster. > > > > The proper question is, how to launch a shuffle service for stand alone? > > > > Saif > > > > *From:* saif.a.ell...@wellsfargo.com [mailto:saif.a.ell...@wellsfargo.com] > > *Sent:* Tuesday, October 13, 2015 2:25 PM > *To:* van...@cloudera.com > *Cc:* user@spark.apache.org > *Subject:* RE: Spark shuffle service does not work in stand alone > > > > Hi, thanks > > > > Executors are simply failing to connect to a shuffle server: > > > > 15/10/13 08:29:34 INFO BlockManagerMaster: Registered BlockManager > > 15/10/13 08:29:34 INFO BlockManager: Registering executor with local > external shuffle service. > > 15/10/13 08:29:34 ERROR BlockManager: Failed to connect to external > shuffle server, will retry 2 more times after waiting 5 seconds... 
> > java.io.IOException: Failed to connect to /162.xxx.zzz.yy:port > > at > org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:193) > > at > org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:156) > > at > org.apache.spark.network.shuffle.ExternalShuffleClient.registerWithShuffleServer(ExternalShuffleClient.java:140) > > at > org.apache.spark.storage.BlockManager$$anonfun$registerWithExternalShuffleServer$1.apply$mcVI$sp(BlockManager.scala:220) > > at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141) > > at > org.apache.spark.storage.BlockManager.registerWithExternalShuffleServer(BlockManager.scala:217) > > at > org.apache.spark.storage.BlockManager.initialize(BlockManager.scala:203) > > at org.apache.spark.executor.Executor.(Executor.scala:85) > > at > org.apache.spark.executor.CoarseGrainedExecutorBackend$$anonfun$receive$1.applyOrElse(CoarseGrainedExecutorBackend.scala:86) > > at org.apache.spark.rpc.akka.AkkaRpcEnv.org > $apache$spark$rpc$akka$AkkaRpcEnv$$processMessage(AkkaRpcEnv.scala:177) > > at > org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1$$anonfun$receiveWithLogging$1$$anonfun$applyOrElse$4.apply$mcV$sp(AkkaRpcEnv.scala:126) > > at org.apache.spark.rpc.akka.AkkaRpcEnv.org > $apache$spark$rpc$akka$AkkaRpcEnv$$safelyCall(AkkaRpcEnv.scala:197) > > at > org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1$$anonfun$receiveWithLogging$1.applyOrElse(AkkaRpcEnv.scala:125) > > at > scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) > > at > scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) > > at > scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) > > at > org.apache.spark.util.ActorLogReceive$$anon$1.apply(ActorLogReceive.scala:59) > > at > 
org.apache.spark.util.ActorLogReceive$$anon$1.apply(ActorLogReceive.scala:42) > > at > scala.PartialFunction$class.applyOrElse(PartialFunction.scala:118) > > at > org.apache.spark.util.ActorLogReceive$$anon$1.applyOrElse(ActorLogReceive.scala:42) > > at akka.actor.Actor$class.aroundReceive(Actor.scala:467) > > at > org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1.aroundReceive(AkkaRpcEnv.scala:92) > > at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) > > at akka.actor.ActorCell.invoke(ActorCell.scala:487) > > at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) > > at akka.dispatch.Mailbox.run(Mailbox.scala:220) > > at > akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397) > > at > scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) > > at > scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) > > at > scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) > > at > scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) > > Caused by: java.net.ConnectException: Connection refused: > /162.xxx.zzz.yy:port > > at sun.nio.ch.SocketChannelI
RE: Spark shuffle service does not work in stand alone
Thanks, I missed that one. From: Marcelo Vanzin [mailto:van...@cloudera.com] Sent: Tuesday, October 13, 2015 2:36 PM To: Ellafi, Saif A. Cc: user@spark.apache.org Subject: Re: Spark shuffle service does not work in stand alone You have to manually start the shuffle service if you're not running YARN. See the "sbin/start-shuffle-service.sh" script. On Tue, Oct 13, 2015 at 10:29 AM, <saif.a.ell...@wellsfargo.com<mailto:saif.a.ell...@wellsfargo.com>> wrote: I believe the confusion here is self-answered. The thing is that in the documentation, the spark shuffle service runs only under YARN, while here we are speaking about a stand alone cluster. The proper question is, how to launch a shuffle service for stand alone? Saif From: saif.a.ell...@wellsfargo.com<mailto:saif.a.ell...@wellsfargo.com> [mailto:saif.a.ell...@wellsfargo.com<mailto:saif.a.ell...@wellsfargo.com>] Sent: Tuesday, October 13, 2015 2:25 PM To: van...@cloudera.com<mailto:van...@cloudera.com> Cc: user@spark.apache.org<mailto:user@spark.apache.org> Subject: RE: Spark shuffle service does not work in stand alone Hi, thanks Executors are simply failing to connect to a shuffle server: 15/10/13 08:29:34 INFO BlockManagerMaster: Registered BlockManager 15/10/13 08:29:34 INFO BlockManager: Registering executor with local external shuffle service. 15/10/13 08:29:34 ERROR BlockManager: Failed to connect to external shuffle server, will retry 2 more times after waiting 5 seconds... 
java.io.IOException: Failed to connect to /162.xxx.zzz.yy:port at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:193) at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:156) at org.apache.spark.network.shuffle.ExternalShuffleClient.registerWithShuffleServer(ExternalShuffleClient.java:140) at org.apache.spark.storage.BlockManager$$anonfun$registerWithExternalShuffleServer$1.apply$mcVI$sp(BlockManager.scala:220) at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141) at org.apache.spark.storage.BlockManager.registerWithExternalShuffleServer(BlockManager.scala:217) at org.apache.spark.storage.BlockManager.initialize(BlockManager.scala:203) at org.apache.spark.executor.Executor.(Executor.scala:85) at org.apache.spark.executor.CoarseGrainedExecutorBackend$$anonfun$receive$1.applyOrElse(CoarseGrainedExecutorBackend.scala:86) at org.apache.spark.rpc.akka.AkkaRpcEnv.org<http://org.apache.spark.rpc.akka.AkkaRpcEnv.org>$apache$spark$rpc$akka$AkkaRpcEnv$$processMessage(AkkaRpcEnv.scala:177) at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1$$anonfun$receiveWithLogging$1$$anonfun$applyOrElse$4.apply$mcV$sp(AkkaRpcEnv.scala:126) at org.apache.spark.rpc.akka.AkkaRpcEnv.org<http://org.apache.spark.rpc.akka.AkkaRpcEnv.org>$apache$spark$rpc$akka$AkkaRpcEnv$$safelyCall(AkkaRpcEnv.scala:197) at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1$$anonfun$receiveWithLogging$1.applyOrElse(AkkaRpcEnv.scala:125) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33) at scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25) at org.apache.spark.util.ActorLogReceive$$anon$1.apply(ActorLogReceive.scala:59) at 
org.apache.spark.util.ActorLogReceive$$anon$1.apply(ActorLogReceive.scala:42) at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:118) at org.apache.spark.util.ActorLogReceive$$anon$1.applyOrElse(ActorLogReceive.scala:42) at akka.actor.Actor$class.aroundReceive(Actor.scala:467) at org.apache.spark.rpc.akka.AkkaRpcEnv$$anonfun$actorRef$lzycompute$1$1$$anon$1.aroundReceive(AkkaRpcEnv.scala:92) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) at akka.actor.ActorCell.invoke(ActorCell.scala:487) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) at akka.dispatch.Mailbox.run(Mailbox.scala:220) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) Caused by: java.net.ConnectException: Connection refused: /162.xxx.zzz.yy:port at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method) at sun.nio.ch.