[ https://issues.apache.org/jira/browse/SPARK-6834?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hyukjin Kwon updated SPARK-6834: -------------------------------- Labels: bulk-closed (was: ) > Failed with error: ‘invalid package name’ Error in as.name(name) : attempt to > use zero-length variable name > ----------------------------------------------------------------------------------------------------------- > > Key: SPARK-6834 > URL: https://issues.apache.org/jira/browse/SPARK-6834 > Project: Spark > Issue Type: Bug > Components: SparkR > Affects Versions: 1.4.0 > Reporter: Shivaram Venkataraman > Priority: Major > Labels: bulk-closed > > Context: trying to interface SparkR with foreach. Foreach is an abstraction > over several parallel backends. This would enable execution on a Spark cluster > of > 50 R packages including very popular caret and plyr. Simple foreach > examples work. caret unfortunately does not. > I have a repro but it is somewhat complex (it is the main example of model > fitting in caret on their website though, not something made on purpose to > make SparkR fail). If I find anything more straightforward, I will comment > here. Reproduced in an R --vanilla session, but I can't uninstall all of my > packages, so I may have missed some deps. 
> Reproduce with: > install.packages(c("caret", "foreach", "devtools", "mlbench", "gbm", > "survival", "splines")) > library(caret) > library(foreach) > library(devtools) > install_github("RevolutionAnalytics/doParallelSpark", subdir = "pkg") > library(doParallelSpark) > registerDoParallelSpark() > library(mlbench) > data(Sonar) > set.seed(998) > inTraining <- createDataPartition(Sonar$Class, p = .75, list = FALSE) > training <- Sonar[ inTraining,] > testing <- Sonar[-inTraining,] > fitControl <- trainControl(## 10-fold CV > method = "repeatedcv", > number = 10, > ## repeated ten times > repeats = 10) > set.seed(825) > gbmFit1 <- train(Class ~ ., data = training, > method = "gbm", > trControl = fitControl, > ## This last option is actually one > ## for gbm() that passes through > verbose = FALSE) > Stack trace > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Error in as.name(name) : attempt to use zero-length variable name > Calls: source ... withVisible -> eval -> eval -> getNamespace -> as.name > Execution halted > 15/03/26 14:32:30 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 5) > org.apache.spark.SparkException: R computation failed with > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Error in as.name(name) : attempt to use zero-length variable name > Calls: source ... 
withVisible -> eval -> eval -> getNamespace -> as.name > Execution halted > at edu.berkeley.cs.amplab.sparkr.BaseRRDD.compute(RRDD.scala:80) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) > at edu.berkeley.cs.amplab.sparkr.BaseRRDD.compute(RRDD.scala:32) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) > at org.apache.spark.scheduler.Task.run(Task.scala:54) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > 15/03/26 14:32:30 WARN TaskSetManager: Lost task 0.0 in stage 3.0 (TID 5, > localhost): org.apache.spark.SparkException: R computation failed with > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Error in as.name(name) : attempt to use zero-length variable name > Calls: source ... 
withVisible -> eval -> eval -> getNamespace -> as.name > Execution halted > edu.berkeley.cs.amplab.sparkr.BaseRRDD.compute(RRDD.scala:80) > org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) > org.apache.spark.rdd.RDD.iterator(RDD.scala:229) > edu.berkeley.cs.amplab.sparkr.BaseRRDD.compute(RRDD.scala:32) > org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) > org.apache.spark.rdd.RDD.iterator(RDD.scala:229) > > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68) > > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) > org.apache.spark.scheduler.Task.run(Task.scala:54) > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177) > > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > java.lang.Thread.run(Thread.java:745) > 15/03/26 14:32:30 ERROR TaskSetManager: Task 0 in stage 3.0 failed 1 times; > aborting job > collect on 33 failed with java.lang.reflect.InvocationTargetException > java.lang.reflect.InvocationTargetException > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > edu.berkeley.cs.amplab.sparkr.SparkRBackendHandler.handleMethodCall(SparkRBackendHandler.scala:111) > at > edu.berkeley.cs.amplab.sparkr.SparkRBackendHandler.channelRead0(SparkRBackendHandler.scala:58) > at > edu.berkeley.cs.amplab.sparkr.SparkRBackendHandler.channelRead0(SparkRBackendHandler.scala:19) > at > io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105) > at > io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:333) > at > 
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:319) > at > io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103) > at > io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:333) > at > io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:319) > at > io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:163) > at > io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:333) > at > io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:319) > at > io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:787) > at > io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:130) > at > io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:511) > at > io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:468) > at > io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:382) > at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:354) > at > io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:116) > at > io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:137) > at java.lang.Thread.run(Thread.java:745) > Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: > Task 0 in stage 3.0 failed 1 times, most recent failure: Lost task 0.0 in > stage 3.0 (TID 5, localhost): org.apache.spark.SparkException: R computation > failed with > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Failed with error: ‘invalid package name’ > Error in as.name(name) : attempt to use 
zero-length variable name > Calls: source ... withVisible -> eval -> eval -> getNamespace -> as.name > Execution halted > edu.berkeley.cs.amplab.sparkr.BaseRRDD.compute(RRDD.scala:80) > org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) > org.apache.spark.rdd.RDD.iterator(RDD.scala:229) > edu.berkeley.cs.amplab.sparkr.BaseRRDD.compute(RRDD.scala:32) > org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) > org.apache.spark.rdd.RDD.iterator(RDD.scala:229) > > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68) > > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) > org.apache.spark.scheduler.Task.run(Task.scala:54) > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177) > > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > java.lang.Thread.run(Thread.java:745) > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1185) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1174) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1173) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1173) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688) > at scala.Option.foreach(Option.scala:236) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:688) > at > 
org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1391) > at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) > at akka.actor.ActorCell.invoke(ActorCell.scala:456) > at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) > at akka.dispatch.Mailbox.run(Mailbox.scala:219) > at > akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) > at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) > at > scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) > at > scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) > at > scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) > Error: returnStatus == 0 is not TRUE -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org