[ https://issues.apache.org/jira/browse/SPARK-3013?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Michael Armbrust updated SPARK-3013: ------------------------------------ Assignee: Davies Liu > Doctest of inferSchema in Spark SQL Python API fails > ---------------------------------------------------- > > Key: SPARK-3013 > URL: https://issues.apache.org/jira/browse/SPARK-3013 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.0.2 > Reporter: Cheng Lian > Assignee: Davies Liu > Priority: Blocker > > Doctest of `inferSchema` in `sql.py` keeps failing and makes Jenkins crazy: > {code} > File "/home/jenkins/workspace/SparkPullRequestBuilder/python/pyspark/sql.py", > line 1021, in pyspark.sql.SQLContext.inferSchema > Failed example: > srdd.collect() > Exception raised: > Traceback (most recent call last): > File "/usr/lib64/python2.6/doctest.py", line 1253, in __run > compileflags, 1) in test.globs > File "<doctest pyspark.sql.SQLContext.inferSchema[6]>", line 1, in > <module> > srdd.collect() > File > "/home/jenkins/workspace/SparkPullRequestBuilder/python/pyspark/sql.py", line > 1613, in collect > rows = RDD.collect(self) > File > "/home/jenkins/workspace/SparkPullRequestBuilder/python/pyspark/rdd.py", line > 724, in collect > bytesInJava = self._jrdd.collect().iterator() > File > "/home/jenkins/workspace/SparkPullRequestBuilder/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", > line 538, in __call__ > self.target_id, self.name) > File > "/home/jenkins/workspace/SparkPullRequestBuilder/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", > line 300, in get_return_value > format(target_id, '.', name), value) > Py4JJavaError: An error occurred while calling o399.collect. 
> : org.apache.spark.SparkException: Job aborted due to stage failure: Task > 1 in stage 35.0 failed 1 times, most recent failure: Lost task 1.0 in stage > 35.0 (TID 72, localhost): java.lang.ClassCastException: java.lang.String > cannot be cast to java.util.ArrayList > > net.razorvine.pickle.objects.ArrayConstructor.construct(ArrayConstructor.java:33) > net.razorvine.pickle.Unpickler.load_reduce(Unpickler.java:617) > net.razorvine.pickle.Unpickler.dispatch(Unpickler.java:170) > net.razorvine.pickle.Unpickler.load(Unpickler.java:84) > net.razorvine.pickle.Unpickler.loads(Unpickler.java:97) > > org.apache.spark.api.python.PythonRDD$$anonfun$pythonToJavaArray$1$$anonfun$apply$4.apply(PythonRDD.scala:722) > > org.apache.spark.api.python.PythonRDD$$anonfun$pythonToJavaArray$1$$anonfun$apply$4.apply(PythonRDD.scala:721) > scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) > scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) > scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) > scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) > scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) > scala.collection.Iterator$GroupedIterator.fill(Iterator.scala:966) > > scala.collection.Iterator$GroupedIterator.hasNext(Iterator.scala:972) > scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) > scala.collection.Iterator$class.foreach(Iterator.scala:727) > scala.collection.AbstractIterator.foreach(Iterator.scala:1157) > > scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) > > scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) > > scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) > > scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) > scala.collection.AbstractIterator.to(Iterator.scala:1157) > > scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) > scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) > 
> scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) > scala.collection.AbstractIterator.toArray(Iterator.scala:1157) > org.apache.spark.rdd.RDD$$anonfun$16.apply(RDD.scala:774) > org.apache.spark.rdd.RDD$$anonfun$16.apply(RDD.scala:774) > > org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121) > > org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121) > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62) > org.apache.spark.scheduler.Task.run(Task.scala:54) > org.apache.spark.executSLF4J: Failed to load class > "org.slf4j.impl.StaticLoggerBinder". > SLF4J: Defaulting to no-operation (NOP) logger implementation > SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further > details. > or.Executor$TaskRunner.run(Executor.scala:199) > > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > java.lang.Thread.run(Thread.java:745) > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1153) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1142) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1141) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1141) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:682) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:682) > at scala.Option.foreach(Option.scala:236) > at > 
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:682) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1359) > at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) > at akka.actor.ActorCell.invoke(ActorCell.scala:456) > at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) > at akka.dispatch.Mailbox.run(Mailbox.scala:219) > at > akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) > at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) > at > scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) > at > scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) > at > scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) > {code} > One of the failed builds can be found > [here|https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/18437/consoleFull]. -- This message was sent by Atlassian JIRA (v6.2#6252) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org