Hi, the following error is raised using Spark 1.5.2 or 1.6.0, in standalone mode, on my computer. Has anyone had the same problem, and do you know what might cause this exception? Thanks in advance.
/16/03/02 15:12:27 WARN TaskSetManager: Lost task 9.0 in stage 0.0 (TID 9, 192.168.1.36): java.lang.ClassCastException: org.apache.spark.sql.types.GenericArrayData cannot be cast to org.apache.spark.sql.catalyst.InternalRow at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getStruct(rows.scala:50) at org.apache.spark.sql.catalyst.expressions.GenericMutableRow.getStruct(rows.scala:247) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificPredicate.eval(Unknown Source) at org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate$$anonfun$create$2.apply(GeneratePredicate.scala:67) at org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate$$anonfun$create$2.apply(GeneratePredicate.scala:67) at org.apache.spark.sql.execution.Filter$$anonfun$4$$anonfun$apply$4.apply(basicOperators.scala:117) at org.apache.spark.sql.execution.Filter$$anonfun$4$$anonfun$apply$4.apply(basicOperators.scala:115) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:390) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.processInputs(TungstenAggregationIterator.scala:365) at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.start(TungstenAggregationIterator.scala:622) at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1.org$apache$spark$sql$execution$aggregate$TungstenAggregate$$anonfun$$executePartition$1(TungstenAggregate.scala:110) at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:119) at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:119) at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.compute(MapPartitionsWithPreparationRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:300) at 
org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:300) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:88) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 16/03/02 15:12:27 INFO TaskSetManager: Starting task 9.1 in stage 0.0 (TID 17, 192.168.1.36, PROCESS_LOCAL, 2236 bytes) 16/03/02 15:12:27 INFO TaskSetManager: Finished task 11.0 in stage 0.0 (TID 11) in 921 ms on 192.168.1.36 (10/17) 16/03/02 15:12:27 INFO TaskSetManager: Finished task 13.0 in stage 0.0 (TID 13) in 871 ms on 192.168.1.36 (11/17) 16/03/02 15:12:27 INFO TaskSetManager: Finished task 14.0 in stage 0.0 (TID 14) in 885 ms on 192.168.1.36 (12/17) 16/03/02 15:12:27 INFO TaskSetManager: Finished task 8.0 in stage 0.0 (TID 8) in 981 ms on 192.168.1.36 (13/17) 16/03/02 15:12:27 INFO TaskSetManager: Finished task 15.0 in stage 0.0 (TID 15) in 844 ms on 192.168.1.36 (14/17) 16/03/02 15:12:27 INFO TaskSetManager: Finished task 10.0 in stage 0.0 (TID 10) in 1007 ms on 192.168.1.36 (15/17) 16/03/02 15:12:28 INFO TaskSetManager: Lost task 9.1 in stage 0.0 (TID 17) on executor 192.168.1.36: java.lang.ClassCastException (org.apache.spark.sql.types.GenericArrayData cannot be cast to org.apache.spark.sql.catalyst.InternalRow) [duplicate 1] 16/03/02 15:12:28 INFO TaskSetManager: Starting task 9.2 in stage 0.0 (TID 18, 192.168.1.36, PROCESS_LOCAL, 2236 bytes) 16/03/02 15:12:28 INFO TaskSetManager: Finished task 16.0 in stage 0.0 (TID 16) in 537 ms on 
192.168.1.36 (16/17) 16/03/02 15:12:28 INFO TaskSetManager: Lost task 9.2 in stage 0.0 (TID 18) on executor 192.168.1.36: java.lang.ClassCastException (org.apache.spark.sql.types.GenericArrayData cannot be cast to org.apache.spark.sql.catalyst.InternalRow) [duplicate 2] 16/03/02 15:12:28 INFO TaskSetManager: Starting task 9.3 in stage 0.0 (TID 19, 192.168.1.36, PROCESS_LOCAL, 2236 bytes) 16/03/02 15:12:29 WARN TaskSetManager: Lost task 9.3 in stage 0.0 (TID 19, 192.168.1.36): java.lang.ClassCastException 16/03/02 15:12:29 ERROR TaskSetManager: Task 9 in stage 0.0 failed 4 times; aborting job 16/03/02 15:12:29 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 16/03/02 15:12:29 INFO TaskSchedulerImpl: Cancelling stage 0 16/03/02 15:12:29 INFO DAGScheduler: ShuffleMapStage 0 (count at Snippet.scala:17) failed in 5,215 s 16/03/02 15:12:29 INFO DAGScheduler: Job 0 failed: count at Snippet.scala:17, took 5,294801 s Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 9 in stage 0.0 failed 4 times, most recent failure: Lost task 9.3 in stage 0.0 (TID 19, 192.168.1.36): java.lang.ClassCastException Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) at 
scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1837) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1850) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1921) at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:909) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) at org.apache.spark.rdd.RDD.withScope(RDD.scala:310) at org.apache.spark.rdd.RDD.collect(RDD.scala:908) at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:177) at org.apache.spark.sql.DataFrame$$anonfun$collect$1.apply(DataFrame.scala:1385) at org.apache.spark.sql.DataFrame$$anonfun$collect$1.apply(DataFrame.scala:1385) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:56) at org.apache.spark.sql.DataFrame.withNewExecutionId(DataFrame.scala:1903) at org.apache.spark.sql.DataFrame.collect(DataFrame.scala:1384) at org.apache.spark.sql.DataFrame.count(DataFrame.scala:1402) at Snippet$.main(Snippet.scala:17) at Snippet.main(Snippet.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at 
java.lang.reflect.Method.invoke(Method.java:497) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:674) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: java.lang.ClassCastException 16/03/02 15:12:29 INFO SparkContext: Invoking stop() from shutdown hook / You'll find attached: - a code snippet used to reproduce the exception: Snippet.scala <http://apache-spark-user-list.1001560.n3.nabble.com/file/n26377/Snippet.scala> - the driver logs: classClastException.log <http://apache-spark-user-list.1001560.n3.nabble.com/file/n26377/classClastException.log> Unfortunately, I can't attach the data used to reproduce the exception, as it is confidential. Sincerely -- View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/org-apache-spark-sql-types-GenericArrayData-cannot-be-cast-to-org-apache-spark-sql-catalyst-Internalw-tp26377.html Sent from the Apache Spark User List mailing list archive at Nabble.com. --------------------------------------------------------------------- To unsubscribe, e-mail: user-unsubscr...@spark.apache.org For additional commands, e-mail: user-h...@spark.apache.org