Hi, I am using the program below in spark-shell to load and filter data from the data sets. I get exceptions if I run the program multiple times; if I restart the shell, it works fine. 1) Please let me know what I am doing wrong. 2) Also, is there a way to make the program better, instead of type casting every single time? 3) I tried to use Spark SQL, but I have to filter a lot of data as I am doing below, so I moved back to regular Spark.
steps in the program: val path = "/abc/bcd/*/*.json" import scala.util.parsing.json.JSON val records = sc.textFile(path); val json_records = records.map(l => JSON.parseFull(l).get).cache() val valid_values = json_records.map{l => var m = l.asInstanceOf[Map[Any,Any]];(m("device_id"),m("time_stamp"),(m("raw_json").asInstanceOf[Map[String,Any]])("response").asInstanceOf[Map[String,Any]]("someContent"))} val filtered_values = valid_values.filter{f => f._3 != List()} val intermedate_values = filtered_values.map{f => val m = f._3.asInstanceOf[Map[String,Any]]("values");(f._1,f._2,m,m.asInstanceOf[Map[String,Any]]("remote_info"))} val recordsWithRemoteInfo = intermedate_values.filter(_._4 != List()) implicit def anyToString(i: Any): String = i match { case l: String => l.toString case _ => "" } case class Record(timeStamp: Long, deviceId: Int, connectingUptime: Double, availability: Double, serialNumber: String,rSerialNumber: String,TDMAMode: String, Temperature: String, LastReboot: String, Chains: String, BandWidth: String, tx_power: String) { def this() = this(0, 0, 0, 0, "","", "", "", "", "", "", "") } def getHash(str1: String, str2:String): Int = str1.hashCode ^ str2.hashCode val valid_records = recordsWithRemoteInfo.map{l => val connection = l._3.asInstanceOf[Map[String,Any]]("connection").asInstanceOf[Map[String,Any]]; val deviceDetails = l._3.asInstanceOf[Map[String,Any]]("device_info").asInstanceOf[Map[String,Any]]; val remoteDetails = l._3.asInstanceOf[Map[String,Any]]("remote_info").asInstanceOf[Map[String,Any]]; val mimo = l._3.asInstanceOf[Map[String,Any]]("mimo").asInstanceOf[Map[String,Any]]; val signalmeter = l._3.asInstanceOf[Map[String,Any]]("signalmeter").asInstanceOf[Map[String,Any]]; val serialNumber = deviceDetails("SerialNumber"); val remoteSerialNumber = remoteDetails("SerialNumber") getHash(serialNumber,remoteSerialNumber) 
->Record(l._2.toString.toDouble.ceil.toLong,l._1.toString.toDouble.ceil.toInt,connection("ConnectingUptime").toString.toDouble,connection("availability").toString.toDouble,deviceDetails("SerialNumber"), remoteDetails("SerialNumber"),deviceDetails("TDMAMode"),deviceDetails("Temperature"),deviceDetails("LastReboot"), mimo("Chains"),signalmeter("BandWidth"),signalmeter("tx_power")) } valid_records.take(1).foreach(println) Exception trace: 14/08/19 13:29:41 INFO MemoryStore: Block rdd_53_1 stored as values to memory (estimated size 936.0 B, free 289.6 MB) 14/08/19 13:29:41 INFO MemoryStore: ensureFreeSpace(936) called with curMem=5544467, maxMem=309225062 14/08/19 13:29:41 INFO MemoryStore: Block rdd_53_4 stored as values to memory (estimated size 936.0 B, free 289.6 MB) 14/08/19 13:29:41 INFO MemoryStore: ensureFreeSpace(936) called with curMem=5545403, maxMem=309225062 14/08/19 13:29:41 INFO MemoryStore: Block rdd_53_3 stored as values to memory (estimated size 936.0 B, free 289.6 MB) 14/08/19 13:29:41 INFO BlockManagerInfo: Added rdd_53_1 in memory on 192.168.20.190:50718 (size: 936.0 B, free: 290.1 MB) 14/08/19 13:29:41 INFO MemoryStore: ensureFreeSpace(936) called with curMem=5546339, maxMem=309225062 14/08/19 13:29:41 INFO BlockManagerMaster: Updated info of block rdd_53_1 14/08/19 13:29:41 INFO MemoryStore: Block rdd_53_6 stored as values to memory (estimated size 936.0 B, free 289.6 MB) 14/08/19 13:29:41 INFO BlockManagerInfo: Added rdd_53_4 in memory on 192.168.20.190:50718 (size: 936.0 B, free: 290.1 MB) 14/08/19 13:29:41 INFO MemoryStore: ensureFreeSpace(936) called with curMem=5547275, maxMem=309225062 14/08/19 13:29:41 ERROR Executor: Exception in task ID 729 java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) 
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1014) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111) at org.apache.spark.scheduler.Task.run(Task.scala:51) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) 14/08/19 13:29:41 INFO MemoryStore: Block rdd_53_2 stored as values to memory (estimated size 936.0 B, free 289.6 MB) 14/08/19 13:29:41 INFO BlockManagerInfo: Added rdd_53_3 in memory on 192.168.20.190:50718 (size: 936.0 B, free: 290.1 MB) 14/08/19 13:29:41 INFO BlockManagerMaster: Updated info of block rdd_53_4 14/08/19 13:29:41 INFO MemoryStore: ensureFreeSpace(936) called with curMem=5548211, maxMem=309225062 14/08/19 13:29:41 INFO BlockManagerMaster: Updated info of block rdd_53_3 14/08/19 13:29:41 ERROR Executor: Exception in task ID 732 java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at 
scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1014) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111) at org.apache.spark.scheduler.Task.run(Task.scala:51) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) 14/08/19 13:29:41 INFO BlockManagerInfo: Added rdd_53_6 in memory on 192.168.20.190:50718 (size: 936.0 B, free: 290.1 MB) 14/08/19 13:29:41 ERROR Executor: Exception in task ID 731 java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1014) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at 
org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111) at org.apache.spark.scheduler.Task.run(Task.scala:51) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) 14/08/19 13:29:41 INFO MemoryStore: Block rdd_53_7 stored as values to memory (estimated size 936.0 B, free 289.6 MB) 14/08/19 13:29:41 INFO BlockManagerInfo: Added rdd_53_2 in memory on 192.168.20.190:50718 (size: 936.0 B, free: 290.1 MB) 14/08/19 13:29:41 INFO BlockManagerMaster: Updated info of block rdd_53_6 14/08/19 13:29:41 INFO TaskSetManager: Starting task 21.0:8 as TID 736 on executor localhost: localhost (PROCESS_LOCAL) 14/08/19 13:29:41 INFO BlockManagerMaster: Updated info of block rdd_53_2 14/08/19 13:29:41 ERROR Executor: Exception in task ID 734 java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1014) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at 
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111) at org.apache.spark.scheduler.Task.run(Task.scala:51) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) 14/08/19 13:29:41 INFO MemoryStore: ensureFreeSpace(936) called with curMem=5549147, maxMem=309225062 14/08/19 13:29:41 INFO TaskSetManager: Serialized task 21.0:8 as 2224 bytes in 0 ms 14/08/19 13:29:41 ERROR Executor: Exception in task ID 730 java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1014) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111) at org.apache.spark.scheduler.Task.run(Task.scala:51) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) 14/08/19 13:29:41 INFO 
BlockManagerInfo: Added rdd_53_7 in memory on 192.168.20.190:50718 (size: 936.0 B, free: 290.1 MB) 14/08/19 13:29:41 INFO Executor: Running task ID 736 14/08/19 13:29:41 WARN TaskSetManager: Lost TID 729 (task 21.0:1) 14/08/19 13:29:41 INFO MemoryStore: Block rdd_53_5 stored as values to memory (estimated size 936.0 B, free 289.6 MB) 14/08/19 13:29:41 INFO BlockManagerMaster: Updated info of block rdd_53_7 14/08/19 13:29:41 INFO MemoryStore: ensureFreeSpace(936) called with curMem=5550083, maxMem=309225062 14/08/19 13:29:41 ERROR Executor: Exception in task ID 735 java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1014) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111) at org.apache.spark.scheduler.Task.run(Task.scala:51) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) 14/08/19 13:29:41 WARN TaskSetManager: Loss was due to java.lang.ClassCastException 
java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1014) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111) at org.apache.spark.scheduler.Task.run(Task.scala:51) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) 14/08/19 13:29:41 INFO MemoryStore: Block rdd_53_0 stored as values to memory (estimated size 936.0 B, free 289.6 MB) 14/08/19 13:29:41 INFO BlockManagerInfo: Added rdd_53_5 in memory on 192.168.20.190:50718 (size: 936.0 B, free: 290.1 MB) 14/08/19 13:29:41 ERROR TaskSetManager: Task 21.0:1 failed 1 times; aborting job 14/08/19 13:29:41 INFO BlockManagerMaster: Updated info of block rdd_53_5 14/08/19 13:29:41 ERROR Executor: Exception in task ID 733 java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at 
$line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1014) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111) at org.apache.spark.scheduler.Task.run(Task.scala:51) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) 14/08/19 13:29:41 INFO TaskSchedulerImpl: Cancelling stage 21 14/08/19 13:29:41 INFO BlockManager: Found block broadcast_5 locally 14/08/19 13:29:41 INFO BlockManagerInfo: Added rdd_53_0 in memory on 192.168.20.190:50718 (size: 936.0 B, free: 290.1 MB) 14/08/19 13:29:41 INFO BlockManagerMaster: Updated info of block rdd_53_0 14/08/19 13:29:41 INFO TaskSchedulerImpl: Stage 21 was cancelled 14/08/19 13:29:41 ERROR Executor: Exception in task ID 728 java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at $line142.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389) 
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1014) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111) at org.apache.spark.scheduler.Task.run(Task.scala:51) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) 14/08/19 13:29:41 INFO DAGScheduler: Failed to run count at <console>:53 14/08/19 13:29:41 INFO TaskSetManager: Loss was due to java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map [duplicate 1] 14/08/19 13:29:41 INFO CacheManager: Partition rdd_53_8 not found, computing it 14/08/19 13:29:41 INFO HadoopRDD: Input split: file:/Users/ramdurga/data/S3_data/alpha-nms/08-18-2014/Prepare/124/Prepare_1408323522264.json:0+168 14/08/19 13:29:41 INFO TaskSetManager: Loss was due to java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map [duplicate 2] 14/08/19 13:29:41 INFO TaskSetManager: Loss was due to java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map [duplicate 3] 14/08/19 13:29:41 INFO TaskSetManager: Loss was due to java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map [duplicate 4] 14/08/19 13:29:41 INFO Executor: Executor is trying to kill task 736 
14/08/19 13:29:41 INFO Executor: Executor killed task 736 14/08/19 13:29:41 INFO TaskSetManager: Loss was due to java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map [duplicate 5] 14/08/19 13:29:41 INFO TaskSetManager: Loss was due to java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map [duplicate 6] 14/08/19 13:29:41 WARN TaskSetManager: Task 736 was killed. 14/08/19 13:29:41 INFO TaskSchedulerImpl: Removed TaskSet 21.0, whose tasks have all completed, from pool 14/08/19 13:29:41 INFO TaskSetManager: Loss was due to java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map [duplicate 7] 14/08/19 13:29:41 INFO TaskSchedulerImpl: Removed TaskSet 21.0, whose tasks have all completed, from pool org.apache.spark.SparkException: Job aborted due to stage failure: Task 21.0:1 failed 1 times, most recent failure: Exception failure in TID 729 on host localhost: java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.Map $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:44) scala.collection.Iterator$$anon$11.next(Iterator.scala:328) scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389) scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388) org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1014) org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:847) org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1083) org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111) 
org.apache.spark.scheduler.Task.run(Task.scala:51) org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) java.lang.Thread.run(Thread.java:745) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1044) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1028) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1026) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1026) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:634) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:634) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:634) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1229) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) -- View this 
message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Spark-exception-while-reading-different-inputs-tp12483.html Sent from the Apache Spark User List mailing list archive at Nabble.com. --------------------------------------------------------------------- To unsubscribe, e-mail: user-unsubscr...@spark.apache.org For additional commands, e-mail: user-h...@spark.apache.org