[ https://issues.apache.org/jira/browse/SPARK-13230?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Alin Treznai updated SPARK-13230:
---------------------------------

Description:

Using HashMap.merged with Spark fails with NullPointerException.

{noformat}
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.immutable.HashMap

object MergeTest {

  def mergeFn: (HashMap[String, Long], HashMap[String, Long]) => HashMap[String, Long] = {
    case (m1, m2) => m1.merged(m2) { case (x, y) => (x._1, x._2 + y._2) }
  }

  def empty = HashMap.empty[String, Long]

  def main(args: Array[String]) = {
    val input = Seq(HashMap("A" -> 1L), HashMap("A" -> 2L, "B" -> 3L), HashMap("A" -> 2L, "C" -> 4L))
    val conf = new SparkConf().setAppName("MergeTest").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val result = sc.parallelize(input).reduce(mergeFn)
    println(s"Result=$result")
    sc.stop()
  }
}
{noformat}

Error message:

org.apache.spark.SparkDriverExecutionException: Execution error
    at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1169)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1637)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1952)
    at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1025)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.reduce(RDD.scala:1007)
    at MergeTest$.main(MergeTest.scala:21)
    at MergeTest.main(MergeTest.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
Caused by: java.lang.NullPointerException
    at MergeTest$$anonfun$mergeFn$1$$anonfun$apply$1.apply(MergeTest.scala:12)
    at MergeTest$$anonfun$mergeFn$1$$anonfun$apply$1.apply(MergeTest.scala:12)
    at scala.collection.immutable.HashMap$$anon$2.apply(HashMap.scala:148)
    at scala.collection.immutable.HashMap$HashMap1.updated0(HashMap.scala:200)
    at scala.collection.immutable.HashMap$HashTrieMap.updated0(HashMap.scala:322)
    at scala.collection.immutable.HashMap$HashTrieMap.merge0(HashMap.scala:463)
    at scala.collection.immutable.HashMap.merged(HashMap.scala:117)
    at MergeTest$$anonfun$mergeFn$1.apply(MergeTest.scala:12)
    at MergeTest$$anonfun$mergeFn$1.apply(MergeTest.scala:11)
    at org.apache.spark.rdd.RDD$$anonfun$reduce$1$$anonfun$15.apply(RDD.scala:1020)
    at org.apache.spark.rdd.RDD$$anonfun$reduce$1$$anonfun$15.apply(RDD.scala:1017)
    at org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:56)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1165)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1637)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
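For comparison, the same aggregation can be expressed without HashMap.merged. The sketch below is a possible workaround only, assuming the failure is specific to the merged code path; it is illustrative and not a fix for the underlying behaviour, and the name mergeWithoutMerged is introduced here for illustration rather than taken from the original report.

{noformat}
import scala.collection.immutable.HashMap

// Illustrative workaround sketch: combine the two maps with foldLeft/updated
// instead of HashMap.merged, summing the values of keys present in both maps.
def mergeWithoutMerged(m1: HashMap[String, Long], m2: HashMap[String, Long]): HashMap[String, Long] =
  m2.foldLeft(m1) { case (acc, (k, v)) =>
    acc.updated(k, acc.getOrElse(k, 0L) + v)
  }

// Usage would mirror the reducer above, e.g.:
//   sc.parallelize(input).reduce(mergeWithoutMerged _)
{noformat}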
> HashMap.merged not working properly with Spark
> ----------------------------------------------
>
>                 Key: SPARK-13230
>                 URL: https://issues.apache.org/jira/browse/SPARK-13230
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 1.6.0
>         Environment: Ubuntu 14.04.3, Scala 2.11.7, Spark 1.6.0
>            Reporter: Alin Treznai
>
> Using HashMap.merged with Spark fails with NullPointerException.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)