[jira] [Commented] (SPARK-30229) java.lang.NullPointerException at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783)
[ https://issues.apache.org/jira/browse/SPARK-30229?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17021704#comment-17021704 ] Hyukjin Kwon commented on SPARK-30229: -- [~Ankitraj] were you able to reproduce? [~SeaAndHill] can you show exact code and steps to reproduce this? Ideally should be copy-and-paste-able. > java.lang.NullPointerException at > org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > - > > Key: SPARK-30229 > URL: https://issues.apache.org/jira/browse/SPARK-30229 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.2.1 >Reporter: SeaAndHill >Priority: Major > > 2019-12-12 11:52:00 INFO JobScheduler:54 - Added jobs for time 157612272 > ms > 2019-12-12 11:52:00 INFO JobScheduler:54 - Starting job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 INFO CarbonSparkSqlParser:54 - Parsing command: > event_detail_temp > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO JobScheduler:54 - Finished job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 ERROR JobScheduler:91 - Error running job streaming job > 157612272 ms.0 > java.lang.NullPointerException > at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.currPrefLocs(CoalescedRDD.scala:178) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:196) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:195) > at > 
scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.getAllPrefLocs(CoalescedRDD.scala:195) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.(CoalescedRDD.scala:188) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.coalesce(CoalescedRDD.scala:391) > at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:91) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at 
org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperati
[jira] [Commented] (SPARK-30229) java.lang.NullPointerException at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783)
[ https://issues.apache.org/jira/browse/SPARK-30229?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16999194#comment-16999194 ] Ankit Raj Boudh commented on SPARK-30229: - Ok, I will try the same operation. > java.lang.NullPointerException at > org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > - > > Key: SPARK-30229 > URL: https://issues.apache.org/jira/browse/SPARK-30229 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.2.1 >Reporter: SeaAndHill >Priority: Major > > 2019-12-12 11:52:00 INFO JobScheduler:54 - Added jobs for time 157612272 > ms > 2019-12-12 11:52:00 INFO JobScheduler:54 - Starting job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 INFO CarbonSparkSqlParser:54 - Parsing command: > event_detail_temp > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO JobScheduler:54 - Finished job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 ERROR JobScheduler:91 - Error running job streaming job > 157612272 ms.0 > java.lang.NullPointerException > at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.currPrefLocs(CoalescedRDD.scala:178) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:196) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:195) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at > 
org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.getAllPrefLocs(CoalescedRDD.scala:195) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.(CoalescedRDD.scala:188) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.coalesce(CoalescedRDD.scala:391) > at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:91) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > 
org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:362) > at or
[jira] [Commented] (SPARK-30229) java.lang.NullPointerException at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783)
[ https://issues.apache.org/jira/browse/SPARK-30229?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16998958#comment-16998958 ] SeaAndHill commented on SPARK-30229: I am using Spark Streaming to consume JSON data from Kafka and call the collectAsList method, and the exception occurs — not always, but sometimes or often. > java.lang.NullPointerException at > org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > - > > Key: SPARK-30229 > URL: https://issues.apache.org/jira/browse/SPARK-30229 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.2.1 >Reporter: SeaAndHill >Priority: Major > > 2019-12-12 11:52:00 INFO JobScheduler:54 - Added jobs for time 157612272 > ms > 2019-12-12 11:52:00 INFO JobScheduler:54 - Starting job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 INFO CarbonSparkSqlParser:54 - Parsing command: > event_detail_temp > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO JobScheduler:54 - Finished job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 ERROR JobScheduler:91 - Error running job streaming job > 157612272 ms.0 > java.lang.NullPointerException > at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.currPrefLocs(CoalescedRDD.scala:178) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:196) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:195) > at > 
scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.getAllPrefLocs(CoalescedRDD.scala:195) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.(CoalescedRDD.scala:188) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.coalesce(CoalescedRDD.scala:391) > at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:91) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at 
org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rd
[jira] [Commented] (SPARK-30229) java.lang.NullPointerException at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783)
[ https://issues.apache.org/jira/browse/SPARK-30229?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16998247#comment-16998247 ] Ankit Raj Boudh commented on SPARK-30229: - please provide me what operation you have done so that i will reproduce this issue. > java.lang.NullPointerException at > org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > - > > Key: SPARK-30229 > URL: https://issues.apache.org/jira/browse/SPARK-30229 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.2.1 >Reporter: SeaAndHill >Priority: Major > > 2019-12-12 11:52:00 INFO JobScheduler:54 - Added jobs for time 157612272 > ms > 2019-12-12 11:52:00 INFO JobScheduler:54 - Starting job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 INFO CarbonSparkSqlParser:54 - Parsing command: > event_detail_temp > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO JobScheduler:54 - Finished job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 ERROR JobScheduler:91 - Error running job streaming job > 157612272 ms.0 > java.lang.NullPointerException > at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.currPrefLocs(CoalescedRDD.scala:178) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:196) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:195) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at 
scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.getAllPrefLocs(CoalescedRDD.scala:195) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.(CoalescedRDD.scala:188) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.coalesce(CoalescedRDD.scala:391) > at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:91) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at 
org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org
[jira] [Commented] (SPARK-30229) java.lang.NullPointerException at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783)
[ https://issues.apache.org/jira/browse/SPARK-30229?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16998245#comment-16998245 ] Ankit Raj Boudh commented on SPARK-30229: - No, today I will analyse this. > java.lang.NullPointerException at > org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > - > > Key: SPARK-30229 > URL: https://issues.apache.org/jira/browse/SPARK-30229 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.2.1 >Reporter: SeaAndHill >Priority: Major > > 2019-12-12 11:52:00 INFO JobScheduler:54 - Added jobs for time 157612272 > ms > 2019-12-12 11:52:00 INFO JobScheduler:54 - Starting job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 INFO CarbonSparkSqlParser:54 - Parsing command: > event_detail_temp > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO JobScheduler:54 - Finished job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 ERROR JobScheduler:91 - Error running job streaming job > 157612272 ms.0 > java.lang.NullPointerException > at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.currPrefLocs(CoalescedRDD.scala:178) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:196) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:195) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at > 
org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.getAllPrefLocs(CoalescedRDD.scala:195) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.(CoalescedRDD.scala:188) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.coalesce(CoalescedRDD.scala:391) > at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:91) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > 
org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:362) > at o
[jira] [Commented] (SPARK-30229) java.lang.NullPointerException at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783)
[ https://issues.apache.org/jira/browse/SPARK-30229?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16996907#comment-16996907 ] SeaAndHill commented on SPARK-30229: do you know the cause and the work around to avoid this exception > java.lang.NullPointerException at > org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > - > > Key: SPARK-30229 > URL: https://issues.apache.org/jira/browse/SPARK-30229 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.2.1 >Reporter: SeaAndHill >Priority: Major > > 2019-12-12 11:52:00 INFO JobScheduler:54 - Added jobs for time 157612272 > ms > 2019-12-12 11:52:00 INFO JobScheduler:54 - Starting job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 INFO CarbonSparkSqlParser:54 - Parsing command: > event_detail_temp > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO JobScheduler:54 - Finished job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 ERROR JobScheduler:91 - Error running job streaming job > 157612272 ms.0 > java.lang.NullPointerException > at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.currPrefLocs(CoalescedRDD.scala:178) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:196) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:195) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at 
scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.getAllPrefLocs(CoalescedRDD.scala:195) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.(CoalescedRDD.scala:188) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.coalesce(CoalescedRDD.scala:391) > at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:91) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at 
org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withSco
[jira] [Commented] (SPARK-30229) java.lang.NullPointerException at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783)
[ https://issues.apache.org/jira/browse/SPARK-30229?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16996834#comment-16996834 ] Ankit Raj Boudh commented on SPARK-30229: - i will raise PR for this issue > java.lang.NullPointerException at > org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > - > > Key: SPARK-30229 > URL: https://issues.apache.org/jira/browse/SPARK-30229 > Project: Spark > Issue Type: Bug > Components: Spark Core >Affects Versions: 2.2.1 >Reporter: SeaAndHill >Priority: Major > > 2019-12-12 11:52:00 INFO JobScheduler:54 - Added jobs for time 157612272 > ms > 2019-12-12 11:52:00 INFO JobScheduler:54 - Starting job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 INFO CarbonSparkSqlParser:54 - Parsing command: > event_detail_temp > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:95 - skip CarbonOptimizer > 2019-12-12 11:52:00 INFO CarbonLateDecodeRule:72 - Skip CarbonOptimizer > 2019-12-12 11:52:00 INFO JobScheduler:54 - Finished job streaming job > 157612272 ms.0 from job set of time 157612272 ms > 2019-12-12 11:52:00 ERROR JobScheduler:91 - Error running job streaming job > 157612272 ms.0 > java.lang.NullPointerException > at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1783) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.currPrefLocs(CoalescedRDD.scala:178) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:196) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations$$anonfun$getAllPrefLocs$2.apply(CoalescedRDD.scala:195) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at > 
org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.getAllPrefLocs(CoalescedRDD.scala:195) > at > org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.(CoalescedRDD.scala:188) > at > org.apache.spark.rdd.DefaultPartitionCoalescer.coalesce(CoalescedRDD.scala:391) > at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:91) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at > 
org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) > at scala.Option.getOrElse(Option.scala:121) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:362) > at o