[ https://issues.apache.org/jira/browse/SPARK-25028?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Izek Greenfield updated SPARK-25028: ------------------------------------ Description: On line 143 of AnalyzePartitionCommand.scala: val partitionColumnValues = partitionColumns.indices.map(r.get(_).toString) If a partition column value is NULL, r.get(_) returns null and the call to .toString on it fails with a NullPointerException (NPE). Sample: val df = List((1, null , "first"), (2, null , "second")).toDF("index", "name", "value").withColumn("name", $"name".cast("string")) df.write.partitionBy("name").saveAsTable("df13") spark.sql("ANALYZE TABLE df13 PARTITION (name) COMPUTE STATISTICS") Output: 2018-08-08 09:25:43 WARN BaseSessionStateBuilder$$anon$1:66 - Max iterations (2) reached for batch Resolution java.lang.NullPointerException at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1$$anonfun$8.apply(AnalyzePartitionCommand.scala:143) at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1$$anonfun$8.apply(AnalyzePartitionCommand.scala:143) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.immutable.Range.foreach(Range.scala:160) at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) at scala.collection.AbstractTraversable.map(Traversable.scala:104) at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1.apply(AnalyzePartitionCommand.scala:143) at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1.apply(AnalyzePartitionCommand.scala:142) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) at 
scala.collection.TraversableLike$class.map(TraversableLike.scala:234) at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186) at org.apache.spark.sql.execution.command.AnalyzePartitionCommand.calculateRowCountsPerPartition(AnalyzePartitionCommand.scala:142) at org.apache.spark.sql.execution.command.AnalyzePartitionCommand.run(AnalyzePartitionCommand.scala:104) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190) at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190) at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3254) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3253) at org.apache.spark.sql.Dataset.<init>(Dataset.scala:190) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:75) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:641) ... 
49 elided was: on line 143: val partitionColumnValues = partitionColumns.indices.map(r.get(_).toString) if the value is NULL the code will fail with NPE > AnalyzePartitionCommand failed with NPE if value is null > -------------------------------------------------------- > > Key: SPARK-25028 > URL: https://issues.apache.org/jira/browse/SPARK-25028 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 2.4.0 > Reporter: Izek Greenfield > Priority: Major > > on line 143: val partitionColumnValues = > partitionColumns.indices.map(r.get(_).toString) > If the value is NULL the code will fail with NPE > sample: > val df = List((1, null , "first"), (2, null , "second")).toDF("index", > "name", "value").withColumn("name", $"name".cast("string")) > df.write.partitionBy("name").saveAsTable("df13") > spark.sql("ANALYZE TABLE df13 PARTITION (name) COMPUTE STATISTICS") > output: > 2018-08-08 09:25:43 WARN BaseSessionStateBuilder$$anon$1:66 - Max iterations > (2) reached for batch Resolution > java.lang.NullPointerException > at > org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1$$anonfun$8.apply(AnalyzePartitionCommand.scala:143) > at > org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1$$anonfun$8.apply(AnalyzePartitionCommand.scala:143) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at scala.collection.immutable.Range.foreach(Range.scala:160) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1.apply(AnalyzePartitionCommand.scala:143) > at > 
org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1.apply(AnalyzePartitionCommand.scala:142) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186) > at > org.apache.spark.sql.execution.command.AnalyzePartitionCommand.calculateRowCountsPerPartition(AnalyzePartitionCommand.scala:142) > at > org.apache.spark.sql.execution.command.AnalyzePartitionCommand.run(AnalyzePartitionCommand.scala:104) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) > at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190) > at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190) > at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3254) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3253) > at org.apache.spark.sql.Dataset.<init>(Dataset.scala:190) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:75) > at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:641) > ... 
49 elided -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org