Barry Becker created SPARK-19317: ------------------------------------ Summary: UnsupportedOperationException: empty.reduceLeft in LinearSeqOptimized Key: SPARK-19317 URL: https://issues.apache.org/jira/browse/SPARK-19317 Project: Spark Issue Type: Bug Components: Spark Core Affects Versions: 2.1.0 Reporter: Barry Becker
I wish I had more of a simple reproducible case to give, but I got the below exception while selecting null values in one of the columns of a dataframe. My client code that failed was df.filter(filterExp).count() where the filter expression was something like someColumn.isNull. There were 412 nulls out of 716,000 total rows for the column being filtered. It's odd because I have a different, smaller dataset where I did the same thing on a column with 100 nulls out of 800 and did not get the error. The exception seems to indicate that spark is trying to do reduceLeft on an empty list. {code} java.lang.UnsupportedOperationException: empty.reduceLeftscala.collection.LinearSeqOptimized$class.reduceLeft(LinearSeqOptimized.scala:137) scala.collection.immutable.List.reduceLeft(List.scala:84) scala.collection.TraversableOnce$class.reduce(TraversableOnce.scala:208) scala.collection.AbstractTraversable.reduce(Traversable.scala:104) org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$1.applyOrElse(InMemoryTableScanExec.scala:90) org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$1.applyOrElse(InMemoryTableScanExec.scala:54) scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36) org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$1.applyOrElse(InMemoryTableScanExec.scala:61) org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$1.applyOrElse(InMemoryTableScanExec.scala:54) scala.PartialFunction$Lifted.apply(PartialFunction.scala:223) scala.PartialFunction$Lifted.apply(PartialFunction.scala:219) org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$2.apply(InMemoryTableScanExec.scala:95) org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$2.apply(InMemoryTableScanExec.scala:94) scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) 
scala.collection.immutable.List.foreach(List.scala:381) scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241) scala.collection.immutable.List.flatMap(List.scala:344) org.apache.spark.sql.execution.columnar.InMemoryTableScanExec.(InMemoryTableScanExec.scala:94) org.apache.spark.sql.execution.SparkStrategies$InMemoryScans$$anonfun$6.apply(SparkStrategies.scala:306) org.apache.spark.sql.execution.SparkStrategies$InMemoryScans$$anonfun$6.apply(SparkStrategies.scala:306) org.apache.spark.sql.execution.SparkPlanner.pruneFilterProject(SparkPlanner.scala:96) org.apache.spark.sql.execution.SparkStrategies$InMemoryScans$.apply(SparkStrategies.scala:302) org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:62) org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:62) scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434) scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440) scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439) org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92) org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77) org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74) scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157) scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157) scala.collection.Iterator$class.foreach(Iterator.scala:893) scala.collection.AbstractIterator.foreach(Iterator.scala:1336) scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157) scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336) org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74) org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66) 
scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434) scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440) org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92) org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77) org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74) scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157) scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157) scala.collection.Iterator$class.foreach(Iterator.scala:893) scala.collection.AbstractIterator.foreach(Iterator.scala:1336) scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157) scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336) org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74) org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66) scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434) scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440) org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92) org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:79) org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:75) org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:84) org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:84) org.apache.spark.sql.Dataset.withCallback(Dataset.scala:2774) org.apache.spark.sql.Dataset.count(Dataset.scala:2404) mypackage.Selection(Selection.scala:34) {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: 
issues-h...@spark.apache.org