[ https://issues.apache.org/jira/browse/SPARK-23274?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Xiao Li updated SPARK-23274:
----------------------------
    Priority: Blocker  (was: Major)

> ReplaceExceptWithFilter fails on dataframes filtered on same column
> -------------------------------------------------------------------
>
>                 Key: SPARK-23274
>                 URL: https://issues.apache.org/jira/browse/SPARK-23274
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.3.0
>            Reporter: Onur Satici
>            Priority: Blocker
>
> Currently affects:
> {code:java}
> $ git tag --contains 01f6ba0e7a
> v2.3.0-rc1
> v2.3.0-rc2
> {code}
> Steps to reproduce:
> {code:java}
> $ cat test.csv
> a,b
> 1,2
> 1,3
> 2,2
> 2,4
> {code}
> {code:java}
> val df = spark.read.format("csv").option("header", "true").load("test.csv")
> val df1 = df.filter($"a" === 1)
> val df2 = df.filter($"a" === 2)
> df1.select("b").except(df2.select("b")).show
> {code}
> results in:
> {code:java}
> java.util.NoSuchElementException: key not found: a
>   at scala.collection.MapLike$class.default(MapLike.scala:228)
>   at scala.collection.AbstractMap.default(Map.scala:59)
>   at scala.collection.MapLike$class.apply(MapLike.scala:141)
>   at scala.collection.AbstractMap.apply(Map.scala:59)
>   at org.apache.spark.sql.catalyst.optimizer.ReplaceExceptWithFilter$$anonfun$org$apache$spark$sql$catalyst$optimizer$ReplaceExceptWithFilter$$transformCondition$1.applyOrElse(ReplaceExceptWithFilter.scala:60)
>   at org.apache.spark.sql.catalyst.optimizer.ReplaceExceptWithFilter$$anonfun$org$apache$spark$sql$catalyst$optimizer$ReplaceExceptWithFilter$$transformCondition$1.applyOrElse(ReplaceExceptWithFilter.scala:60)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:266)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:256)
>   at org.apache.spark.sql.catalyst.optimizer.ReplaceExceptWithFilter$.org$apache$spark$sql$catalyst$optimizer$ReplaceExceptWithFilter$$transformCondition(ReplaceExceptWithFilter.scala:60)
>   at org.apache.spark.sql.catalyst.optimizer.ReplaceExceptWithFilter$$anonfun$apply$1.applyOrElse(ReplaceExceptWithFilter.scala:50)
>   at org.apache.spark.sql.catalyst.optimizer.ReplaceExceptWithFilter$$anonfun$apply$1.applyOrElse(ReplaceExceptWithFilter.scala:48)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267)
>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:266)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:256)
>   at org.apache.spark.sql.catalyst.optimizer.ReplaceExceptWithFilter$.apply(ReplaceExceptWithFilter.scala:48)
>   at org.apache.spark.sql.catalyst.optimizer.ReplaceExceptWithFilter$.apply(ReplaceExceptWithFilter.scala:41)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:92)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:89)
>   at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57)
>   at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66)
>   at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:89)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:81)
>   at scala.collection.immutable.List.foreach(List.scala:381)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:81)
>   at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:79)
>   at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:79)
>   at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:85)
>   at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:81)
>   at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:90)
>   at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:90)
>   at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3141)
>   at org.apache.spark.sql.Dataset.head(Dataset.scala:2426)
>   at org.apache.spark.sql.Dataset.take(Dataset.scala:2640)
>   at org.apache.spark.sql.Dataset.showString(Dataset.scala:237)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:668)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:627)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:636)
>   ... 49 elided
> {code}
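>
> For reference, below is a self-contained variant of the reproduction plus a possible workaround. The inline data mirrors test.csv above; the anti-join rewrite is only a sketch of an equivalent query (except() has EXCEPT DISTINCT semantics, which for non-null keys matches a left anti join followed by dropDuplicates) and is not part of the original report:
> {code:java}
> // In spark-shell the implicits are already available; in an application, import them explicitly.
> import spark.implicits._
>
> // Same data as test.csv above, built inline so the reproduction does not depend on a local file.
> // Columns are strings, matching what the CSV reader produces without schema inference.
> val df = Seq(("1", "2"), ("1", "3"), ("2", "2"), ("2", "4")).toDF("a", "b")
> val df1 = df.filter($"a" === "1")
> val df2 = df.filter($"a" === "2")
>
> // On the affected builds this fails with: java.util.NoSuchElementException: key not found: a
> // df1.select("b").except(df2.select("b")).show
>
> // Possible workaround: a left anti join is not an Except node, so it is never rewritten by the
> // ReplaceExceptWithFilter rule. dropDuplicates() restores except()'s distinct semantics.
> df1.select("b")
>   .join(df2.select("b"), Seq("b"), "left_anti")
>   .dropDuplicates()
>   .show()
> {code}
> With this data, both queries should return a single row, b = 3.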