[ https://issues.apache.org/jira/browse/SPARK-21966?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16160877#comment-16160877 ]

Feng Zhu commented on SPARK-21966:
----------------------------------

The ResolveMissingReferences rule is not intended to support binary nodes (e.g., Union, Join); supporting them completely would make the rule very complex.
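Until the rule handles Union, a possible workaround (a sketch only, not verified against every affected version; it reuses df1-df4 from the description below, and the column name "gid" is chosen purely for illustration) is to resolve grouping_id() before the Union rather than above it:

{code:java}
import org.apache.spark.sql.functions.{grouping_id, sum}

// Variant 1 (sketch): filter each cube result before the union, so
// grouping_id() is resolved directly against the Aggregate that produced it.
val filtered = df3.filter("grouping_id() = 0")
  .union(df4.filter("grouping_id() = 0"))
filtered.show()

// Variant 2 (sketch): expose grouping_id() as an ordinary output column
// ("gid" is an illustrative name), so it survives the Union and can be
// filtered like any other attribute.
val g1 = df1.cube("a").agg(sum("b").as("sum_b"), grouping_id().as("gid"))
val g2 = df2.cube("a").agg(sum("b").as("sum_b"), grouping_id().as("gid"))
g1.union(g2).filter("gid = 0").drop("gid").show()
{code}

Both variants keep grouping_id() next to the Aggregate that defines it, so the analyzer never has to pull the internal spark_grouping_id attribute up through the Union.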

> ResolveMissingReference rule should not ignore the Union operator
> -----------------------------------------------------------------
>
>                 Key: SPARK-21966
>                 URL: https://issues.apache.org/jira/browse/SPARK-21966
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.1.0, 2.1.1, 2.2.0
>            Reporter: Feng Zhu
>
> The example below fails:
> {code:java}
> val df1 = spark.createDataFrame(Seq((1, 1), (2, 1), (2, 2))).toDF("a", "b")
> val df2 = spark.createDataFrame(Seq((1, 1), (1, 2), (2, 3))).toDF("a", "b")
> val df3 = df1.cube("a").sum("b")
> val df4 = df2.cube("a").sum("b")
> val df5 = df3.union(df4).filter("grouping_id()=0").show()
> {code}
> It throws an exception:
> {code:java}
> Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot resolve '`spark_grouping_id`' given input columns: [a, sum(b)];;
> 'Filter ('spark_grouping_id > 0)
> +- Union
>    :- Aggregate [a#17, spark_grouping_id#15], [a#17, sum(cast(b#6 as bigint)) AS sum(b)#14L]
>    :  +- Expand [List(a#5, b#6, a#16, 0), List(a#5, b#6, null, 1)], [a#5, b#6, a#17, spark_grouping_id#15]
>    :     +- Project [a#5, b#6, a#5 AS a#16]
>    :        +- Project [_1#0 AS a#5, _2#1 AS b#6]
>    :           +- LocalRelation [_1#0, _2#1]
>    +- Aggregate [a#30, spark_grouping_id#28], [a#30, sum(cast(b#6 as bigint)) AS sum(b)#27L]
>       +- Expand [List(a#5, b#6, a#29, 0), List(a#5, b#6, null, 1)], [a#5, b#6, a#30, spark_grouping_id#28]
>          +- Project [a#5, b#6, a#5 AS a#29]
>             +- Project [_1#0 AS a#5, _2#1 AS b#6]
>                +- LocalRelation [_1#0, _2#1]
>   at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:77)
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:74)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:310)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:310)
>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:309)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307)
>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:331)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:307)
>   at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionUp$1(QueryPlan.scala:282)
>   at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2(QueryPlan.scala:292)
>   at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$7.apply(QueryPlan.scala:301)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
>   at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:301)
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:74)
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:67)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:128)
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:67)
>   at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:57)
>   at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:48)
>   at org.apache.spark.sql.execution.QueryExecution.withCachedData$lzycompute(QueryExecution.scala:72)
>   at org.apache.spark.sql.execution.QueryExecution.withCachedData(QueryExecution.scala:71)
>   at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:77)
>   at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:77)
>   at org.apache.spark.sql.execution.QueryExecution.<init>(QueryExecution.scala:79)
>   at org.apache.spark.sql.internal.SessionState.executePlan(SessionState.scala:169)
>   at org.apache.spark.sql.Dataset.<init>(Dataset.scala:167)
>   at org.apache.spark.sql.Dataset$.apply(Dataset.scala:58)
>   at org.apache.spark.sql.Dataset.withTypedPlan(Dataset.scala:2827)
>   at org.apache.spark.sql.Dataset.filter(Dataset.scala:1272)
>   at org.apache.spark.sql.Dataset.filter(Dataset.scala:1286)
>   at SparkSQLExample$.main(SparkSQLExample.scala:57)
> {code}

--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org