[ https://issues.apache.org/jira/browse/SPARK-31620?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17101862#comment-17101862 ]
angerszhu commented on SPARK-31620:
-----------------------------------

This happens because PlanSubqueries rewrites the scalar subquery inside the expression tree. Since the expression changed, transformAllExpressions invokes {color:#FF0000}QueryPlan.mapExpressions(){color}, which makes a copy of the tree. For an aggregate expression, makeCopy produces a new DeclarativeAggregate whose lazy val {color:#0747a6}inputAggBufferAttributes{color} has not been evaluated yet. When *HashAggregateExec.doConsumeWithoutKeys* later reuses this value, {color:#0747a6}inputAggBufferAttributes{color} is re-initialized with a new exprId, and the binding error occurs. In a correct SparkPlan, inputAggBufferAttributes is the same as the child's output. (A minimal sketch of this lazy-val-after-copy pitfall follows the quoted issue below.)

> TreeNodeException: Binding attribute, tree: sum#19L
> ---------------------------------------------------
>
>                 Key: SPARK-31620
>                 URL: https://issues.apache.org/jira/browse/SPARK-31620
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.3.4, 2.4.5, 3.0.0
>            Reporter: Yuming Wang
>            Priority: Major
>
> {noformat}
> scala> spark.sql("create temporary view t1 as select * from values (1, 2) as t1(a, b)")
> res0: org.apache.spark.sql.DataFrame = []
>
> scala> spark.sql("create temporary view t2 as select * from values (3, 4) as t2(c, d)")
> res1: org.apache.spark.sql.DataFrame = []
>
> scala> spark.sql("select sum(if(c > (select a from t1), d, 0)) as csum from t2").show
> org.apache.spark.sql.catalyst.errors.package$TreeNodeException: Binding attribute, tree: sum#19L
>   at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
>   at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:75)
>   at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:74)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$1(TreeNode.scala:309)
>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:72)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:309)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:314)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:399)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:237)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:397)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:350)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:314)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:314)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChild$2(TreeNode.scala:368)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$4(TreeNode.scala:427)
>   at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
>   at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
>   at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
>   at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38)
>   at scala.collection.TraversableLike.map(TraversableLike.scala:238)
>   at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
>   at scala.collection.AbstractTraversable.map(Traversable.scala:108)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:427)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:237)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:397)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:350)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:314)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:298)
>   at org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReference(BoundAttribute.scala:74)
>   at org.apache.spark.sql.catalyst.expressions.BindReferences$.$anonfun$bindReferences$1(BoundAttribute.scala:96)
>   at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
>   at scala.collection.immutable.List.foreach(List.scala:392)
>   at scala.collection.TraversableLike.map(TraversableLike.scala:238)
>   at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
>   at scala.collection.immutable.List.map(List.scala:298)
>   at org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReferences(BoundAttribute.scala:96)
>   at org.apache.spark.sql.execution.aggregate.HashAggregateExec.$anonfun$doConsumeWithoutKeys$4(HashAggregateExec.scala:348)
>   at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
>   at scala.collection.immutable.List.foreach(List.scala:392)
>   at scala.collection.TraversableLike.map(TraversableLike.scala:238)
>   at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
>   at scala.collection.immutable.List.map(List.scala:298)
>   at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doConsumeWithoutKeys(HashAggregateExec.scala:347)
>   at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doConsume(HashAggregateExec.scala:175)
>   at org.apache.spark.sql.execution.CodegenSupport.constructDoConsumeFunction(WholeStageCodegenExec.scala:221)
>   at org.apache.spark.sql.execution.CodegenSupport.consume(WholeStageCodegenExec.scala:192)
>   at org.apache.spark.sql.execution.CodegenSupport.consume$(WholeStageCodegenExec.scala:149)
>   at org.apache.spark.sql.execution.InputAdapter.consume(WholeStageCodegenExec.scala:496)
>   at org.apache.spark.sql.execution.InputRDDCodegen.doProduce(WholeStageCodegenExec.scala:483)
>   at org.apache.spark.sql.execution.InputRDDCodegen.doProduce$(WholeStageCodegenExec.scala:456)
>   at org.apache.spark.sql.execution.InputAdapter.doProduce(WholeStageCodegenExec.scala:496)
>   at org.apache.spark.sql.execution.CodegenSupport.$anonfun$produce$1(WholeStageCodegenExec.scala:95)
>   at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
>   at org.apache.spark.sql.execution.CodegenSupport.produce(WholeStageCodegenExec.scala:90)
>   at org.apache.spark.sql.execution.CodegenSupport.produce$(WholeStageCodegenExec.scala:90)
>   at org.apache.spark.sql.execution.InputAdapter.produce(WholeStageCodegenExec.scala:496)
>   at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doProduceWithoutKeys(HashAggregateExec.scala:243)
>   at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doProduce(HashAggregateExec.scala:167)
>   at org.apache.spark.sql.execution.CodegenSupport.$anonfun$produce$1(WholeStageCodegenExec.scala:95)
>   at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
>   at org.apache.spark.sql.execution.CodegenSupport.produce(WholeStageCodegenExec.scala:90)
>   at org.apache.spark.sql.execution.CodegenSupport.produce$(WholeStageCodegenExec.scala:90)
>   at org.apache.spark.sql.execution.aggregate.HashAggregateExec.produce(HashAggregateExec.scala:48)
>   at org.apache.spark.sql.execution.WholeStageCodegenExec.doCodeGen(WholeStageCodegenExec.scala:632)
>   at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:692)
>   at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
>   at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
>   at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
>   at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:316)
>   at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:434)
>   at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:420)
>   at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:47)
>   at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3626)
>   at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2695)
>   at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3616)
>   at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
>   at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
>   at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
>   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>   at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3614)
>   at org.apache.spark.sql.Dataset.head(Dataset.scala:2695)
>   at org.apache.spark.sql.Dataset.take(Dataset.scala:2902)
>   at org.apache.spark.sql.Dataset.getRows(Dataset.scala:300)
>   at org.apache.spark.sql.Dataset.showString(Dataset.scala:337)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:824)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:783)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:792)
>   ... 47 elided
> Caused by: java.lang.RuntimeException: Couldn't find sum#19L in [sum#13L,sum#12L]
>   at scala.sys.package$.error(package.scala:30)
>   at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.$anonfun$applyOrElse$1(BoundAttribute.scala:81)
>   at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
>   ... 140 more
> {noformat}
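To make the mechanism above concrete, here is a minimal, self-contained Scala sketch of the lazy-val-after-copy pitfall. These are stand-in classes for illustration only (FreshId, Agg, and inputAggBufferIds are hypothetical names, not Spark's actual DeclarativeAggregate or QueryPlan API):

{code:scala}
import java.util.concurrent.atomic.AtomicLong

// Stand-in for Spark's expression ID counter (hypothetical).
object FreshId {
  private val counter = new AtomicLong(0)
  def next(): Long = counter.getAndIncrement()
}

// Stand-in for a DeclarativeAggregate: the buffer attributes are a lazy val,
// so each *instance* mints its own IDs the first time they are accessed.
case class Agg(name: String) {
  lazy val inputAggBufferIds: Seq[Long] = Seq(FreshId.next())
}

object LazyValCopyDemo extends App {
  val original = Agg("sum")
  // Downstream code "binds" against the IDs of the original instance.
  val boundIds = original.inputAggBufferIds          // List(0)

  // QueryPlan.mapExpressions() effectively makes a copy when an expression
  // changed; the copy's lazy val has not been evaluated yet.
  val copied = original.copy()

  // First access on the copy re-runs the lazy val initializer and mints a
  // new ID, which no longer matches what was bound against the original --
  // the analogue of "Couldn't find sum#19L in [sum#13L,sum#12L]".
  println(s"bound against original: $boundIds")                   // List(0)
  println(s"copy re-initialized:    ${copied.inputAggBufferIds}") // List(1)
}
{code}

As the comment notes, in a correct SparkPlan inputAggBufferAttributes stays identical to the child's output, which is why the binding succeeds when the copy does not re-mint the exprIds.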