[ https://issues.apache.org/jira/browse/SPARK-39426?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17564735#comment-17564735 ]
Pablo Langa Blanco commented on SPARK-39426:
--------------------------------------------

I tested it on master and 3.3.0 and it seems to be fixed.

> Subquery star select creates broken plan in case of self join
> -------------------------------------------------------------
>
>                 Key: SPARK-39426
>                 URL: https://issues.apache.org/jira/browse/SPARK-39426
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.2.1
>            Reporter: Denis
>            Priority: Major
>
> A subquery star select creates a broken plan in the case of a self join.
> How to reproduce:
> {code:java}
> import java.nio.file.Files
> import spark.implicits._
>
> spark.sparkContext.setCheckpointDir(
>   Files.createTempDirectory("some-prefix").toFile.toString)
>
> val frame = Seq(1).toDF("id").checkpoint()
>
> val joined = frame
>   .join(frame, Seq("id"), "left")
>   .select("id")
>
> joined
>   .join(joined, Seq("id"), "left")
>   .as("a")
>   .select("a.*")
> {code}
> This query throws the following exception:
> {code:java}
> Exception in thread "main" org.apache.spark.sql.AnalysisException: Resolved attribute(s) id#7 missing from id#10,id#11 in operator !Project [id#7, id#10]. Attribute(s) with the same name appear in the operation: id. Please check if the right attribute(s) are used.;
> Project [id#10, id#4]
> +- SubqueryAlias a
>    +- Project [id#10, id#4]
>       +- Join LeftOuter, (id#4 = id#10)
>          :- Project [id#4]
>          :  +- Project [id#7, id#4]
>          :     +- Join LeftOuter, (id#4 = id#7)
>          :        :- LogicalRDD [id#4], false
>          :        +- LogicalRDD [id#7], false
>          +- Project [id#10]
>             +- !Project [id#7, id#10]
>                +- Join LeftOuter, (id#10 = id#11)
>                   :- LogicalRDD [id#10], false
>                   +- LogicalRDD [id#11], false
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.failAnalysis(CheckAnalysis.scala:51)
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.failAnalysis$(CheckAnalysis.scala:50)
>   at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:182)
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1(CheckAnalysis.scala:471)
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1$adapted(CheckAnalysis.scala:94)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:263)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
>   at scala.collection.Iterator.foreach(Iterator.scala:943)
>   at scala.collection.Iterator.foreach$(Iterator.scala:943)
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>   at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>   at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>   at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
>   at scala.collection.Iterator.foreach(Iterator.scala:943)
>   at scala.collection.Iterator.foreach$(Iterator.scala:943)
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>   at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>   at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>   at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
>   at scala.collection.Iterator.foreach(Iterator.scala:943)
>   at scala.collection.Iterator.foreach$(Iterator.scala:943)
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>   at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>   at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>   at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
>   at scala.collection.Iterator.foreach(Iterator.scala:943)
>   at scala.collection.Iterator.foreach$(Iterator.scala:943)
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>   at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>   at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>   at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:262)
>   at scala.collection.Iterator.foreach(Iterator.scala:943)
>   at scala.collection.Iterator.foreach$(Iterator.scala:943)
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>   at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>   at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>   at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:262)
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:94)
>   at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:91)
>   at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:182)
>   at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:205)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)
>   at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:202)
>   at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:88)
>   at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
>   at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:196)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>   at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:196)
>   at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:88)
>   at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:86)
>   at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:78)
>   at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:90)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:88)
>   at org.apache.spark.sql.Dataset.withPlan(Dataset.scala:3734)
>   at org.apache.spark.sql.Dataset.select(Dataset.scala:1454)
>   at org.apache.spark.sql.Dataset.select(Dataset.scala:1471)
>   at com.joom.gburg_local.GburgTestLocal$.main(GburgTestLocal.scala:61)
>   at com.joom.gburg_local.GburgTestLocal.main(GburgTestLocal.scala)
> {code}
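Per the comment above, upgrading to 3.3.0 (or building from master) makes the failure go away. For anyone pinned to an affected 3.2.x release, a commonly used way to sidestep self-join attribute ambiguity is to rebuild one side of the join from its RDD and schema, so the analyzer assigns that side fresh attribute IDs. The sketch below is only illustrative and has not been verified against this exact plan; the `SelfJoinWorkaround` object name and the `local[*]` session are assumptions, and the rest mirrors the reproduction above.

{code:java}
import java.nio.file.Files

import org.apache.spark.sql.SparkSession

object SelfJoinWorkaround {
  def main(args: Array[String]): Unit = {
    // Assumption: a throwaway local session, just for illustration.
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    import spark.implicits._

    spark.sparkContext.setCheckpointDir(
      Files.createTempDirectory("some-prefix").toFile.toString)

    val frame = Seq(1).toDF("id").checkpoint()

    val joined = frame
      .join(frame, Seq("id"), "left")
      .select("id")

    // Rebuilding the right-hand side from its RDD and schema detaches it
    // from joined's logical plan, so the analyzer assigns fresh attribute
    // IDs instead of reusing the ones already bound on the left-hand side.
    val joinedCopy = spark.createDataFrame(joined.rdd, joined.schema)

    joined
      .join(joinedCopy, Seq("id"), "left")
      .as("a")
      .select("a.*")
      .show()

    spark.stop()
  }
}
{code}

The round trip through `RDD[Row]` adds conversion overhead and drops any optimizations attached to the original plan, so treat it as a stopgap until the upgrade, not a fix.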