[ https://issues.apache.org/jira/browse/SPARK-20093?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15946501#comment-15946501 ]
Takeshi Yamamuro commented on SPARK-20093: ------------------------------------------ It seems this issue is the same as SPARK-10925. > Exception when Joining dataframe with another dataframe generated by applying > groupBy transformation on original one > -------------------------------------------------------------------------------------------------------------------- > > Key: SPARK-20093 > URL: https://issues.apache.org/jira/browse/SPARK-20093 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.0.0, 2.0.1, 2.0.2, 2.1.0, 2.2.0 > Reporter: Hosur Narahari > > When we generate a dataframe by grouping, and then perform a join on the original > dataframe with the aggregate column, we get an AnalysisException. Below I've > attached a piece of code to reproduce it, along with the resulting exception. > Code: > import org.apache.spark.sql.SparkSession > object App { > lazy val spark = > SparkSession.builder.appName("Test").master("local").getOrCreate > def main(args: Array[String]): Unit = { > test1 > } > private def test1 { > import org.apache.spark.sql.functions._ > val df = spark.createDataFrame(Seq(("M",172,60), ("M", 170, 60), ("F", > 155, 56), ("M", 160, 55), ("F", 150, 53))).toDF("gender", "height", "weight") > val groupDF = df.groupBy("gender").agg(min("height").as("height")) > groupDF.show() > val out = groupDF.join(df, groupDF("height") <=> > df("height")).select(df("gender"), df("height"), df("weight")) > out.show > } > } > When I ran the above code, I got the exception below: > Exception in thread "main" org.apache.spark.sql.AnalysisException: resolved > attribute(s) height#8 missing from > height#19,height#30,gender#29,weight#31,gender#7 in operator !Join Inner, > (height#19 <=> height#8);; > !Join Inner, (height#19 <=> height#8) > :- Aggregate [gender#7], [gender#7, min(height#8) AS height#19] > : +- Project [_1#0 AS gender#7, _2#1 AS height#8, _3#2 AS weight#9] > : +- LocalRelation [_1#0, _2#1, _3#2] > +- Project [_1#0 AS gender#29, _2#1 AS height#30, _3#2 AS 
weight#31] > +- LocalRelation [_1#0, _2#1, _3#2] > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:39) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:90) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:342) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:78) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:78) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:90) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:53) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:67) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2831) > at org.apache.spark.sql.Dataset.join(Dataset.scala:843) > at org.apache.spark.sql.Dataset.join(Dataset.scala:807) > at App$.test1(App.scala:17) > at App$.main(App.scala:9) > at App.main(App.scala) > Could someone please look into it? -- This message was sent by Atlassian JIRA (v6.3.15#6346) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org