This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new 33e4ce5 [SPARK-36339][SQL] References to grouping that not part of aggregation should be replaced 33e4ce5 is described below commit 33e4ce562a59d191b06d477ecc6d9230e43b96b8 Author: gaoyajun02 <gaoyaju...@gmail.com> AuthorDate: Fri Aug 6 16:34:37 2021 +0800 [SPARK-36339][SQL] References to grouping that not part of aggregation should be replaced ### What changes were proposed in this pull request? Currently, a reference to a grouping column that appears after an aggregate expression in the select list is reported as an error when GROUPING SETS is used, e.g. ``` SELECT count(name) c, name FROM VALUES ('Alice'), ('Bob') people(name) GROUP BY name GROUPING SETS(name); ``` Error in query: expression 'people.`name`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.;; ### Why are the changes needed? The anonymous function passed to `map` in `constructAggregateExprs` used the underscore placeholder, so only the final expression of the block became the function body; the preceding statements (including the creation of `aggsBuffer`) ran once and were shared by all aggregations. This change gives the function a named parameter (`agg`) so that the whole block is evaluated for each aggregation. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests. Closes #33574 from gaoyajun02/SPARK-36339. 
Lead-authored-by: gaoyajun02 <gaoyaju...@gmail.com> Co-authored-by: gaoyajun02 <gaoyaju...@meituan.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 888f8f03c89ea7ee8997171eadf64c87e17c4efe) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 ++-- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 75fad11a..963b42b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -580,7 +580,7 @@ class Analyzer(override val catalogManager: CatalogManager) aggregations: Seq[NamedExpression], groupByAliases: Seq[Alias], groupingAttrs: Seq[Expression], - gid: Attribute): Seq[NamedExpression] = aggregations.map { + gid: Attribute): Seq[NamedExpression] = aggregations.map { agg => // collect all the found AggregateExpression, so we can check an expression is part of // any AggregateExpression or not. val aggsBuffer = ArrayBuffer[Expression]() @@ -588,7 +588,7 @@ class Analyzer(override val catalogManager: CatalogManager) def isPartOfAggregation(e: Expression): Boolean = { aggsBuffer.exists(a => a.find(_ eq e).isDefined) } - replaceGroupingFunc(_, groupByExprs, gid).transformDown { + replaceGroupingFunc(agg, groupByExprs, gid).transformDown { // AggregateExpression should be computed on the unmodified value of its argument // expressions, so we should not replace any references to grouping expression // inside it. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ed3b479..032ddbb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3405,6 +3405,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("SPARK-36339: References to grouping attributes should be replaced") { + withTempView("t") { + Seq("a", "a", "b").toDF("x").createOrReplaceTempView("t") + checkAnswer( + sql( + """ + |select count(x) c, x from t + |group by x grouping sets(x) + """.stripMargin), + Seq(Row(2, "a"), Row(1, "b"))) + } + } + test("SPARK-31166: UNION map<null, null> and other maps should not fail") { checkAnswer( sql("(SELECT map()) UNION ALL (SELECT map(1, 2))"), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org