This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new fab10f0 [SPARK-33131][SQL][3.0] Fix grouping sets with having clause can not resolve qualified col name fab10f0 is described below commit fab10f0dd37ecc8f77d5d8115118ec0a74b59da0 Author: ulysses <youxi...@weidian.com> AuthorDate: Fri Oct 16 22:11:13 2020 -0700 [SPARK-33131][SQL][3.0] Fix grouping sets with having clause can not resolve qualified col name This is a backport of [#30029](https://github.com/apache/spark/pull/30029) for branch-3.0. ### What changes were proposed in this pull request? Correct the resolution of the having clause. ### Why are the changes needed? When grouping sets construct a new aggregate, the qualified name of the grouping expression is lost. Here is an example: ``` -- Works resolved by `ResolveReferences` select c1 from values (1) as t1(c1) group by grouping sets(t1.c1) having c1 = 1 -- Works because of the extra expression c1 select c1 as c2 from values (1) as t1(c1) group by grouping sets(t1.c1) having t1.c1 = 1 -- Failed select c1 from values (1) as t1(c1) group by grouping sets(t1.c1) having t1.c1 = 1 ``` It works with `Aggregate` without grouping sets through `ResolveReferences`, but grouping sets do not work since the exprId has been changed. ### Does this PR introduce _any_ user-facing change? Yes, bug fix. ### How was this patch tested? Added tests. Closes #30077 from ulysses-you/SPARK-33131-branch-3.0. 
Authored-by: ulysses <youxi...@weidian.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../src/test/resources/sql-tests/inputs/having.sql | 6 ++++ .../resources/sql-tests/results/having.sql.out | 32 ++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 729d316..bf3d4f0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -457,7 +457,7 @@ class Analyzer( */ private def constructGroupByAlias(groupByExprs: Seq[Expression]): Seq[Alias] = { groupByExprs.map { - case e: NamedExpression => Alias(e, e.name)() + case e: NamedExpression => Alias(e, e.name)(qualifier = e.qualifier) case other => Alias(other, other.toString)() } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/having.sql b/sql/core/src/test/resources/sql-tests/inputs/having.sql index 3b75be1..2799b1a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/having.sql @@ -24,3 +24,9 @@ SELECT SUM(a) AS b, CAST('2020-01-01' AS DATE) AS fake FROM VALUES (1, 10), (2, SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY GROUPING SETS ((b), (a, b)) HAVING b > 10; SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY CUBE(a, b) HAVING b > 10; SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY ROLLUP(a, b) HAVING b > 10; + +-- SPARK-33131: Grouping sets with having clause can not resolve qualified col name. 
+SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY GROUPING SETS(t.c1) HAVING t.c1 = 1; +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY CUBE(t.c1) HAVING t.c1 = 1; +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY ROLLUP(t.c1) HAVING t.c1 = 1; +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY t.c1 HAVING t.c1 = 1; diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/having.sql.out index 1b3ac78..6508143 100644 --- a/sql/core/src/test/resources/sql-tests/results/having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/having.sql.out @@ -81,3 +81,35 @@ SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY ROLLUP(a, b) struct<b:bigint> -- !query output 2 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY GROUPING SETS(t.c1) HAVING t.c1 = 1 +-- !query schema +struct<c1:int> +-- !query output +1 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY CUBE(t.c1) HAVING t.c1 = 1 +-- !query schema +struct<c1:int> +-- !query output +1 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY ROLLUP(t.c1) HAVING t.c1 = 1 +-- !query schema +struct<c1:int> +-- !query output +1 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY t.c1 HAVING t.c1 = 1 +-- !query schema +struct<c1:int> +-- !query output +1 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org