This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push: new d442146 [SPARK-34012][SQL][2.4] Keep behavior consistent when conf `spark.sqllegacy.parser.havingWithoutGroupByAsWhere` is true with migration guide d442146 is described below commit d442146964a981dd7f074c4954f7fed2752124e8 Author: angerszhu <angers....@gmail.com> AuthorDate: Wed Jan 6 20:54:47 2021 +0900 [SPARK-34012][SQL][2.4] Keep behavior consistent when conf `spark.sqllegacy.parser.havingWithoutGroupByAsWhere` is true with migration guide ### What changes were proposed in this pull request? In https://github.com/apache/spark/pull/22696 we added support for treating HAVING without GROUP BY as a global aggregate. However, because HAVING was previously treated as a `Filter`, this caused many analysis errors. In https://github.com/apache/spark/pull/28294 we used `UnresolvedHaving` instead of `Filter` to solve that problem, but this broke the original logic of treating `SELECT 1 FROM range(10) HAVING true` as `SELECT 1 FROM range(10) WHERE true`. This PR fixes this issue and adds a UT. NOTE: This backport comes from #31039 ### Why are the changes needed? To keep the behavior consistent with the migration guide. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT. Closes #31050 from AngersZhuuuu/SPARK-34012-2.4. 
Authored-by: angerszhu <angers....@gmail.com> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- .../spark/sql/catalyst/parser/AstBuilder.scala | 6 ++- .../test/resources/sql-tests/inputs/group-by.sql | 10 ++++ .../resources/sql-tests/results/group-by.sql.out | 60 +++++++++++++++++++++- 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 90e7d1c..4c4e4f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -467,7 +467,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val withProject = if (aggregation == null && having != null) { if (conf.getConf(SQLConf.LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE)) { // If the legacy conf is set, treat HAVING without GROUP BY as WHERE. - withHaving(having, createProject()) + val predicate = expression(having) match { + case p: Predicate => p + case e => Cast(e, BooleanType) + } + Filter(predicate, createProject()) } else { // According to SQL standard, HAVING without GROUP BY means global aggregate. 
withHaving(having, Aggregate(Nil, namedExpressions, withFilter)) diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 433db71..0c40a8c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -80,3 +80,13 @@ SELECT 1 FROM range(10) HAVING true; SELECT 1 FROM range(10) HAVING MAX(id) > 0; SELECT id FROM range(10) HAVING id > 0; + +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true; + +SELECT 1 FROM range(10) HAVING true; + +SELECT 1 FROM range(10) HAVING MAX(id) > 0; + +SELECT id FROM range(10) HAVING id > 0; + +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index f9d1ee8..d23a58a 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 30 +-- Number of queries: 35 -- !query 0 @@ -275,3 +275,61 @@ struct<> -- !query 29 output org.apache.spark.sql.AnalysisException grouping expressions sequence is empty, and '`id`' is not an aggregate function. 
Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get.; + + +-- !query 30 +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true +-- !query 30 schema +struct<key:string,value:string> +-- !query 30 output +spark.sql.legacy.parser.havingWithoutGroupByAsWhere true + + +-- !query 31 +SELECT 1 FROM range(10) HAVING true +-- !query 31 schema +struct<1:int> +-- !query 31 output +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 + + +-- !query 32 +SELECT 1 FROM range(10) HAVING MAX(id) > 0 +-- !query 32 schema +struct<> +-- !query 32 output +java.lang.UnsupportedOperationException +Cannot evaluate expression: max(input[0, bigint, false]) + + +-- !query 33 +SELECT id FROM range(10) HAVING id > 0 +-- !query 33 schema +struct<id:bigint> +-- !query 33 output +1 +2 +3 +4 +5 +6 +7 +8 +9 + + +-- !query 34 +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false +-- !query 34 schema +struct<key:string,value:string> +-- !query 34 output +spark.sql.legacy.parser.havingWithoutGroupByAsWhere false --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org