This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new 5e9ad2e71f9 [SPARK-42259][SQL] ResolveGroupingAnalytics should take care of Python UDAF 5e9ad2e71f9 is described below commit 5e9ad2e71f9a22d2f1b2d47b2e6bee323b354eb7 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Wed Feb 1 17:36:14 2023 +0800 [SPARK-42259][SQL] ResolveGroupingAnalytics should take care of Python UDAF ### What changes were proposed in this pull request? This is a long-standing correctness issue with Python UDAF and grouping analytics. The rule `ResolveGroupingAnalytics` should take care of Python UDAF when matching aggregate expressions. ### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? Yes, the query result was wrong before ### How was this patch tested? existing tests Closes #39824 from cloud-fan/python. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 1219c8492376e038894111cd5d922229260482e7) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../results/udaf/udaf-group-analytics.sql.out | 58 +++++++++++----------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 48ea0460725..7a92c46577d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -622,7 +622,7 @@ class Analyzer(override val catalogManager: CatalogManager) // AggregateExpression should be computed on the unmodified value of its argument // expressions, so we should not replace any references to grouping expression // inside it. 
- case e: AggregateExpression => + case e if AggregateExpression.isAggregate(e) => aggsBuffer += e e case e if isPartOfAggregation(e) => e diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out index b8c94b19d81..f0be6f43642 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out @@ -15,18 +15,18 @@ SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH CUBE struct<(a + b):int,b:int,udaf((a - b)):int> -- !query output 2 1 1 -2 NULL 0 +2 NULL 1 3 1 1 3 2 1 -3 NULL 0 +3 NULL 2 4 1 1 4 2 1 -4 NULL 0 +4 NULL 2 5 2 1 -5 NULL 0 +5 NULL 1 NULL 1 3 NULL 2 3 -NULL NULL 0 +NULL NULL 6 -- !query @@ -36,16 +36,16 @@ struct<a:int,b:int,udaf(b):int> -- !query output 1 1 1 1 2 1 -1 NULL 0 +1 NULL 2 2 1 1 2 2 1 -2 NULL 0 +2 NULL 2 3 1 1 3 2 1 -3 NULL 0 +3 NULL 2 NULL 1 3 NULL 2 3 -NULL NULL 0 +NULL NULL 6 -- !query @@ -54,16 +54,16 @@ SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP struct<(a + b):int,b:int,udaf((a - b)):int> -- !query output 2 1 1 -2 NULL 0 +2 NULL 1 3 1 1 3 2 1 -3 NULL 0 +3 NULL 2 4 1 1 4 2 1 -4 NULL 0 +4 NULL 2 5 2 1 -5 NULL 0 -NULL NULL 0 +5 NULL 1 +NULL NULL 6 -- !query @@ -73,14 +73,14 @@ struct<a:int,b:int,udaf(b):int> -- !query output 1 1 1 1 2 1 -1 NULL 0 +1 NULL 2 2 1 1 2 2 1 -2 NULL 0 +2 NULL 2 3 1 1 3 2 1 -3 NULL 0 -NULL NULL 0 +3 NULL 2 +NULL NULL 6 -- !query @@ -416,14 +416,14 @@ GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER -- !query schema struct<course:string,sum:int> -- !query output -NULL 0 -Java 0 +NULL 5 Java 1 Java 1 -dotNET 0 +Java 2 dotNET 1 dotNET 1 dotNET 1 +dotNET 3 -- !query @@ -432,14 +432,14 @@ GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER -- !query schema 
struct<course:string,sum:int,grouping_id(course, earnings):bigint> -- !query output -NULL 0 3 -Java 0 1 +NULL 5 3 Java 1 0 Java 1 0 -dotNET 0 1 +Java 2 1 dotNET 1 0 dotNET 1 0 dotNET 1 0 +dotNET 3 1 -- !query @@ -468,16 +468,16 @@ SELECT a + b AS k, b, udaf(a - b) FROM testData GROUP BY ROLLUP(k, b) struct<k:int,b:int,udaf((a - b)):int> -- !query output 2 1 1 -2 NULL 0 +2 NULL 1 3 1 1 3 2 1 -3 NULL 0 +3 NULL 2 4 1 1 4 2 1 -4 NULL 0 +4 NULL 2 5 2 1 -5 NULL 0 -NULL NULL 0 +5 NULL 1 +NULL NULL 6 -- !query --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org