This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new e140bf719e3 [SPARK-43956][SQL][3.4] Fix the bug doesn't display column's sql for Percentile[Cont|Disc] e140bf719e3 is described below commit e140bf719e3e8d7347f5d00b2ebaf77d6a5b2210 Author: Jiaan Geng <belie...@163.com> AuthorDate: Sat Jun 3 22:15:15 2023 +0300 [SPARK-43956][SQL][3.4] Fix the bug doesn't display column's sql for Percentile[Cont|Disc] ### What changes were proposed in this pull request? This PR backports https://github.com/apache/spark/pull/41436 to branch-3.4. ### Why are the changes needed? To fix the bug where the SQL representation of Percentile[Cont|Disc] displays a hard-coded `v` in the ORDER BY clause instead of the ordering column's SQL. ### Does this PR introduce _any_ user-facing change? Yes. Users can now see the correct SQL information. ### How was this patch tested? Test cases updated. Closes #41445 from beliefer/SPARK-43956_followup. Authored-by: Jiaan Geng <belie...@163.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../expressions/aggregate/percentiles.scala | 4 ++-- .../sql-tests/results/percentiles.sql.out | 24 +++++++++++----------- .../results/postgreSQL/aggregates_part4.sql.out | 8 ++++---- .../udf/postgreSQL/udf-aggregates_part4.sql.out | 8 ++++---- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala index 81bc7e51499..8447a5f9b51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala @@ -368,7 +368,7 @@ case class PercentileCont(left: Expression, right: Expression, reverse: Boolean override def sql(isDistinct: Boolean): String = { val distinct = if (isDistinct) "DISTINCT " else "" val direction = if (reverse) " DESC" else "" - s"$prettyName($distinct${right.sql}) 
WITHIN GROUP (ORDER BY v$direction)" + s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY ${left.sql}$direction)" } override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): PercentileCont = @@ -408,7 +408,7 @@ case class PercentileDisc( override def sql(isDistinct: Boolean): String = { val distinct = if (isDistinct) "DISTINCT " else "" val direction = if (reverse) " DESC" else "" - s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY v$direction)" + s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY ${left.sql}$direction)" } override protected def withNewChildrenInternal( diff --git a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out index 38319875c71..cd99ded56bf 100644 --- a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out @@ -144,7 +144,7 @@ SELECT FROM basic_pays ORDER BY salary -- !query schema -struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...] 
+struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLL [...] -- !query output Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0 Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0 @@ -344,7 +344,7 @@ FROM basic_pays WINDOW w AS (PARTITION BY department) ORDER BY salary -- !query schema -struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,p [...] +struct<employee_name:string,department:string,salary:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLL [...] 
-- !query output Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0 Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0 @@ -380,7 +380,7 @@ WHERE salary > 8900 WINDOW w AS (PARTITION BY department) ORDER BY salary -- !query schema -struct<employee_name:string,department:string,salary:int,median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.5) WITHIN GROUP (ORDER BY v) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER [...] +struct<employee_name:string,department:string,salary:int,median(salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double,percentile_cont(0.5) WITHIN GRO [...] 
-- !query output Jeff Firrelli Accounting 8992 9998.0 9998.0 9998.0 9998.0 9998.0 Julie Firrelli Sales 9181 9441.0 9441.0 9441.0 9441.0 9441.0 @@ -594,7 +594,7 @@ SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY dt DESC) FROM intervals -- !query schema -struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY v):interval year to month,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):interval year to month> +struct<percentile_cont(0.25) WITHIN GROUP (ORDER BY dt):interval year to month,percentile_cont(0.25) WITHIN GROUP (ORDER BY dt DESC):interval year to month> -- !query output 0-10 2-6 @@ -608,7 +608,7 @@ FROM intervals GROUP BY k ORDER BY k -- !query schema -struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second> +struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY ym):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY ym DESC):interval day to second> -- !query output 0 0 00:00:10.000000000 0 00:00:30.000000000 1 0 00:00:12.500000000 0 00:00:17.500000000 @@ -626,7 +626,7 @@ FROM intervals GROUP BY k ORDER BY k -- !query schema -struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second> +struct<k:int,percentile_cont(0.25) WITHIN GROUP (ORDER BY dt2):interval day to second,percentile_cont(0.25) WITHIN GROUP (ORDER BY dt2 DESC):interval day to second> -- !query output 0 0 00:10:00.000000000 0 00:30:00.000000000 1 0 00:12:30.000000000 0 00:17:30.000000000 @@ -641,7 +641,7 @@ SELECT percentile_disc(0.25) WITHIN GROUP (ORDER BY dt DESC) FROM intervals -- !query schema -struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY v):interval year to month,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):interval year to month> +struct<percentile_disc(0.25) WITHIN GROUP (ORDER BY dt):interval year to month,percentile_disc(0.25) WITHIN GROUP (ORDER 
BY dt DESC):interval year to month> -- !query output 0-10 2-6 @@ -655,7 +655,7 @@ FROM intervals GROUP BY k ORDER BY k -- !query schema -struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second> +struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY ym):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY ym DESC):interval day to second> -- !query output 0 0 00:00:10.000000000 0 00:00:30.000000000 1 0 00:00:10.000000000 0 00:00:20.000000000 @@ -673,7 +673,7 @@ FROM intervals GROUP BY k ORDER BY k -- !query schema -struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY v):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC):interval day to second> +struct<k:int,percentile_disc(0.25) WITHIN GROUP (ORDER BY dt2):interval day to second,percentile_disc(0.25) WITHIN GROUP (ORDER BY dt2 DESC):interval day to second> -- !query output 0 0 00:10:00.000000000 0 00:30:00.000000000 1 0 00:10:00.000000000 0 00:20:00.000000000 @@ -689,7 +689,7 @@ SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY dt) FROM intervals -- !query schema -struct<median(dt):interval year to month,percentile(dt, 0.5, 1):interval year to month,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):interval year to month> +struct<median(dt):interval year to month,percentile(dt, 0.5, 1):interval year to month,percentile_cont(0.5) WITHIN GROUP (ORDER BY dt):interval year to month> -- !query output 1-8 1-8 1-8 @@ -704,7 +704,7 @@ FROM intervals GROUP BY k ORDER BY k -- !query schema -struct<k:int,median(ym):interval day to second,percentile(ym, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):interval day to second> +struct<k:int,median(ym):interval day to second,percentile(ym, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY ym):interval day to second> -- !query output 0 0 00:00:20.000000000 0 00:00:20.000000000 0 
00:00:20.000000000 1 0 00:00:15.000000000 0 00:00:15.000000000 0 00:00:15.000000000 @@ -723,7 +723,7 @@ FROM intervals GROUP BY k ORDER BY k -- !query schema -struct<k:int,median(dt2):interval day to second,percentile(dt2, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY v):interval day to second> +struct<k:int,median(dt2):interval day to second,percentile(dt2, 0.5, 1):interval day to second,percentile_cont(0.5) WITHIN GROUP (ORDER BY dt2):interval day to second> -- !query output 0 0 00:20:00.000000000 0 00:20:00.000000000 0 00:20:00.000000000 1 0 00:15:00.000000000 0 00:15:00.000000000 0 00:15:00.000000000 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out index 45f00b36f16..1aaa514eb13 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out @@ -2,7 +2,7 @@ -- !query select percentile_cont(0.5) within group (order by b) from aggtest -- !query schema -struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double> +struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double> -- !query output 53.44850015640259 @@ -10,7 +10,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double> -- !query select percentile_cont(0.5) within group (order by b), sum(b) from aggtest -- !query schema -struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double> +struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double,sum(b):double> -- !query output 53.44850015640259 431.77260909229517 @@ -18,7 +18,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double> -- !query select percentile_cont(0.5) within group (order by thousand) from tenk1 -- !query schema -struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double> +struct<percentile_cont(0.5) WITHIN GROUP (ORDER 
BY thousand):double> -- !query output 499.5 @@ -26,6 +26,6 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double> -- !query select percentile_disc(0.5) within group (order by thousand) from tenk1 -- !query schema -struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY v):double> +struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY thousand):double> -- !query output 499.0 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out index 45f00b36f16..1aaa514eb13 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out @@ -2,7 +2,7 @@ -- !query select percentile_cont(0.5) within group (order by b) from aggtest -- !query schema -struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double> +struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double> -- !query output 53.44850015640259 @@ -10,7 +10,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double> -- !query select percentile_cont(0.5) within group (order by b), sum(b) from aggtest -- !query schema -struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double> +struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY b):double,sum(b):double> -- !query output 53.44850015640259 431.77260909229517 @@ -18,7 +18,7 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double,sum(b):double> -- !query select percentile_cont(0.5) within group (order by thousand) from tenk1 -- !query schema -struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double> +struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY thousand):double> -- !query output 499.5 @@ -26,6 +26,6 @@ struct<percentile_cont(0.5) WITHIN GROUP (ORDER BY v):double> -- !query select percentile_disc(0.5) within group (order by thousand) from tenk1 -- !query schema 
-struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY v):double> +struct<percentile_disc(0.5) WITHIN GROUP (ORDER BY thousand):double> -- !query output 499.0 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org