This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push: new ae41b54dc [VL] Restore the test cases for corr in group-by.sql and udf-group-by.sql (#5175) ae41b54dc is described below commit ae41b54dcaacb7fde8e7bac7689ff7797ad9fd06 Author: Joey <joey....@alibaba-inc.com> AuthorDate: Fri Mar 29 14:35:04 2024 +0800 [VL] Restore the test cases for corr in group-by.sql and udf-group-by.sql (#5175) --- .../test/resources/sql-tests/inputs/group-by.sql | 4 ++++ .../sql-tests/inputs/udf/udf-group-by.sql | 4 ++++ .../resources/sql-tests/results/group-by.sql.out | 9 ++++++++ .../sql-tests/results/udf/udf-group-by.sql.out | 9 ++++++++ .../utils/velox/VeloxSQLQueryTestSettings.scala | 4 ++-- .../test/resources/sql-tests/inputs/group-by.sql | 6 +++++ .../sql-tests/inputs/udf/udf-group-by.sql | 4 ++++ .../resources/sql-tests/results/group-by.sql.out | 27 ++++++++++++++++++++++ .../sql-tests/results/udf/udf-group-by.sql.out | 9 ++++++++ .../utils/velox/VeloxSQLQueryTestSettings.scala | 5 ++-- .../test/resources/sql-tests/inputs/group-by.sql | 4 ++++ .../sql-tests/inputs/udf/udf-group-by.sql | 4 ++++ .../resources/sql-tests/results/group-by.sql.out | 10 ++++++++ .../sql-tests/results/udf/udf-group-by.sql.out | 10 ++++++++ .../utils/velox/VeloxSQLQueryTestSettings.scala | 3 ++- 15 files changed, 106 insertions(+), 6 deletions(-) diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql index 4b2e12975..e2c3672a2 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql +++ b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql @@ -75,6 +75,10 @@ SELECT 1 from ( ) b where b.z != b.z; +-- SPARK-24369 multiple distinct aggregations having the same argument set +SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y); + -- SPARK-25708 HAVING without GROUP BY means global aggregate SELECT 1 FROM range(10) HAVING true; diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql index a4df72f44..0cc57c97b 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql +++ b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql @@ -71,6 +71,10 @@ SELECT 1 from ( ) b where b.z != b.z; +-- SPARK-24369 multiple distinct aggregations having the same argument set +SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y); + -- SPARK-25708 HAVING without GROUP BY means global aggregate SELECT udf(1) FROM range(10) HAVING true; diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out index 8986ca9b0..79e6f72df 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out +++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out @@ -243,6 +243,15 @@ struct<1:int> +-- !query +SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y) +-- !query schema +struct<corr(DISTINCT x, y):double,corr(DISTINCT y, x):double,count(1):bigint> +-- !query output +0.9999999999999999 0.9999999999999999 3 + + -- !query SELECT 1 FROM range(10) HAVING true -- !query schema diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index 26d55d341..986815c97 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -243,6 +243,15 @@ struct<1:int> +-- !query +SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y) +-- !query schema +struct<corr(DISTINCT x, y):double,udf(corr(DISTINCT y, x)):double,count(1):bigint> +-- !query output +0.9999999999999999 0.9999999999999999 3 + + -- !query SELECT udf(1) FROM range(10) HAVING true -- !query schema diff --git a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala index 4464dbefd..9ec55f015 100644 --- a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala @@ -230,9 +230,9 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { val OVERWRITE_SQL_QUERY_LIST: Set[String] = Set( // Velox corr has better computation logic but it fails Spark's precision check. - // Remove -- SPARK-24369 multiple distinct aggregations having the same argument set + // Overwrite below test cases. + // -- SPARK-24369 multiple distinct aggregations having the same argument set "group-by.sql", - // Remove -- SPARK-24369 multiple distinct aggregations having the same argument set "udf/udf-group-by.sql" ) } diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/inputs/group-by.sql b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/group-by.sql index 331cd9440..291a8478c 100644 --- a/gluten-ut/spark33/src/test/resources/sql-tests/inputs/group-by.sql +++ b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/group-by.sql @@ -84,6 +84,10 @@ SELECT 1 from ( ) b where b.z != b.z; +-- SPARK-24369 multiple distinct aggregations having the same argument set +SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y); + -- SPARK-25708 HAVING without GROUP BY means global aggregate SELECT 1 FROM range(10) HAVING true; @@ -243,6 +247,8 @@ SELECT k, count(*) FILTER (WHERE x IS NOT NULL), regr_count(y, x) FROM testRegre -- SPARK-37613: Support ANSI Aggregate Function: regr_r2 SELECT regr_r2(y, x) FROM testRegression; SELECT regr_r2(y, x) FROM testRegression WHERE x IS NOT NULL; +SELECT k, corr(y, x), regr_r2(y, x) FROM testRegression GROUP BY k; +SELECT k, corr(y, x) FILTER (WHERE x IS NOT NULL), regr_r2(y, x) FROM testRegression GROUP BY k; -- SPARK-27974: Support ANSI Aggregate Function: array_agg SELECT diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql index a4df72f44..0cc57c97b 100644 --- a/gluten-ut/spark33/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql +++ b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql @@ -71,6 +71,10 @@ SELECT 1 from ( ) b where b.z != b.z; +-- SPARK-24369 multiple distinct aggregations having the same argument set +SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y); + -- SPARK-25708 HAVING without GROUP BY means global aggregate SELECT udf(1) FROM range(10) HAVING true; diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out b/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out index 2fda449a9..ffaa2d511 100644 --- a/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out +++ b/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out @@ -272,6 +272,15 @@ struct<1:int> +-- !query +SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y) +-- !query schema +struct<corr(DISTINCT x, y):double,corr(DISTINCT y, x):double,count(1):bigint> +-- !query output +0.9999999999999999 0.9999999999999999 3 + + -- !query SELECT 1 FROM range(10) HAVING true -- !query schema @@ -884,6 +893,24 @@ struct<regr_r2(y, x):double> 0.997690531177829 +-- !query +SELECT k, corr(y, x), regr_r2(y, x) FROM testRegression GROUP BY k +-- !query schema +struct<k:int,corr(y, x):double,regr_r2(y, x):double> +-- !query output +1 NULL NULL +2 0.9988445981121533 0.997690531177829 + + +-- !query +SELECT k, corr(y, x) FILTER (WHERE x IS NOT NULL), regr_r2(y, x) FROM testRegression GROUP BY k +-- !query schema +struct<k:int,corr(y, x) FILTER (WHERE (x IS NOT NULL)):double,regr_r2(y, x):double> +-- !query output +1 NULL NULL +2 0.9988445981121533 0.997690531177829 + + -- !query SELECT collect_list(col), diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/gluten-ut/spark33/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index 14b278021..90272001f 100644 --- a/gluten-ut/spark33/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/gluten-ut/spark33/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -243,6 +243,15 @@ struct<1:int> +-- !query +SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y) +-- !query schema +struct<corr(DISTINCT x, y):double,udf(corr(DISTINCT y, x)):double,count(1):bigint> +-- !query output +0.9999999999999999 0.9999999999999999 3 + + -- !query SELECT udf(1) FROM range(10) HAVING true -- !query schema diff --git a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala index 26428b3a4..f773e78e8 100644 --- a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala @@ -233,10 +233,9 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { private val OVERWRITE_SQL_QUERY_LIST: Set[String] = Set( // Velox corr has better computation logic but it fails Spark's precision check. - // Remove -- SPARK-24369 multiple distinct aggregations having the same argument set, - // -- SPARK-37613: Support ANSI Aggregate Function: regr_r2 + // Overwrite below test cases. + // -- SPARK-24369 multiple distinct aggregations having the same argument set "group-by.sql", - // Remove -- SPARK-24369 multiple distinct aggregations having the same argument set "udf/udf-group-by.sql" ) } diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/group-by.sql b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/group-by.sql index b618ad1d5..c35cdb0de 100644 --- a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/group-by.sql +++ b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/group-by.sql @@ -81,6 +81,10 @@ SELECT 1 from ( ) b where b.z != b.z; +-- SPARK-24369 multiple distinct aggregations having the same argument set +SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y); + -- SPARK-25708 HAVING without GROUP BY means global aggregate SELECT 1 FROM range(10) HAVING true; diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql index a4df72f44..0cc57c97b 100644 --- a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql +++ b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql @@ -71,6 +71,10 @@ SELECT 1 from ( ) b where b.z != b.z; +-- SPARK-24369 multiple distinct aggregations having the same argument set +SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y); + -- SPARK-25708 HAVING without GROUP BY means global aggregate SELECT udf(1) FROM range(10) HAVING true; diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out b/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out index 581446305..a92a58efb 100644 --- a/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out +++ b/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out @@ -331,6 +331,16 @@ struct<1:int> -- !query output + +-- !query +SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y) +-- !query schema +struct<corr(DISTINCT x, y):double,corr(DISTINCT y, x):double,count(1):bigint> +-- !query output +0.9999999999999999 0.9999999999999999 3 + + -- !query SELECT 1 FROM range(10) HAVING true -- !query schema diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/gluten-ut/spark34/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index c18e062d7..35f91a7c4 100644 --- a/gluten-ut/spark34/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/gluten-ut/spark34/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -297,6 +297,16 @@ struct<1:int> -- !query output + +-- !query +SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y) +-- !query schema +struct<corr(DISTINCT x, y):double,udf(corr(DISTINCT y, x)):double,count(1):bigint> +-- !query output +0.9999999999999999 0.9999999999999999 3 + + -- !query SELECT udf(1) FROM range(10) HAVING true -- !query schema diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala index 8f22a2d0c..a7f190c0d 100644 --- a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala @@ -234,7 +234,8 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { val OVERWRITE_SQL_QUERY_LIST: Set[String] = Set( // Velox corr has better computation logic but it fails Spark's precision check. - // Remove -- SPARK-24369 multiple distinct aggregations having the same argument set + // Overwrite below test cases. + // -- SPARK-24369 multiple distinct aggregations having the same argument set "group-by.sql", "udf/udf-group-by.sql", // Exception string doesn't match for --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org