Repository: spark Updated Branches: refs/heads/master 5c7f6b663 -> e028fd3ae
[SPARK-25736][SQL][TEST] add tests to verify the behavior of multi-column count ## What changes were proposed in this pull request? AFAIK multi-column count is not widely supported by the mainstream databases (Postgres doesn't support it), and the SQL standard doesn't define it clearly, as near as I can tell. Since Spark supports it, we should clearly document the current behavior and add tests to verify it. ## How was this patch tested? N/A Closes #22728 from cloud-fan/doc. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: hyukjinkwon <gurwls...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e028fd3a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e028fd3a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e028fd3a Branch: refs/heads/master Commit: e028fd3aed9e5e4c478f307f0a467b54b73ff0d5 Parents: 5c7f6b6 Author: Wenchen Fan <wenc...@databricks.com> Authored: Tue Oct 16 15:13:01 2018 +0800 Committer: hyukjinkwon <gurwls...@apache.org> Committed: Tue Oct 16 15:13:01 2018 +0800 ---------------------------------------------------------------------- .../catalyst/expressions/aggregate/Count.scala | 2 +- .../test/resources/sql-tests/inputs/count.sql | 27 ++++++++++ .../resources/sql-tests/results/count.sql.out | 55 ++++++++++++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/e028fd3a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala index 40582d0..8cab8e4 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala @@ -52,7 +52,7 @@ abstract class CountLike extends DeclarativeAggregate { usage = """ _FUNC_(*) - Returns the total number of retrieved rows, including rows containing null. - _FUNC_(expr) - Returns the number of rows for which the supplied expression is non-null. + _FUNC_(expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are all non-null. _FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-null. """) http://git-wip-us.apache.org/repos/asf/spark/blob/e028fd3a/sql/core/src/test/resources/sql-tests/inputs/count.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/inputs/count.sql b/sql/core/src/test/resources/sql-tests/inputs/count.sql new file mode 100644 index 0000000..9f9ee4a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/count.sql @@ -0,0 +1,27 @@ +-- Test data. 
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) +AS testData(a, b); + +-- count with single expression +SELECT + count(*), count(1), count(null), count(a), count(b), count(a + b), count((a, b)) +FROM testData; + +-- distinct count with single expression +SELECT + count(DISTINCT 1), + count(DISTINCT null), + count(DISTINCT a), + count(DISTINCT b), + count(DISTINCT (a + b)), + count(DISTINCT (a, b)) +FROM testData; + +-- count with multiple expressions +SELECT count(a, b), count(b, a), count(testData.*) FROM testData; + +-- distinct count with multiple expressions +SELECT + count(DISTINCT a, b), count(DISTINCT b, a), count(DISTINCT *), count(DISTINCT testData.*) +FROM testData; http://git-wip-us.apache.org/repos/asf/spark/blob/e028fd3a/sql/core/src/test/resources/sql-tests/results/count.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out b/sql/core/src/test/resources/sql-tests/results/count.sql.out new file mode 100644 index 0000000..b8a86d4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/count.sql.out @@ -0,0 +1,55 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 5 + + +-- !query 0 +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) +AS testData(a, b) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT + count(*), count(1), count(null), count(a), count(b), count(a + b), count((a, b)) +FROM testData +-- !query 1 schema +struct<count(1):bigint,count(1):bigint,count(NULL):bigint,count(a):bigint,count(b):bigint,count((a + b)):bigint,count(named_struct(a, a, b, b)):bigint> +-- !query 1 output +7 7 0 5 5 4 7 + + +-- !query 2 +SELECT + count(DISTINCT 1), + count(DISTINCT null), + count(DISTINCT a), + count(DISTINCT b), + count(DISTINCT (a + 
b)), + count(DISTINCT (a, b)) +FROM testData +-- !query 2 schema +struct<count(DISTINCT 1):bigint,count(DISTINCT NULL):bigint,count(DISTINCT a):bigint,count(DISTINCT b):bigint,count(DISTINCT (a + b)):bigint,count(DISTINCT named_struct(a, a, b, b)):bigint> +-- !query 2 output +1 0 2 2 2 6 + + +-- !query 3 +SELECT count(a, b), count(b, a), count(testData.*) FROM testData +-- !query 3 schema +struct<count(a, b):bigint,count(b, a):bigint,count(a, b):bigint> +-- !query 3 output +4 4 4 + + +-- !query 4 +SELECT + count(DISTINCT a, b), count(DISTINCT b, a), count(DISTINCT *), count(DISTINCT testData.*) +FROM testData +-- !query 4 schema +struct<count(DISTINCT a, b):bigint,count(DISTINCT b, a):bigint,count(DISTINCT a, b):bigint,count(DISTINCT a, b):bigint> +-- !query 4 output +3 3 3 3 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org