shivusondur commented on a change in pull request #25161: [SPARK-28390][SQL][PYTHON][TESTS] Convert and port 'pgSQL/select_having.sql' into UDF test base
URL: https://github.com/apache/spark/pull/25161#discussion_r305217718
##########
File path: sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_having.sql
##########
@@ -0,0 +1,56 @@
+--
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+--
+-- SELECT_HAVING
+-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_having.sql
+--
+-- This test file was converted from inputs/pgSQL/select_having.sql
+
+-- load test data
+CREATE TABLE test_having (a int, b int, c string, d string) USING parquet;
+INSERT INTO test_having VALUES (0, 1, 'XXXX', 'A');
+INSERT INTO test_having VALUES (1, 2, 'AAAA', 'b');
+INSERT INTO test_having VALUES (2, 2, 'AAAA', 'c');
+INSERT INTO test_having VALUES (3, 3, 'BBBB', 'D');
+INSERT INTO test_having VALUES (4, 3, 'BBBB', 'e');
+INSERT INTO test_having VALUES (5, 3, 'bbbb', 'F');
+INSERT INTO test_having VALUES (6, 4, 'cccc', 'g');
+INSERT INTO test_having VALUES (7, 4, 'cccc', 'h');
+INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
+INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
+
+SELECT udf(b), udf(c) FROM test_having
+ GROUP BY b, c HAVING udf(count(*)) = 1 ORDER BY b, c;
+
+-- HAVING is effectively equivalent to WHERE in this case
+SELECT udf(b), udf(c) FROM test_having
+ GROUP BY b, c HAVING udf(b) = 3 ORDER BY b, c;
+
+-- [SPARK-28386] Cannot resolve ORDER BY columns with GROUP BY and HAVING
+-- SELECT lower(c), count(c) FROM test_having
+-- GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+-- ORDER BY lower(c);
+
+SELECT udf(c), max(udf(a)) FROM test_having
+ GROUP BY c HAVING udf(count(*)) > 2 OR udf(min(a)) = udf(max(a))
+ ORDER BY c;
+
+-- test degenerate cases involving HAVING without GROUP BY
+-- Per SQL spec, these should generate 0 or 1 row, even without aggregates
+
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(min(a)) = udf(max(a));
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(min(a)) < udf(max(a));
+
+-- errors: ungrouped column references
+SELECT udf(a) FROM test_having HAVING udf(min(a)) < udf(max(a));
+SELECT 1 AS one FROM test_having HAVING udf(a) > 1;
+
+-- the really degenerate case: need not scan table at all
+SELECT 1 AS one FROM test_having HAVING udf(1 > 2);
+SELECT 1 AS one FROM test_having HAVING udf(1 < 2);

Review comment:
Handled

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org