[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24881: [SPARK-23160][SQL][TEST] Port window.sql

GitBox Tue, 30 Jul 2019 08:39:43 -0700

dongjoon-hyun commented on a change in pull request #24881: 
[SPARK-23160][SQL][TEST] Port window.sql
URL: https://github.com/apache/spark/pull/24881#discussion_r308795761


 ##########
 File path: sql/core/src/test/resources/sql-tests/inputs/pgSQL/window.sql
 ##########
 @@ -0,0 +1,1169 @@
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+-- Window Functions Testing
+-- 
https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/window.sql
+
+CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1;
+
+CREATE TABLE empsalary (
+    depname string,
+    empno integer,
+    salary int,
+    enroll_date date
+) USING parquet;
+
+INSERT INTO empsalary VALUES
+('develop', 10, 5200, '2007-08-01'),
+('sales', 1, 5000, '2006-10-01'),
+('personnel', 5, 3500, '2007-12-10'),
+('sales', 4, 4800, '2007-08-08'),
+('personnel', 2, 3900, '2006-12-23'),
+('develop', 7, 4200, '2008-01-01'),
+('develop', 9, 4500, '2008-01-01'),
+('sales', 3, 4800, '2007-08-01'),
+('develop', 8, 6000, '2006-10-01'),
+('develop', 11, 5200, '2007-08-15');
+
+SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM 
empsalary ORDER BY depname, salary;
+
+SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY 
salary) FROM empsalary;
+
+-- with GROUP BY
+SELECT four, ten, SUM(SUM(four)) OVER (PARTITION BY four), AVG(ten) FROM tenk1
+GROUP BY four, ten ORDER BY four, ten;
+
+SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS 
(PARTITION BY depname);
+
+-- [SPARK-28064] Order by does not accept a call to rank()
+-- SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS 
(PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w;
+
+-- empty window specification
+SELECT COUNT(*) OVER () FROM tenk1 WHERE unique2 < 10;
+
+SELECT COUNT(*) OVER w FROM tenk1 WHERE unique2 < 10 WINDOW w AS ();
+
+-- no window operation
+SELECT four FROM tenk1 WHERE FALSE WINDOW w AS (PARTITION BY ten);
+
+-- cumulative aggregate
+SELECT sum(four) OVER (PARTITION BY ten ORDER BY unique2) AS sum_1, ten, four 
FROM tenk1 WHERE unique2 < 10;
+
+SELECT row_number() OVER (ORDER BY unique2) FROM tenk1 WHERE unique2 < 10;
+
+SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM 
tenk1 WHERE unique2 < 10;
+
+SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM 
tenk1 WHERE unique2 < 10;
+
+SELECT percent_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM 
tenk1 WHERE unique2 < 10;
+
+SELECT cume_dist() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 
WHERE unique2 < 10;
+
+SELECT ntile(3) OVER (ORDER BY ten, four), ten, four FROM tenk1 WHERE unique2 
< 10;
+
+-- [SPARK-28065] ntile does not accept NULL as input
+-- SELECT ntile(NULL) OVER (ORDER BY ten, four), ten, four FROM tenk1 LIMIT 2;
+
+SELECT lag(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 
WHERE unique2 < 10;
+
+-- [SPARK-28068] `lag` second argument must be a literal in Spark
+-- SELECT lag(ten, four) OVER (PARTITION BY four ORDER BY ten), ten, four FROM 
tenk1 WHERE unique2 < 10;
+-- SELECT lag(ten, four, 0) OVER (PARTITION BY four ORDER BY ten), ten, four 
FROM tenk1 WHERE unique2 < 10;
+
+SELECT lead(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 
WHERE unique2 < 10;
+
+SELECT lead(ten * 2, 1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM 
tenk1 WHERE unique2 < 10;
+
+SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four 
FROM tenk1 WHERE unique2 < 10;
+
+SELECT first(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 
WHERE unique2 < 10;
+
+-- last returns the last row of the frame, which is CURRENT ROW in ORDER BY 
window.
+SELECT last(four) OVER (ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
+
+SELECT last(ten) OVER (PARTITION BY four), ten, four FROM
+(SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s
+ORDER BY four, ten;
+
+-- [SPARK-27951] ANSI SQL: NTH_VALUE function
+-- SELECT nth_value(ten, four + 1) OVER (PARTITION BY four), ten, four
+-- FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s;
+
+SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER (PARTITION BY 
two ORDER BY ten) AS wsum
+FROM tenk1 GROUP BY ten, two;
+
+SELECT count(*) OVER (PARTITION BY four), four FROM (SELECT * FROM tenk1 WHERE 
two = 1)s WHERE unique2 < 10;
+
+SELECT (count(*) OVER (PARTITION BY four ORDER BY ten) +
+  sum(hundred) OVER (PARTITION BY four ORDER BY ten)) AS cntsum
+  FROM tenk1 WHERE unique2 < 10;
+
+-- opexpr with different windows evaluation.
+SELECT * FROM(
+  SELECT count(*) OVER (PARTITION BY four ORDER BY ten) +
+    sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS total,
+    count(*) OVER (PARTITION BY four ORDER BY ten) AS fourcount,
+    sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS twosum
+    FROM tenk1
+)sub WHERE total <> fourcount + twosum;
+
+SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM tenk1 
WHERE unique2 < 10;
+
+SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER win AS wsum
+FROM tenk1 GROUP BY ten, two WINDOW win AS (PARTITION BY two ORDER BY ten);
+
+-- more than one window with GROUP BY
+SELECT sum(salary),
+  row_number() OVER (ORDER BY depname),
+  sum(sum(salary)) OVER (ORDER BY depname DESC)
+FROM empsalary GROUP BY depname;
+
+-- identical windows with different names
+SELECT sum(salary) OVER w1, count(*) OVER w2
+FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary);
+
+-- subplan
+-- [SPARK-28379] Correlated scalar subqueries must be aggregated
+-- SELECT lead(ten, (SELECT two FROM tenk1 WHERE s.unique2 = unique2)) OVER 
(PARTITION BY four ORDER BY ten)
+-- FROM tenk1 s WHERE unique2 < 10;
+
+-- empty table
+SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE 
FALSE)s;
+
+-- mixture of agg/wfunc in the same window
+SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION 
BY depname ORDER BY salary DESC);
+
+-- strict aggs
+-- Temporarily turns off the ANSI mode because of compatibility issues between 
keywords
+SET spark.sql.parser.ansi.enabled=false;
+SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY 
empno), MAX(depadj) OVER () FROM(
+SELECT *,
+  CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(year FROM 
enroll_date) END * 500 AS bonus,
+  CASE WHEN
+    AVG(salary) OVER (PARTITION BY depname) < salary
+    THEN 200 END AS depadj FROM empsalary
+  )s;
+SET spark.sql.parser.ansi.enabled=true;
+
+create temporary view int4_tbl as select * from values
+  (0),
+  (123456),
+  (-123456),
+  (2147483647),
+  (-2147483647)
+  as int4_tbl(f1);
+
+-- window function over ungrouped agg over empty row set (bug before 9.1)
+SELECT SUM(COUNT(f1)) OVER () FROM int4_tbl WHERE f1=42;
+
+-- window function with ORDER BY an expression involving aggregates (9.1 bug)
+select ten,
+  sum(unique1) + sum(unique2) as res,
+  rank() over (order by sum(unique1) + sum(unique2)) as rank
+from tenk1
+group by ten order by ten;
+
+-- window and aggregate with GROUP BY expression (9.2 bug)
+-- explain
+-- select first(max(x)) over (), y
+--   from (select unique1 as x, ten+four as y from tenk1) ss
+--   group by y;
+
+-- test non-default frame specifications
+SELECT four, ten,
+sum(ten) over (partition by four order by ten),
+last(ten) over (partition by four order by ten)
+FROM (select distinct ten, four from tenk1) ss;
+
+SELECT four, ten,
+sum(ten) over (partition by four order by ten range between unbounded 
preceding and current row),
+last(ten) over (partition by four order by ten range between unbounded 
preceding and current row)
+FROM (select distinct ten, four from tenk1) ss;
+
+SELECT four, ten,
+sum(ten) over (partition by four order by ten range between unbounded 
preceding and unbounded following),
+last(ten) over (partition by four order by ten range between unbounded 
preceding and unbounded following)
+FROM (select distinct ten, four from tenk1) ss;
+
+SELECT four, ten/4 as two,
+sum(ten/4) over (partition by four order by ten/4 range between unbounded 
preceding and current row),
+last(ten/4) over (partition by four order by ten/4 range between unbounded 
preceding and current row)
+FROM (select distinct ten, four from tenk1) ss;
+
+SELECT four, ten/4 as two,
+sum(ten/4) over (partition by four order by ten/4 rows between unbounded 
preceding and current row),
+last(ten/4) over (partition by four order by ten/4 rows between unbounded 
preceding and current row)
+FROM (select distinct ten, four from tenk1) ss;
+
+SELECT sum(unique1) over (order by four range between current row and 
unbounded following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (rows between current row and unbounded following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (rows between 2 preceding and 2 following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude 
no others),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude 
current row),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude 
group),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude 
ties),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 
following exclude current row),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 
following exclude group),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 
following exclude ties),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 
following exclude current row),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 
following exclude group),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 
following exclude ties),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (rows between 2 preceding and 1 preceding),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (rows between 1 following and 3 following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (rows between unbounded preceding and 1 following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (w range between current row and unbounded 
following),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (w range between unbounded preceding and current 
row exclude current row),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (w range between unbounded preceding and current 
row exclude group),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (w range between unbounded preceding and current 
row exclude ties),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
+
+-- [SPARK-27951] ANSI SQL: NTH_VALUE function
+-- SELECT first_value(unique1) over w,
+-- nth_value(unique1, 2) over w AS nth_2,
+-- last_value(unique1) over w, unique1, four
+-- FROM tenk1 WHERE unique1 < 10
+-- WINDOW w AS (order by four range between current row and unbounded 
following);
+
+-- [SPARK-28501] Frame bound value must be a literal.
+-- SELECT sum(unique1) over
+-- (order by unique1
+--   rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING),
+-- unique1
+-- FROM tenk1 WHERE unique1 < 10;
+
+CREATE TEMP VIEW v_window AS
+SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 
following) as sum_rows
+FROM range(1, 10) i;
+
+SELECT * FROM v_window;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- CREATE OR REPLACE TEMP VIEW v_window AS
+-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following
+--   exclude current row) as sum_rows FROM range(1, 10) i;
+
+SELECT * FROM v_window;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- CREATE OR REPLACE TEMP VIEW v_window AS
+-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following
+--   exclude group) as sum_rows FROM range(1, 10) i;
+
+SELECT * FROM v_window;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- CREATE OR REPLACE TEMP VIEW v_window AS
+-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following
+--   exclude ties) as sum_rows FROM generate_series(1, 10) i;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- CREATE OR REPLACE TEMP VIEW v_window AS
+-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following
+--   exclude no others) as sum_rows FROM generate_series(1, 10) i;
+
+SELECT * FROM v_window;
+
+CREATE OR REPLACE TEMP VIEW v_window AS
+SELECT i.id, sum(i.id) over (order by i.id range between 1 preceding and 1 
following) as sum_rows FROM range(1, 10) i;
+
+SELECT * FROM v_window;
+
+DROP VIEW v_window;
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- CREATE TEMP VIEW v_window AS
+-- SELECT i, min(i) over (order by i range between '1 day' preceding and '10 
days' following) as min_i
+--   FROM range(now(), now()+'100 days', '1 hour') i;
+
+-- RANGE offset PRECEDING/FOLLOWING tests
+
+SELECT sum(unique1) over (order by four range between 2 preceding and 1 
preceding),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (order by four desc range between 2 preceding and 1 
preceding),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 
preceding exclude no others),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 
preceding exclude current row),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 
preceding exclude group),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 
preceding exclude ties),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 6 
following exclude ties),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 6 
following exclude group),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (partition by four order by unique1 range between 5 
preceding and 6 following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (partition by four order by unique1 range between 
5 preceding and 6 following
+--   exclude current row),unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select sum(salary) over (order by enroll_date range between '1 year' 
preceding and '1 year' following),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select sum(salary) over (order by enroll_date desc range between '1 year' 
preceding and '1 year' following),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select sum(salary) over (order by enroll_date desc range between '1 year' 
following and '1 year' following),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select sum(salary) over (order by enroll_date range between '1 year' 
preceding and '1 year' following
+--   exclude current row), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select sum(salary) over (order by enroll_date range between '1 year' 
preceding and '1 year' following
+--   exclude group), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select sum(salary) over (order by enroll_date range between '1 year' 
preceding and '1 year' following
+--   exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-27951] ANSI SQL: NTH_VALUE function
+-- select first_value(salary) over(order by salary range between 1000 
preceding and 1000 following),
+-- lead(salary) over(order by salary range between 1000 preceding and 1000 
following),
+-- nth_value(salary, 1) over(order by salary range between 1000 preceding and 
1000 following),
+-- salary from empsalary;
+
+-- [SPARK-27951] ANSI SQL: NTH_VALUE function
+-- select last_value(salary) over(order by salary range between 1000 preceding 
and 1000 following),
+-- lag(salary) over(order by salary range between 1000 preceding and 1000 
following),
+-- salary from empsalary;
+
+-- [SPARK-27951] ANSI SQL: NTH_VALUE function
+-- select first_value(salary) over(order by salary range between 1000 
following and 3000 following
+--   exclude current row),
+-- lead(salary) over(order by salary range between 1000 following and 3000 
following exclude ties),
+-- nth_value(salary, 1) over(order by salary range between 1000 following and 
3000 following
+--   exclude ties),
+-- salary from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select last(salary) over(order by salary range between 1000 following and 
3000 following
+--   exclude group),
+-- lag(salary) over(order by salary range between 1000 following and 3000 
following exclude group),
+-- salary from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select first(salary) over(order by enroll_date range between unbounded 
preceding and '1 year' following
+--   exclude ties),
+-- last(salary) over(order by enroll_date range between unbounded preceding 
and '1 year' following),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select first(salary) over(order by enroll_date range between unbounded 
preceding and '1 year' following
+--   exclude ties),
+-- last(salary) over(order by enroll_date range between unbounded preceding 
and '1 year' following
+--   exclude ties),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select first(salary) over(order by enroll_date range between unbounded 
preceding and '1 year' following
+--   exclude group),
+-- last(salary) over(order by enroll_date range between unbounded preceding 
and '1 year' following
+--   exclude group),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select first(salary) over(order by enroll_date range between unbounded 
preceding and '1 year' following
+--   exclude current row),
+-- last(salary) over(order by enroll_date range between unbounded preceding 
and '1 year' following
+--   exclude current row),
+-- salary, enroll_date from empsalary;
+
+-- RANGE offset PRECEDING/FOLLOWING with null values
+select ss.id, ss.y,
+       first(ss.y) over w,
+       last(ss.y) over w
+from
+  (select x.id, x.id as y from range(1,5) as x
+   union all select null, 42
+   union all select null, 43) ss
+window w as
+  (order by ss.id asc nulls first range between 2 preceding and 2 following);
+
+select ss.id, ss.y,
+       first(ss.y) over w,
+       last(ss.y) over w
+from
+  (select x.id, x.id as y from range(1,5) as x
+   union all select null, 42
+   union all select null, 43) ss
+window w as
+  (order by ss.id asc nulls last range between 2 preceding and 2 following);
+
+select ss.id, ss.y,
+       first(ss.y) over w,
+       last(ss.y) over w
+from
+  (select x.id, x.id as y from range(1,5) as x
+   union all select null, 42
+   union all select null, 43) ss
+window w as
+  (order by ss.id desc nulls first range between 2 preceding and 2 following);
+
+select ss.id, ss.y,
+       first(ss.y) over w,
+       last(ss.y) over w
+from
+  (select x.id, x.id as y from range(1,5) as x
+   union all select null, 42
+   union all select null, 43) ss
+window w as
+  (order by ss.id desc nulls last range between 2 preceding and 2 following);
+
+-- Check overflow behavior for various integer sizes
+
+select x.id, last(x.id) over (order by x.id range between current row and 
2147450884 following)
+from range(32764, 32766) x;
+
+select x.id, last(x.id) over (order by x.id desc range between current row and 
2147450885 following)
+from range(-32766, -32764) x;
+
+select x.id, last(x.id) over (order by x.id range between current row and 4 
following)
+from range(2147483644, 2147483646) x;
+
+select x.id, last(x.id) over (order by x.id desc range between current row and 
5 following)
+from range(-2147483646, -2147483644) x;
+
+select x.id, last(x.id) over (order by x.id range between current row and 4 
following)
+from range(9223372036854775804, 9223372036854775806) x;
+
+select x.id, last(x.id) over (order by x.id desc range between current row and 
5 following)
+from range(-9223372036854775806, -9223372036854775804) x;
+
+-- Test in_range for other numeric datatypes
+
+create table numerics (
+    id int,
+    f_float4 float,
+    f_float8 float,
+    f_numeric int
+) using parquet;
+
+-- [SPARK-27768] Infinity, -Infinity, NaN should be recognized in a case 
insensitive manner
+insert into numerics values
+(1, -3, -3, -3),
+(2, -1, -1, -1),
+(3, 0, 0, 0),
+(4, 1.1, 1.1, 1.1),
+(5, 1.12, 1.12, 1.12),
+(6, 2, 2, 2),
+(7, 100, 100, 100);
+--(8, 'infinity', 'infinity', '1000'),
+--(9, 'NaN', 'NaN', 'NaN');
+--(0, '-infinity', '-infinity', '-1000'),  -- numeric type lacks infinities
+
+
+select id, f_float4, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_float4 range between
+             1 preceding and 1 following);
+
+select id, f_float4, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_float4 range between
+             1 preceding and 1.1 following);
+
+select id, f_float4, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_float4 range between
+             'inf' preceding and 'inf' following);
+
+select id, f_float4, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_float4 range between
+             1.1 preceding and 'NaN' following);  -- error, NaN disallowed
+
+select id, f_float8, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_float8 range between
+             1 preceding and 1 following);
+
+select id, f_float8, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_float8 range between
+             1 preceding and 1.1 following);
+
+select id, f_float8, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_float8 range between
+             'inf' preceding and 'inf' following);
+
+select id, f_float8, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_float8 range between
+             1.1 preceding and 'NaN' following);  -- error, NaN disallowed
+
+select id, f_numeric, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_numeric range between
+             1 preceding and 1 following);
+
+select id, f_numeric, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_numeric range between
+             1 preceding and 1.1 following);
+
+select id, f_numeric, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_numeric range between
+             1 preceding and 1.1 following);  -- currently unsupported
+
+select id, f_numeric, first(id) over w, last(id) over w
+from numerics
+window w as (order by f_numeric range between
+             1.1 preceding and 'NaN' following);  -- error, NaN disallowed
+
+-- Test in_range for other datetime datatypes
+
+-- Spark only supports timestamp
+create table datetimes (
+    id int,
+    f_time timestamp,
+    f_timetz timestamp,
+    f_interval timestamp,
+    f_timestamptz timestamp,
+    f_timestamp timestamp
+) using parquet;
+
+insert into datetimes values
+(1, '11:00', '11:00 BST', '1 year', '2000-10-19 10:23:54+01', '2000-10-19 
10:23:54'),
+(2, '12:00', '12:00 BST', '2 years', '2001-10-19 10:23:54+01', '2001-10-19 
10:23:54'),
+(3, '13:00', '13:00 BST', '3 years', '2001-10-19 10:23:54+01', '2001-10-19 
10:23:54'),
+(4, '14:00', '14:00 BST', '4 years', '2002-10-19 10:23:54+01', '2002-10-19 
10:23:54'),
+(5, '15:00', '15:00 BST', '5 years', '2003-10-19 10:23:54+01', '2003-10-19 
10:23:54'),
+(6, '15:00', '15:00 BST', '5 years', '2004-10-19 10:23:54+01', '2004-10-19 
10:23:54'),
+(7, '17:00', '17:00 BST', '7 years', '2005-10-19 10:23:54+01', '2005-10-19 
10:23:54'),
+(8, '18:00', '18:00 BST', '8 years', '2006-10-19 10:23:54+01', '2006-10-19 
10:23:54'),
+(9, '19:00', '19:00 BST', '9 years', '2007-10-19 10:23:54+01', '2007-10-19 
10:23:54'),
+(10, '20:00', '20:00 BST', '10 years', '2008-10-19 10:23:54+01', '2008-10-19 
10:23:54');
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select id, f_time, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_time range between
+--              '70 min' preceding and '2 hours' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select id, f_time, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_time desc range between
+--              '70 min' preceding and '2 hours' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select id, f_timetz, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timetz range between
+--              '70 min' preceding and '2 hours' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select id, f_timetz, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timetz desc range between
+--              '70 min' preceding and '2 hours' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select id, f_interval, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_interval range between
+--              '1 year' preceding and '1 year' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select id, f_interval, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_interval desc range between
+--              '1 year' preceding and '1 year' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select id, f_timestamptz, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timestamptz range between
+--              '1 year' preceding and '1 year' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select id, f_timestamptz, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timestamptz desc range between
+--              '1 year' preceding and '1 year' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select id, f_timestamp, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timestamp range between
+--              '1 year' preceding and '1 year' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of 
timestamp
+-- select id, f_timestamp, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timestamp desc range between
+--              '1 year' preceding and '1 year' following);
+
+-- RANGE offset PRECEDING/FOLLOWING error cases
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select sum(salary) over (order by enroll_date, salary range between '1 
year' preceding and '2 years' following
+--     exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select sum(salary) over (range between '1 year' preceding and '2 years' 
following
+--     exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select sum(salary) over (order by depname range between '1 year' preceding 
and '2 years' following
+--     exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select max(enroll_date) over (order by enroll_date range between 1 
preceding and 2 following
+--     exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select max(enroll_date) over (order by salary range between -1 preceding 
and 2 following
+--     exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select max(enroll_date) over (order by salary range between 1 preceding and 
-2 following
+--     exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select max(enroll_date) over (order by salary range between '1 year' 
preceding and '2 years' following
+--     exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select max(enroll_date) over (order by enroll_date range between '1 year' 
preceding and '-2 years' following
+--     exclude ties), salary, enroll_date from empsalary;
+
+-- GROUPS tests
+
+SELECT sum(unique1) over (order by four range between unbounded preceding and 
current row),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (order by four range between unbounded preceding and 
unbounded following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (order by four range between current row and 
unbounded following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (order by four range between 1 preceding and 
unbounded following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (order by four range between 1 following and 
unbounded following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (order by four range between unbounded preceding and 
2 following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (order by four range between 2 preceding and 1 
preceding),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 
following),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (order by four range between 0 preceding and 0 
following),
+unique1, four
+FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 
following
+--   exclude current row), unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 
following
+--   exclude group), unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 
following
+--   exclude ties), unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+SELECT sum(unique1) over (partition by ten
+  order by four range between 0 preceding and 0 following),unique1, four, ten
+FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (partition by ten
+--   order by four range between 0 preceding and 0 following exclude current 
row), unique1, four, ten
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (partition by ten
+--   order by four range between 0 preceding and 0 following exclude group), 
unique1, four, ten
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (partition by ten
+--   order by four range between 0 preceding and 0 following exclude ties), 
unique1, four, ten
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-27951] ANSI SQL: NTH_VALUE function
+-- select first_value(salary) over(order by enroll_date groups between 1 
preceding and 1 following),
+-- lead(salary) over(order by enroll_date groups between 1 preceding and 1 
following),
+-- nth_value(salary, 1) over(order by enroll_date groups between 1 preceding 
and 1 following),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-28508] Support for range frame+row frame in the same query
+-- select last(salary) over(order by enroll_date range between 1 preceding and 
1 following),
+-- lag(salary)         over(order by enroll_date range between 1 preceding and 
1 following),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-27951] ANSI SQL: NTH_VALUE function
+-- select first_value(salary) over(order by enroll_date groups between 1 
following and 3 following
+--   exclude current row),
+-- lead(salary) over(order by enroll_date groups between 1 following and 3 
following exclude ties),
+-- nth_value(salary, 1) over(order by enroll_date groups between 1 following 
and 3 following
+--   exclude ties),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select last(salary) over(order by enroll_date range between 1 following and 
3 following
+--   exclude group),
+-- lag(salary) over(order by enroll_date range between 1 following and 3 
following exclude group),
+-- salary, enroll_date from empsalary;
+
+-- Show differences in offset interpretation between ROWS, RANGE, and GROUPS
+
+WITH cte (x) AS (
+        SELECT * FROM range(1, 35, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x rows between 1 preceding and 1 following);
+
+WITH cte (x) AS (
+        SELECT * FROM range(1, 35, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x range between 1 preceding and 1 following);
+
+WITH cte (x) AS (
+        SELECT * FROM range(1, 35, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x range between 1 preceding and 1 following);
+
+WITH cte (x) AS (
+        select 1 union all select 1 union all select 1 union all
+        SELECT * FROM range(5, 49, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x rows between 1 preceding and 1 following);
+
+WITH cte (x) AS (
+        select 1 union all select 1 union all select 1 union all
+        SELECT * FROM range(5, 49, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x range between 1 preceding and 1 following);
+
+WITH cte (x) AS (
+        select 1 union all select 1 union all select 1 union all
+        SELECT * FROM range(5, 49, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x range between 1 preceding and 1 following);
+
+-- with UNION
+
+SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL 
SELECT * FROM tenk2)s LIMIT 0;
+
+-- check some degenerate cases
+
+create table t1 (f1 int, f2 int) using parquet;
+insert into t1 values (1,1),(1,2),(2,2);
+
+select f1, sum(f1) over (partition by f1 order by f1
+                         range between 1 preceding and 1 following)
+from t1 where f1 = f2; -- error, must have order by
+
+-- Since EXPLAIN clause rely on host physical location, it is commented out
+-- explain
+-- select f1, sum(f1) over (partition by f1 order by f2
+-- range between 1 preceding and 1 following)
+-- from t1 where f1 = f2;
+-- select f1, sum(f1) over (partition by f1 order by f2
+--                          range between 1 preceding and 1 following)
+-- from t1 where f1 = f2;
+
+select f1, sum(f1) over (partition by f1, f1 order by f2
+range between 2 preceding and 1 preceding)
+from t1 where f1 = f2;
+
+select f1, sum(f1) over (partition by f1, f2 order by f2
+range between 1 following and 2 following)
+from t1 where f1 = f2;
+
+select f1, sum(f1) over (partition by f1,
+f1 order by f2 range between 1 preceding and 1 following)
+from t1 where f1 = f2;
+
+-- Since EXPLAIN clause rely on host physical location, it is commented out
+-- explain
+-- select f1, sum(f1) over (partition by f1 order by f2
+-- range between 1 preceding and 1 following)
+-- from t1 where f1 = f2;
+
+select f1, sum(f1) over (partition by f1 order by f2
+range between 1 preceding and 1 following)
+from t1 where f1 = f2;
+
+select f1, sum(f1) over (partition by f1, f1 order by f2
+range between 2 preceding and 1 preceding)
+from t1 where f1 = f2;
+ 
+select f1, sum(f1) over (partition by f1, f2 order by f2
+range between 1 following and 2 following)
+from t1 where f1 = f2;
+
+-- ordering by a non-integer constant is allowed
+
+SELECT rank() OVER (ORDER BY length('abc'));
+
+-- can't order by another window function
+SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random()));
+
+-- some other errors
+select * from
+(select row_number() over (order by salary) rn from empsalary) ss
+where rn < 10;
+
+-- [SPARK-28506] not handling usage of group function and window function at 
some conditions
+-- SELECT * FROM empsalary INNER JOIN tenk1 ON row_number() OVER (ORDER BY 
salary) < 10;
+
+-- [SPARK-28506] not handling usage of group function and window function at 
some conditions
+-- SELECT rank() OVER (ORDER BY 1), count(*) FROM empsalary GROUP BY 1;
+
+-- [SPARK-28086] Adds `random()` to Spark
+-- SELECT * FROM rank() OVER (ORDER BY random());
 
 Review comment:
   Remove line 950 and uncomment 951 since SPARK-28086 is merged.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24881: [SPARK-23160][SQL][TEST] Port window.sql

Reply via email to