This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 97f3239 update window function sql tests (#1059)
97f3239 is described below
commit 97f3239810255e968e62d6fe59598d71d798e487
Author: Jiayu Liu <[email protected]>
AuthorDate: Tue Sep 28 05:24:31 2021 +0800
update window function sql tests (#1059)
---
datafusion/tests/sql.rs | 81 +++++++++++++++++++++++++++++++++----------------
1 file changed, 55 insertions(+), 26 deletions(-)
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index ab4462a..e394a66 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -981,6 +981,7 @@ async fn csv_query_count() -> Result<()> {
Ok(())
}
+/// for window functions without order by the first, last, and nth function
call does not make sense
#[tokio::test]
async fn csv_query_window_with_empty_over() -> Result<()> {
let mut ctx = ExecutionContext::new();
@@ -989,29 +990,27 @@ async fn csv_query_window_with_empty_over() -> Result<()>
{
c9, \
count(c5) over (), \
max(c5) over (), \
- min(c5) over (), \
- first_value(c5) over (), \
- last_value(c5) over (), \
- nth_value(c5, 2) over () \
+ min(c5) over () \
from aggregate_test_100 \
order by c9 \
limit 5";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
-
"+-----------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
- "| c9 | COUNT(aggregate_test_100.c5) |
MAX(aggregate_test_100.c5) | MIN(aggregate_test_100.c5) |
FIRST_VALUE(aggregate_test_100.c5) | LAST_VALUE(aggregate_test_100.c5) |
NTH_VALUE(aggregate_test_100.c5,Int64(2)) |",
-
"+-----------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
- "| 28774375 | 100 | 2143473091
| -2141999138 | 2033001162 | 61035129
| 706441268 |",
- "| 63044568 | 100 | 2143473091
| -2141999138 | 2033001162 | 61035129
| 706441268 |",
- "| 141047417 | 100 | 2143473091
| -2141999138 | 2033001162 | 61035129
| 706441268 |",
- "| 141680161 | 100 | 2143473091
| -2141999138 | 2033001162 | 61035129
| 706441268 |",
- "| 145294611 | 100 | 2143473091
| -2141999138 | 2033001162 | 61035129
| 706441268 |",
-
"+-----------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
+
"+-----------+------------------------------+----------------------------+----------------------------+",
+ "| c9 | COUNT(aggregate_test_100.c5) |
MAX(aggregate_test_100.c5) | MIN(aggregate_test_100.c5) |",
+
"+-----------+------------------------------+----------------------------+----------------------------+",
+ "| 28774375 | 100 | 2143473091
| -2141999138 |",
+ "| 63044568 | 100 | 2143473091
| -2141999138 |",
+ "| 141047417 | 100 | 2143473091
| -2141999138 |",
+ "| 141680161 | 100 | 2143473091
| -2141999138 |",
+ "| 145294611 | 100 | 2143473091
| -2141999138 |",
+
"+-----------+------------------------------+----------------------------+----------------------------+",
];
assert_batches_eq!(expected, &actual);
Ok(())
}
+/// for window functions without order by the first, last, and nth function
call does not make sense
#[tokio::test]
async fn csv_query_window_with_partition_by() -> Result<()> {
let mut ctx = ExecutionContext::new();
@@ -1022,24 +1021,21 @@ async fn csv_query_window_with_partition_by() ->
Result<()> {
avg(cast(c4 as Int)) over (partition by c3), \
count(cast(c4 as Int)) over (partition by c3), \
max(cast(c4 as Int)) over (partition by c3), \
- min(cast(c4 as Int)) over (partition by c3), \
- first_value(cast(c4 as Int)) over (partition by c3), \
- last_value(cast(c4 as Int)) over (partition by c3), \
- nth_value(cast(c4 as Int), 2) over (partition by c3) \
+ min(cast(c4 as Int)) over (partition by c3) \
from aggregate_test_100 \
order by c9 \
limit 5";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
-
"+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+---------------------------------------------------+--------------------------------------------------+----------------------------------------------------------+",
- "| c9 | SUM(CAST(aggregate_test_100.c4 AS Int32)) |
AVG(CAST(aggregate_test_100.c4 AS Int32)) | COUNT(CAST(aggregate_test_100.c4 AS
Int32)) | MAX(CAST(aggregate_test_100.c4 AS Int32)) |
MIN(CAST(aggregate_test_100.c4 AS Int32)) |
FIRST_VALUE(CAST(aggregate_test_100.c4 AS Int32)) |
LAST_VALUE(CAST(aggregate_test_100.c4 AS Int32)) |
NTH_VALUE(CAST(aggregate_test_100.c4 AS Int32),Int64(2)) |",
-
"+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+---------------------------------------------------+--------------------------------------------------+----------------------------------------------------------+",
- "| 28774375 | -16110 | -16110
| 1 |
-16110 | -16110
| -16110 | -16110
|
|",
- "| 63044568 | 3917 | 3917
| 1 |
3917 | 3917
| 3917 | 3917
|
|",
- "| 141047417 | -38455 | -19227.5
| 2 |
-16974 | -21481
| -16974 | -21481
|
|",
- "| 141680161 | -1114 | -1114
| 1 |
-1114 | -1114
| -1114 | -1114
|
|",
- "| 145294611 | 15673 | 15673
| 1 |
15673 | 15673
| 15673 | 15673
|
|",
-
"+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+---------------------------------------------------+--------------------------------------------------+----------------------------------------------------------+",
+
"+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+",
+ "| c9 | SUM(CAST(aggregate_test_100.c4 AS Int32)) |
AVG(CAST(aggregate_test_100.c4 AS Int32)) | COUNT(CAST(aggregate_test_100.c4 AS
Int32)) | MAX(CAST(aggregate_test_100.c4 AS Int32)) |
MIN(CAST(aggregate_test_100.c4 AS Int32)) |",
+
"+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+",
+ "| 28774375 | -16110 | -16110
| 1 |
-16110 | -16110
|",
+ "| 63044568 | 3917 | 3917
| 1 |
3917 | 3917
|",
+ "| 141047417 | -38455 | -19227.5
| 2 |
-16974 | -21481
|",
+ "| 141680161 | -1114 | -1114
| 1 |
-1114 | -1114
|",
+ "| 145294611 | 15673 | 15673
| 1 |
15673 | 15673
|",
+
"+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+",
];
assert_batches_eq!(expected, &actual);
Ok(())
@@ -1079,6 +1075,39 @@ async fn csv_query_window_with_order_by() -> Result<()> {
}
#[tokio::test]
+async fn csv_query_window_with_partition_by_order_by() -> Result<()> {
+ let mut ctx = ExecutionContext::new();
+ register_aggregate_csv(&mut ctx)?;
+ let sql = "select \
+ c9, \
+ sum(c5) over (partition by c4 order by c9), \
+ avg(c5) over (partition by c4 order by c9), \
+ count(c5) over (partition by c4 order by c9), \
+ max(c5) over (partition by c4 order by c9), \
+ min(c5) over (partition by c4 order by c9), \
+ first_value(c5) over (partition by c4 order by c9), \
+ last_value(c5) over (partition by c4 order by c9), \
+ nth_value(c5, 2) over (partition by c4 order by c9) \
+ from aggregate_test_100 \
+ order by c9 \
+ limit 5";
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ let expected = vec![
+
"+-----------+----------------------------+----------------------------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
+ "| c9 | SUM(aggregate_test_100.c5) | AVG(aggregate_test_100.c5)
| COUNT(aggregate_test_100.c5) | MAX(aggregate_test_100.c5) |
MIN(aggregate_test_100.c5) | FIRST_VALUE(aggregate_test_100.c5) |
LAST_VALUE(aggregate_test_100.c5) | NTH_VALUE(aggregate_test_100.c5,Int64(2))
|",
+
"+-----------+----------------------------+----------------------------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+",
+ "| 28774375 | 61035129 | 61035129
| 1 | 61035129 | 61035129
| 61035129 | 61035129
| |",
+ "| 63044568 | -108973366 | -108973366
| 1 | -108973366 | -108973366
| -108973366 | -108973366
| |",
+ "| 141047417 | 623103518 | 623103518
| 1 | 623103518 | 623103518
| 623103518 | 623103518
| |",
+ "| 141680161 | -1927628110 | -1927628110
| 1 | -1927628110 | -1927628110
| -1927628110 | -1927628110
| |",
+ "| 145294611 | -1899175111 | -1899175111
| 1 | -1899175111 | -1899175111
| -1899175111 | -1899175111
| |",
+
"+-----------+----------------------------+----------------------------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+"
+ ];
+ assert_batches_eq!(expected, &actual);
+ Ok(())
+}
+
+#[tokio::test]
async fn csv_query_group_by_int_count() -> Result<()> {
let mut ctx = ExecutionContext::new();
register_aggregate_csv(&mut ctx)?;