This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 85bf2afdbc Port some tests in joins.rs to sqllogictest (#6306)
85bf2afdbc is described below
commit 85bf2afdbcddc3de537598f3bf7a2fc34b55c497
Author: parkma99 <[email protected]>
AuthorDate: Fri May 12 11:28:44 2023 +0800
Port some tests in joins.rs to sqllogictest (#6306)
* Port some tests in joins.rs to sqllogictest
* Update datafusion/core/tests/sqllogictests/test_files/joins.slt
Co-authored-by: Andrew Lamb <[email protected]>
* Update datafusion/core/tests/sqllogictests/test_files/joins.slt
Co-authored-by: Andrew Lamb <[email protected]>
* Update datafusion/core/tests/sqllogictests/test_files/joins.slt
Co-authored-by: Andrew Lamb <[email protected]>
* Update datafusion/core/tests/sqllogictests/test_files/joins.slt
Co-authored-by: Andrew Lamb <[email protected]>
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/core/tests/sql/joins.rs | 415 --------------
datafusion/core/tests/sql/mod.rs | 94 ----
.../core/tests/sqllogictests/test_files/joins.slt | 624 +++++++++++++++++++++
3 files changed, 624 insertions(+), 509 deletions(-)
diff --git a/datafusion/core/tests/sql/joins.rs
b/datafusion/core/tests/sql/joins.rs
index 3213ae3e05..54ecfe4951 100644
--- a/datafusion/core/tests/sql/joins.rs
+++ b/datafusion/core/tests/sql/joins.rs
@@ -18,392 +18,6 @@
use super::*;
use datafusion::from_slice::FromSlice;
-#[tokio::test]
-async fn equijoin() -> Result<()> {
- let ctx = create_join_context_qualified("t1", "t2")?;
- let equivalent_sql = [
- "SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t1.a = t2.a ORDER BY t1.a",
- "SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t2.a = t1.a ORDER BY t1.a",
- ];
- let expected = vec![
- "+---+-----+",
- "| a | b |",
- "+---+-----+",
- "| 1 | 100 |",
- "| 2 | 200 |",
- "| 4 | 400 |",
- "+---+-----+",
- ];
- for sql in equivalent_sql.iter() {
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- }
-
- Ok(())
-}
-
-#[tokio::test]
-async fn left_join_unbalanced() -> Result<()> {
- // the t1_id is larger than t2_id so the join_selection optimizer should
kick in
- let ctx = create_join_context_unbalanced("t1_id", "t2_id")?;
- let equivalent_sql = [
- "SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id
ORDER BY t1_id",
- "SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t2_id = t1_id
ORDER BY t1_id",
- ];
- let expected = vec![
- "+-------+---------+---------+",
- "| t1_id | t1_name | t2_name |",
- "+-------+---------+---------+",
- "| 11 | a | z |",
- "| 22 | b | y |",
- "| 33 | c | |",
- "| 44 | d | x |",
- "| 77 | e | |",
- "+-------+---------+---------+",
- ];
- for sql in equivalent_sql.iter() {
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- }
- Ok(())
-}
-
-#[tokio::test]
-async fn left_join_null_filter() -> Result<()> {
- // Since t2 is the non-preserved side of the join, we cannot push down a
NULL filter.
- // Note that this is only true because IS NULL does not remove nulls. For
filters that
- // remove nulls, we can rewrite the join as an inner join and then push
down the filter.
- let ctx = create_join_context_with_nulls()?;
- let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id =
t2_id WHERE t2_name IS NULL ORDER BY t1_id";
- let expected = vec![
- "+-------+-------+---------+",
- "| t1_id | t2_id | t2_name |",
- "+-------+-------+---------+",
- "| 22 | 22 | |",
- "| 33 | | |",
- "| 77 | | |",
- "| 88 | | |",
- "+-------+-------+---------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn left_join_null_filter_on_join_column() -> Result<()> {
- // Again, since t2 is the non-preserved side of the join, we cannot push
down a NULL filter.
- let ctx = create_join_context_with_nulls()?;
- let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id =
t2_id WHERE t2_id IS NULL ORDER BY t1_id";
- let expected = vec![
- "+-------+-------+---------+",
- "| t1_id | t2_id | t2_name |",
- "+-------+-------+---------+",
- "| 33 | | |",
- "| 77 | | |",
- "| 88 | | |",
- "+-------+-------+---------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn left_join_not_null_filter() -> Result<()> {
- let ctx = create_join_context_with_nulls()?;
- let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id =
t2_id WHERE t2_name IS NOT NULL ORDER BY t1_id";
- let expected = vec![
- "+-------+-------+---------+",
- "| t1_id | t2_id | t2_name |",
- "+-------+-------+---------+",
- "| 11 | 11 | z |",
- "| 44 | 44 | x |",
- "| 99 | 99 | u |",
- "+-------+-------+---------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn left_join_not_null_filter_on_join_column() -> Result<()> {
- let ctx = create_join_context_with_nulls()?;
- let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id =
t2_id WHERE t2_id IS NOT NULL ORDER BY t1_id";
- let expected = vec![
- "+-------+-------+---------+",
- "| t1_id | t2_id | t2_name |",
- "+-------+-------+---------+",
- "| 11 | 11 | z |",
- "| 22 | 22 | |",
- "| 44 | 44 | x |",
- "| 99 | 99 | u |",
- "+-------+-------+---------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn self_join_non_equijoin() -> Result<()> {
- let ctx = create_join_context_with_nulls()?;
- let sql =
- "SELECT x.t1_id, y.t1_id FROM t1 x JOIN t1 y ON x.t1_id = 11 AND
y.t1_id = 44";
- let expected = vec![
- "+-------+-------+",
- "| t1_id | t1_id |",
- "+-------+-------+",
- "| 11 | 44 |",
- "+-------+-------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn right_join_null_filter() -> Result<()> {
- let ctx = create_join_context_with_nulls()?;
- let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id =
t2_id WHERE t1_name IS NULL ORDER BY t2_id";
- let expected = vec![
- "+-------+---------+-------+",
- "| t1_id | t1_name | t2_id |",
- "+-------+---------+-------+",
- "| | | 55 |",
- "| 99 | | 99 |",
- "+-------+---------+-------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn right_join_null_filter_on_join_column() -> Result<()> {
- let ctx = create_join_context_with_nulls()?;
- let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id =
t2_id WHERE t1_id IS NULL ORDER BY t2_id";
- let expected = vec![
- "+-------+---------+-------+",
- "| t1_id | t1_name | t2_id |",
- "+-------+---------+-------+",
- "| | | 55 |",
- "+-------+---------+-------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn right_join_not_null_filter() -> Result<()> {
- let ctx = create_join_context_with_nulls()?;
- let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id =
t2_id WHERE t1_name IS NOT NULL ORDER BY t2_id";
- let expected = vec![
- "+-------+---------+-------+",
- "| t1_id | t1_name | t2_id |",
- "+-------+---------+-------+",
- "| 11 | a | 11 |",
- "| 22 | b | 22 |",
- "| 44 | d | 44 |",
- "+-------+---------+-------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn right_join_not_null_filter_on_join_column() -> Result<()> {
- let ctx = create_join_context_with_nulls()?;
- let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id =
t2_id WHERE t1_id IS NOT NULL ORDER BY t2_id";
- let expected = vec![
- "+-------+---------+-------+",
- "| t1_id | t1_name | t2_id |",
- "+-------+---------+-------+",
- "| 11 | a | 11 |",
- "| 22 | b | 22 |",
- "| 44 | d | 44 |",
- "| 99 | | 99 |",
- "+-------+---------+-------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn full_join_null_filter() -> Result<()> {
- let ctx = create_join_context_with_nulls()?;
- let sql = "SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON
t1_id = t2_id WHERE t1_name IS NULL ORDER BY t1_id";
- let expected = vec![
- "+-------+---------+-------+",
- "| t1_id | t1_name | t2_id |",
- "+-------+---------+-------+",
- "| 88 | | |",
- "| 99 | | 99 |",
- "| | | 55 |",
- "+-------+---------+-------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn full_join_not_null_filter() -> Result<()> {
- let ctx = create_join_context_with_nulls()?;
- let sql = "SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON
t1_id = t2_id WHERE t1_name IS NOT NULL ORDER BY t1_id";
- let expected = vec![
- "+-------+---------+-------+",
- "| t1_id | t1_name | t2_id |",
- "+-------+---------+-------+",
- "| 11 | a | 11 |",
- "| 22 | b | 22 |",
- "| 33 | c | |",
- "| 44 | d | 44 |",
- "| 77 | e | |",
- "+-------+---------+-------+",
- ];
-
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn left_join_using() -> Result<()> {
- let test_repartition_joins = vec![true, false];
- for repartition_joins in test_repartition_joins {
- let ctx = create_join_context("id", "id", repartition_joins)?;
- let sql =
- "SELECT id, t1_name, t2_name FROM t1 LEFT JOIN t2 USING (id) ORDER
BY id";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+----+---------+---------+",
- "| id | t1_name | t2_name |",
- "+----+---------+---------+",
- "| 11 | a | z |",
- "| 22 | b | y |",
- "| 33 | c | |",
- "| 44 | d | x |",
- "+----+---------+---------+",
- ];
- assert_batches_eq!(expected, &actual);
- }
- Ok(())
-}
-
-#[tokio::test]
-async fn cross_join() {
- let test_repartition_joins = vec![true, false];
- for repartition_joins in test_repartition_joins {
- let ctx = create_join_context("t1_id", "t2_id",
repartition_joins).unwrap();
-
- let sql = "SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id";
- let actual = execute(&ctx, sql).await;
-
- assert_eq!(4 * 4, actual.len());
-
- let sql = "SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER
BY t1_id";
- let actual = execute(&ctx, sql).await;
-
- assert_eq!(4 * 4, actual.len());
-
- let sql = "SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER
BY t1_id";
-
- let actual = execute(&ctx, sql).await;
- assert_eq!(4 * 4, actual.len());
-
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-------+---------+---------+",
- "| t1_id | t1_name | t2_name |",
- "+-------+---------+---------+",
- "| 11 | a | z |",
- "| 11 | a | y |",
- "| 11 | a | x |",
- "| 11 | a | w |",
- "| 22 | b | z |",
- "| 22 | b | y |",
- "| 22 | b | x |",
- "| 22 | b | w |",
- "| 33 | c | z |",
- "| 33 | c | y |",
- "| 33 | c | x |",
- "| 33 | c | w |",
- "| 44 | d | z |",
- "| 44 | d | y |",
- "| 44 | d | x |",
- "| 44 | d | w |",
- "+-------+---------+---------+",
- ];
-
- assert_batches_eq!(expected, &actual);
-
- // Two partitions (from UNION) on the left
- let sql = "SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL
SELECT t1_id, t1_name FROM t1) AS t1 CROSS JOIN t2";
- let actual = execute(&ctx, sql).await;
-
- assert_eq!(4 * 4 * 2, actual.len());
-
- // Two partitions (from UNION) on the right
- let sql = "SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN (SELECT
t2_name FROM t2 UNION ALL SELECT t2_name FROM t2) AS t2";
- let actual = execute(&ctx, sql).await;
-
- assert_eq!(4 * 4 * 2, actual.len());
- }
-}
-
-#[tokio::test]
-async fn cross_join_unbalanced() {
- // the t1_id is larger than t2_id so the join_selection optimizer should
kick in
- let ctx = create_join_context_unbalanced("t1_id", "t2_id").unwrap();
-
- // the order of the values is not determinisitic, so we need to sort to
check the values
- let sql =
- "SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id,
t1_name, t2_name";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+-------+---------+---------+",
- "| t1_id | t1_name | t2_name |",
- "+-------+---------+---------+",
- "| 11 | a | w |",
- "| 11 | a | x |",
- "| 11 | a | y |",
- "| 11 | a | z |",
- "| 22 | b | w |",
- "| 22 | b | x |",
- "| 22 | b | y |",
- "| 22 | b | z |",
- "| 33 | c | w |",
- "| 33 | c | x |",
- "| 33 | c | y |",
- "| 33 | c | z |",
- "| 44 | d | w |",
- "| 44 | d | x |",
- "| 44 | d | y |",
- "| 44 | d | z |",
- "| 77 | e | w |",
- "| 77 | e | x |",
- "| 77 | e | y |",
- "| 77 | e | z |",
- "+-------+---------+---------+",
- ];
- assert_batches_eq!(expected, &actual);
-}
-
#[tokio::test]
async fn test_join_timestamp() -> Result<()> {
let ctx = SessionContext::new();
@@ -595,35 +209,6 @@ async fn nestedjoin_without_alias() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn issue_3002() -> Result<()> {
- // repro case for https://github.com/apache/arrow-datafusion/issues/3002
- let sql = "select a.a, b.b from a join b on a.a = b.b";
- let expected = vec!["++", "++"];
- let ctx = create_join_context_qualified("a", "b")?;
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn inner_join_nulls() {
- let sql = "SELECT * FROM (SELECT null AS id1) t1
- INNER JOIN (SELECT null AS id2) t2 ON id1 = id2";
-
- #[rustfmt::skip]
- let expected = vec![
- "++",
- "++",
- ];
-
- let ctx = create_join_context_qualified("t1", "t2").unwrap();
- let actual = execute_to_batches(&ctx, sql).await;
-
- // left and right shouldn't match anything
- assert_batches_eq!(expected, &actual);
-}
-
#[tokio::test]
async fn join_tables_with_duplicated_column_name_not_in_on_constraint() ->
Result<()> {
let ctx = SessionContext::new();
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index bd9e213dfe..7041cf7a1f 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -520,100 +520,6 @@ fn create_hashjoin_datatype_context() ->
Result<SessionContext> {
Ok(ctx)
}
-/// the table column_left has more rows than the table column_right
-fn create_join_context_unbalanced(
- column_left: &str,
- column_right: &str,
-) -> Result<SessionContext> {
- let ctx = SessionContext::new();
-
- let t1_schema = Arc::new(Schema::new(vec![
- Field::new(column_left, DataType::UInt32, true),
- Field::new("t1_name", DataType::Utf8, true),
- ]));
- let t1_data = RecordBatch::try_new(
- t1_schema,
- vec![
- Arc::new(UInt32Array::from_slice([11, 22, 33, 44, 77])),
- Arc::new(StringArray::from(vec![
- Some("a"),
- Some("b"),
- Some("c"),
- Some("d"),
- Some("e"),
- ])),
- ],
- )?;
- ctx.register_batch("t1", t1_data)?;
-
- let t2_schema = Arc::new(Schema::new(vec![
- Field::new(column_right, DataType::UInt32, true),
- Field::new("t2_name", DataType::Utf8, true),
- ]));
- let t2_data = RecordBatch::try_new(
- t2_schema,
- vec![
- Arc::new(UInt32Array::from_slice([11, 22, 44, 55])),
- Arc::new(StringArray::from(vec![
- Some("z"),
- Some("y"),
- Some("x"),
- Some("w"),
- ])),
- ],
- )?;
- ctx.register_batch("t2", t2_data)?;
-
- Ok(ctx)
-}
-
-// Create memory tables with nulls
-fn create_join_context_with_nulls() -> Result<SessionContext> {
- let ctx = SessionContext::new();
-
- let t1_schema = Arc::new(Schema::new(vec![
- Field::new("t1_id", DataType::UInt32, true),
- Field::new("t1_name", DataType::Utf8, true),
- ]));
- let t1_data = RecordBatch::try_new(
- t1_schema,
- vec![
- Arc::new(UInt32Array::from(vec![11, 22, 33, 44, 77, 88, 99])),
- Arc::new(StringArray::from(vec![
- Some("a"),
- Some("b"),
- Some("c"),
- Some("d"),
- Some("e"),
- None,
- None,
- ])),
- ],
- )?;
- ctx.register_batch("t1", t1_data)?;
-
- let t2_schema = Arc::new(Schema::new(vec![
- Field::new("t2_id", DataType::UInt32, true),
- Field::new("t2_name", DataType::Utf8, true),
- ]));
- let t2_data = RecordBatch::try_new(
- t2_schema,
- vec![
- Arc::new(UInt32Array::from(vec![11, 22, 44, 55, 99])),
- Arc::new(StringArray::from(vec![
- Some("z"),
- None,
- Some("x"),
- Some("w"),
- Some("u"),
- ])),
- ],
- )?;
- ctx.register_batch("t2", t2_data)?;
-
- Ok(ctx)
-}
-
fn create_sort_merge_join_context(
column_left: &str,
column_right: &str,
diff --git a/datafusion/core/tests/sqllogictests/test_files/joins.slt
b/datafusion/core/tests/sqllogictests/test_files/joins.slt
new file mode 100644
index 0000000000..eb8f72470c
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/joins.slt
@@ -0,0 +1,624 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##########
+## Joins Tests
+##########
+
+# create table t1
+statement ok
+CREATE TABLE t1(a INT, b INT, c INT) AS VALUES
+(1, 10, 50),
+(2, 20, 60),
+(3, 30, 70),
+(4, 40, 80)
+
+# create table t2
+statement ok
+CREATE TABLE t2(a INT, b INT, c INT) AS VALUES
+(1, 100, 500),
+(2, 200, 600),
+(9, 300, 700),
+(4, 400, 800)
+
+# equijoin
+query II nosort
+SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t1.a = t2.a ORDER BY t1.a
+----
+1 100
+2 200
+4 400
+
+query II nosort
+SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t2.a = t1.a ORDER BY t1.a
+----
+1 100
+2 200
+4 400
+
+# inner_join_nulls
+query ??
+SELECT * FROM (SELECT null AS id1) t1
+INNER JOIN (SELECT null AS id2) t2 ON id1 = id2
+----
+
+statement ok
+DROP TABLE t1
+
+statement ok
+DROP TABLE t2
+
+
+# create table a
+statement ok
+CREATE TABLE a(a INT, b INT, c INT) AS VALUES
+(1, 10, 50),
+(2, 20, 60),
+(3, 30, 70),
+(4, 40, 80)
+
+# create table b
+statement ok
+CREATE TABLE b(a INT, b INT, c INT) AS VALUES
+(1, 100, 500),
+(2, 200, 600),
+(9, 300, 700),
+(4, 400, 800)
+
+# issue_3002
+# // repro case for https://github.com/apache/arrow-datafusion/issues/3002
+
+query II
+select a.a, b.b from a join b on a.a = b.b
+----
+
+statement ok
+DROP TABLE a
+
+statement ok
+DROP TABLE b
+
+# create table t1
+statement ok
+CREATE TABLE t1(t1_id INT, t1_name VARCHAR) AS VALUES
+(11, 'a'),
+(22, 'b'),
+(33, 'c'),
+(44, 'd'),
+(77, 'e')
+
+# create table t2
+statement ok
+CREATE TABLE t2(t2_id INT, t2_name VARCHAR) AS VALUES
+(11, 'z'),
+(22, 'y'),
+(44, 'x'),
+(55, 'w')
+
+# left_join_unbalanced
+# // the t1_id is larger than t2_id so the join_selection optimizer should
kick in
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id ORDER BY
t1_id
+----
+11 a z
+22 b y
+33 c NULL
+44 d x
+77 e NULL
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t2_id = t1_id ORDER BY
t1_id
+----
+11 a z
+22 b y
+33 c NULL
+44 d x
+77 e NULL
+
+
+# cross_join_unbalanced
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id, t1_name,
t2_name
+----
+11 a w
+11 a x
+11 a y
+11 a z
+22 b w
+22 b x
+22 b y
+22 b z
+33 c w
+33 c x
+33 c y
+33 c z
+44 d w
+44 d x
+44 d y
+44 d z
+77 e w
+77 e x
+77 e y
+77 e z
+
+statement ok
+DROP TABLE t1
+
+statement ok
+DROP TABLE t2
+
+# create table t1
+statement ok
+CREATE TABLE t1(t1_id INT, t1_name VARCHAR) AS VALUES
+(11, 'a'),
+(22, 'b'),
+(33, 'c'),
+(44, 'd'),
+(77, 'e'),
+(88, NULL),
+(99, NULL)
+
+# create table t2
+statement ok
+CREATE TABLE t2(t2_id INT, t2_name VARCHAR) AS VALUES
+(11, 'z'),
+(22, NULL),
+(44, 'x'),
+(55, 'w'),
+(99, 'u')
+
+# left_join_null_filter
+# // Since t2 is the non-preserved side of the join, we cannot push down a
NULL filter.
+# // Note that this is only true because IS NULL does not remove nulls. For
filters that
+# // remove nulls, we can rewrite the join as an inner join and then push
down the filter.
+query IIT nosort
+SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE
t2_name IS NULL ORDER BY t1_id
+----
+22 22 NULL
+33 NULL NULL
+77 NULL NULL
+88 NULL NULL
+
+# left_join_null_filter_on_join_column
+# // Again, since t2 is the non-preserved side of the join, we cannot push
down a NULL filter.
+query IIT nosort
+SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_id
IS NULL ORDER BY t1_id
+----
+33 NULL NULL
+77 NULL NULL
+88 NULL NULL
+
+# left_join_not_null_filter
+query IIT nosort
+SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE
t2_name IS NOT NULL ORDER BY t1_id
+----
+11 11 z
+44 44 x
+99 99 u
+
+# left_join_not_null_filter_on_join_column
+query IIT nosort
+SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_id
IS NOT NULL ORDER BY t1_id
+----
+11 11 z
+22 22 NULL
+44 44 x
+99 99 u
+
+# self_join_non_equijoin
+query II nosort
+SELECT x.t1_id, y.t1_id FROM t1 x JOIN t1 y ON x.t1_id = 11 AND y.t1_id = 44
+----
+11 44
+
+# right_join_null_filter
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE
t1_name IS NULL ORDER BY t2_id
+----
+NULL NULL 55
+99 NULL 99
+
+# right_join_null_filter_on_join_column
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE
t1_id IS NULL ORDER BY t2_id
+----
+NULL NULL 55
+
+# right_join_not_null_filter
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE
t1_name IS NOT NULL ORDER BY t2_id
+----
+11 a 11
+22 b 22
+44 d 44
+
+# right_join_not_null_filter_on_join_column
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE
t1_id IS NOT NULL ORDER BY t2_id
+----
+11 a 11
+22 b 22
+44 d 44
+99 NULL 99
+
+# full_join_null_filter
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON t1_id = t2_id WHERE
t1_name IS NULL ORDER BY t1_id
+----
+88 NULL NULL
+99 NULL 99
+NULL NULL 55
+
+# full_join_not_null_filter
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON t1_id = t2_id WHERE
t1_name IS NOT NULL ORDER BY t1_id
+----
+11 a 11
+22 b 22
+33 c NULL
+44 d 44
+77 e NULL
+
+statement ok
+DROP TABLE t1
+
+statement ok
+DROP TABLE t2
+
+# create table t1
+statement ok
+CREATE TABLE t1(id INT, t1_name VARCHAR, t1_int INT) AS VALUES
+(11, 'a', 1),
+(22, 'b', 2),
+(33, 'c', 3),
+(44, 'd', 4)
+
+# create table t2
+statement ok
+CREATE TABLE t2(id INT, t2_name VARCHAR, t2_int INT) AS VALUES
+(11, 'z', 3),
+(22, 'y', 1),
+(44, 'x', 3),
+(55, 'w', 3)
+
+# left_join_using
+
+# set repartition_joins to true
+statement ok
+set datafusion.optimizer.repartition_joins = true
+
+query ITT nosort
+SELECT id, t1_name, t2_name FROM t1 LEFT JOIN t2 USING (id) ORDER BY id
+----
+11 a z
+22 b y
+33 c NULL
+44 d x
+
+# set repartition_joins to false
+statement ok
+set datafusion.optimizer.repartition_joins = false
+
+query ITT nosort
+SELECT id, t1_name, t2_name FROM t1 LEFT JOIN t2 USING (id) ORDER BY id
+----
+11 a z
+22 b y
+33 c NULL
+44 d x
+
+statement ok
+DROP TABLE t1
+
+statement ok
+DROP TABLE t2
+
+# create table t1
+statement ok
+CREATE TABLE t1(t1_id INT, t1_name VARCHAR, t1_int INT) AS VALUES
+(11, 'a', 1),
+(22, 'b', 2),
+(33, 'c', 3),
+(44, 'd', 4)
+
+# create table t2
+statement ok
+CREATE TABLE t2(t2_id INT, t2_name VARCHAR, t2_int INT) AS VALUES
+(11, 'z', 3),
+(22, 'y', 1),
+(44, 'x', 3),
+(55, 'w', 3)
+
+# cross_join
+
+# set repartition_joins to true
+statement ok
+set datafusion.optimizer.repartition_joins = true
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITITI rowsort
+SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL SELECT t1_id, t1_name
FROM t1) AS t1 CROSS JOIN t2
+----
+11 a 11 z 3
+11 a 11 z 3
+11 a 22 y 1
+11 a 22 y 1
+11 a 44 x 3
+11 a 44 x 3
+11 a 55 w 3
+11 a 55 w 3
+22 b 11 z 3
+22 b 11 z 3
+22 b 22 y 1
+22 b 22 y 1
+22 b 44 x 3
+22 b 44 x 3
+22 b 55 w 3
+22 b 55 w 3
+33 c 11 z 3
+33 c 11 z 3
+33 c 22 y 1
+33 c 22 y 1
+33 c 44 x 3
+33 c 44 x 3
+33 c 55 w 3
+33 c 55 w 3
+44 d 11 z 3
+44 d 11 z 3
+44 d 22 y 1
+44 d 22 y 1
+44 d 44 x 3
+44 d 44 x 3
+44 d 55 w 3
+44 d 55 w 3
+
+query ITT rowsort
+SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN (SELECT t2_name FROM t2
UNION ALL SELECT t2_name FROM t2) AS t2_data
+----
+11 a w
+11 a w
+11 a x
+11 a x
+11 a y
+11 a y
+11 a z
+11 a z
+22 b w
+22 b w
+22 b x
+22 b x
+22 b y
+22 b y
+22 b z
+22 b z
+33 c w
+33 c w
+33 c x
+33 c x
+33 c y
+33 c y
+33 c z
+33 c z
+44 d w
+44 d w
+44 d x
+44 d x
+44 d y
+44 d y
+44 d z
+44 d z
+
+# set repartition_joins to true
+statement ok
+set datafusion.optimizer.repartition_joins = false
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITITI rowsort
+SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL SELECT t1_id, t1_name
FROM t1) AS t1 CROSS JOIN t2
+----
+11 a 11 z 3
+11 a 11 z 3
+11 a 22 y 1
+11 a 22 y 1
+11 a 44 x 3
+11 a 44 x 3
+11 a 55 w 3
+11 a 55 w 3
+22 b 11 z 3
+22 b 11 z 3
+22 b 22 y 1
+22 b 22 y 1
+22 b 44 x 3
+22 b 44 x 3
+22 b 55 w 3
+22 b 55 w 3
+33 c 11 z 3
+33 c 11 z 3
+33 c 22 y 1
+33 c 22 y 1
+33 c 44 x 3
+33 c 44 x 3
+33 c 55 w 3
+33 c 55 w 3
+44 d 11 z 3
+44 d 11 z 3
+44 d 22 y 1
+44 d 22 y 1
+44 d 44 x 3
+44 d 44 x 3
+44 d 55 w 3
+44 d 55 w 3
+
+query ITT rowsort
+SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN (SELECT t2_name FROM t2
UNION ALL SELECT t2_name FROM t2) AS t2_data
+----
+11 a w
+11 a w
+11 a x
+11 a x
+11 a y
+11 a y
+11 a z
+11 a z
+22 b w
+22 b w
+22 b x
+22 b x
+22 b y
+22 b y
+22 b z
+22 b z
+33 c w
+33 c w
+33 c x
+33 c x
+33 c y
+33 c y
+33 c z
+33 c z
+44 d w
+44 d w
+44 d x
+44 d x
+44 d y
+44 d y
+44 d z
+44 d z
+
+statement ok
+DROP TABLE t1
+
+statement ok
+DROP TABLE t2