This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 85bf2afdbc Port some tests in joins.rs to sqllogictest (#6306)
85bf2afdbc is described below

commit 85bf2afdbcddc3de537598f3bf7a2fc34b55c497
Author: parkma99 <[email protected]>
AuthorDate: Fri May 12 11:28:44 2023 +0800

    Port some tests in joins.rs to sqllogictest (#6306)
    
    * Port some tests in joins.rs to sqllogictest
    
    * Update datafusion/core/tests/sqllogictests/test_files/joins.slt
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    * Update datafusion/core/tests/sqllogictests/test_files/joins.slt
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    * Update datafusion/core/tests/sqllogictests/test_files/joins.slt
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    * Update datafusion/core/tests/sqllogictests/test_files/joins.slt
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/core/tests/sql/joins.rs                 | 415 --------------
 datafusion/core/tests/sql/mod.rs                   |  94 ----
 .../core/tests/sqllogictests/test_files/joins.slt  | 624 +++++++++++++++++++++
 3 files changed, 624 insertions(+), 509 deletions(-)

diff --git a/datafusion/core/tests/sql/joins.rs 
b/datafusion/core/tests/sql/joins.rs
index 3213ae3e05..54ecfe4951 100644
--- a/datafusion/core/tests/sql/joins.rs
+++ b/datafusion/core/tests/sql/joins.rs
@@ -18,392 +18,6 @@
 use super::*;
 use datafusion::from_slice::FromSlice;
 
-#[tokio::test]
-async fn equijoin() -> Result<()> {
-    let ctx = create_join_context_qualified("t1", "t2")?;
-    let equivalent_sql = [
-        "SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t1.a = t2.a ORDER BY t1.a",
-        "SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t2.a = t1.a ORDER BY t1.a",
-    ];
-    let expected = vec![
-        "+---+-----+",
-        "| a | b   |",
-        "+---+-----+",
-        "| 1 | 100 |",
-        "| 2 | 200 |",
-        "| 4 | 400 |",
-        "+---+-----+",
-    ];
-    for sql in equivalent_sql.iter() {
-        let actual = execute_to_batches(&ctx, sql).await;
-        assert_batches_eq!(expected, &actual);
-    }
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn left_join_unbalanced() -> Result<()> {
-    // the t1_id is larger than t2_id so the join_selection optimizer should 
kick in
-    let ctx = create_join_context_unbalanced("t1_id", "t2_id")?;
-    let equivalent_sql = [
-        "SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id 
ORDER BY t1_id",
-        "SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t2_id = t1_id 
ORDER BY t1_id",
-    ];
-    let expected = vec![
-        "+-------+---------+---------+",
-        "| t1_id | t1_name | t2_name |",
-        "+-------+---------+---------+",
-        "| 11    | a       | z       |",
-        "| 22    | b       | y       |",
-        "| 33    | c       |         |",
-        "| 44    | d       | x       |",
-        "| 77    | e       |         |",
-        "+-------+---------+---------+",
-    ];
-    for sql in equivalent_sql.iter() {
-        let actual = execute_to_batches(&ctx, sql).await;
-        assert_batches_eq!(expected, &actual);
-    }
-    Ok(())
-}
-
-#[tokio::test]
-async fn left_join_null_filter() -> Result<()> {
-    // Since t2 is the non-preserved side of the join, we cannot push down a 
NULL filter.
-    // Note that this is only true because IS NULL does not remove nulls. For 
filters that
-    // remove nulls, we can rewrite the join as an inner join and then push 
down the filter.
-    let ctx = create_join_context_with_nulls()?;
-    let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = 
t2_id WHERE t2_name IS NULL ORDER BY t1_id";
-    let expected = vec![
-        "+-------+-------+---------+",
-        "| t1_id | t2_id | t2_name |",
-        "+-------+-------+---------+",
-        "| 22    | 22    |         |",
-        "| 33    |       |         |",
-        "| 77    |       |         |",
-        "| 88    |       |         |",
-        "+-------+-------+---------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn left_join_null_filter_on_join_column() -> Result<()> {
-    // Again, since t2 is the non-preserved side of the join, we cannot push 
down a NULL filter.
-    let ctx = create_join_context_with_nulls()?;
-    let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = 
t2_id WHERE t2_id IS NULL ORDER BY t1_id";
-    let expected = vec![
-        "+-------+-------+---------+",
-        "| t1_id | t2_id | t2_name |",
-        "+-------+-------+---------+",
-        "| 33    |       |         |",
-        "| 77    |       |         |",
-        "| 88    |       |         |",
-        "+-------+-------+---------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn left_join_not_null_filter() -> Result<()> {
-    let ctx = create_join_context_with_nulls()?;
-    let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = 
t2_id WHERE t2_name IS NOT NULL ORDER BY t1_id";
-    let expected = vec![
-        "+-------+-------+---------+",
-        "| t1_id | t2_id | t2_name |",
-        "+-------+-------+---------+",
-        "| 11    | 11    | z       |",
-        "| 44    | 44    | x       |",
-        "| 99    | 99    | u       |",
-        "+-------+-------+---------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn left_join_not_null_filter_on_join_column() -> Result<()> {
-    let ctx = create_join_context_with_nulls()?;
-    let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = 
t2_id WHERE t2_id IS NOT NULL ORDER BY t1_id";
-    let expected = vec![
-        "+-------+-------+---------+",
-        "| t1_id | t2_id | t2_name |",
-        "+-------+-------+---------+",
-        "| 11    | 11    | z       |",
-        "| 22    | 22    |         |",
-        "| 44    | 44    | x       |",
-        "| 99    | 99    | u       |",
-        "+-------+-------+---------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn self_join_non_equijoin() -> Result<()> {
-    let ctx = create_join_context_with_nulls()?;
-    let sql =
-        "SELECT x.t1_id, y.t1_id FROM t1 x JOIN t1 y ON x.t1_id = 11 AND 
y.t1_id = 44";
-    let expected = vec![
-        "+-------+-------+",
-        "| t1_id | t1_id |",
-        "+-------+-------+",
-        "| 11    | 44    |",
-        "+-------+-------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn right_join_null_filter() -> Result<()> {
-    let ctx = create_join_context_with_nulls()?;
-    let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = 
t2_id WHERE t1_name IS NULL ORDER BY t2_id";
-    let expected = vec![
-        "+-------+---------+-------+",
-        "| t1_id | t1_name | t2_id |",
-        "+-------+---------+-------+",
-        "|       |         | 55    |",
-        "| 99    |         | 99    |",
-        "+-------+---------+-------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn right_join_null_filter_on_join_column() -> Result<()> {
-    let ctx = create_join_context_with_nulls()?;
-    let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = 
t2_id WHERE t1_id IS NULL ORDER BY t2_id";
-    let expected = vec![
-        "+-------+---------+-------+",
-        "| t1_id | t1_name | t2_id |",
-        "+-------+---------+-------+",
-        "|       |         | 55    |",
-        "+-------+---------+-------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn right_join_not_null_filter() -> Result<()> {
-    let ctx = create_join_context_with_nulls()?;
-    let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = 
t2_id WHERE t1_name IS NOT NULL ORDER BY t2_id";
-    let expected = vec![
-        "+-------+---------+-------+",
-        "| t1_id | t1_name | t2_id |",
-        "+-------+---------+-------+",
-        "| 11    | a       | 11    |",
-        "| 22    | b       | 22    |",
-        "| 44    | d       | 44    |",
-        "+-------+---------+-------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn right_join_not_null_filter_on_join_column() -> Result<()> {
-    let ctx = create_join_context_with_nulls()?;
-    let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = 
t2_id WHERE t1_id IS NOT NULL ORDER BY t2_id";
-    let expected = vec![
-        "+-------+---------+-------+",
-        "| t1_id | t1_name | t2_id |",
-        "+-------+---------+-------+",
-        "| 11    | a       | 11    |",
-        "| 22    | b       | 22    |",
-        "| 44    | d       | 44    |",
-        "| 99    |         | 99    |",
-        "+-------+---------+-------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn full_join_null_filter() -> Result<()> {
-    let ctx = create_join_context_with_nulls()?;
-    let sql = "SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON 
t1_id = t2_id WHERE t1_name IS NULL ORDER BY t1_id";
-    let expected = vec![
-        "+-------+---------+-------+",
-        "| t1_id | t1_name | t2_id |",
-        "+-------+---------+-------+",
-        "| 88    |         |       |",
-        "| 99    |         | 99    |",
-        "|       |         | 55    |",
-        "+-------+---------+-------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn full_join_not_null_filter() -> Result<()> {
-    let ctx = create_join_context_with_nulls()?;
-    let sql = "SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON 
t1_id = t2_id WHERE t1_name IS NOT NULL ORDER BY t1_id";
-    let expected = vec![
-        "+-------+---------+-------+",
-        "| t1_id | t1_name | t2_id |",
-        "+-------+---------+-------+",
-        "| 11    | a       | 11    |",
-        "| 22    | b       | 22    |",
-        "| 33    | c       |       |",
-        "| 44    | d       | 44    |",
-        "| 77    | e       |       |",
-        "+-------+---------+-------+",
-    ];
-
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn left_join_using() -> Result<()> {
-    let test_repartition_joins = vec![true, false];
-    for repartition_joins in test_repartition_joins {
-        let ctx = create_join_context("id", "id", repartition_joins)?;
-        let sql =
-            "SELECT id, t1_name, t2_name FROM t1 LEFT JOIN t2 USING (id) ORDER 
BY id";
-        let actual = execute_to_batches(&ctx, sql).await;
-        let expected = vec![
-            "+----+---------+---------+",
-            "| id | t1_name | t2_name |",
-            "+----+---------+---------+",
-            "| 11 | a       | z       |",
-            "| 22 | b       | y       |",
-            "| 33 | c       |         |",
-            "| 44 | d       | x       |",
-            "+----+---------+---------+",
-        ];
-        assert_batches_eq!(expected, &actual);
-    }
-    Ok(())
-}
-
-#[tokio::test]
-async fn cross_join() {
-    let test_repartition_joins = vec![true, false];
-    for repartition_joins in test_repartition_joins {
-        let ctx = create_join_context("t1_id", "t2_id", 
repartition_joins).unwrap();
-
-        let sql = "SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id";
-        let actual = execute(&ctx, sql).await;
-
-        assert_eq!(4 * 4, actual.len());
-
-        let sql = "SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER 
BY t1_id";
-        let actual = execute(&ctx, sql).await;
-
-        assert_eq!(4 * 4, actual.len());
-
-        let sql = "SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER 
BY t1_id";
-
-        let actual = execute(&ctx, sql).await;
-        assert_eq!(4 * 4, actual.len());
-
-        let actual = execute_to_batches(&ctx, sql).await;
-        let expected = vec![
-            "+-------+---------+---------+",
-            "| t1_id | t1_name | t2_name |",
-            "+-------+---------+---------+",
-            "| 11    | a       | z       |",
-            "| 11    | a       | y       |",
-            "| 11    | a       | x       |",
-            "| 11    | a       | w       |",
-            "| 22    | b       | z       |",
-            "| 22    | b       | y       |",
-            "| 22    | b       | x       |",
-            "| 22    | b       | w       |",
-            "| 33    | c       | z       |",
-            "| 33    | c       | y       |",
-            "| 33    | c       | x       |",
-            "| 33    | c       | w       |",
-            "| 44    | d       | z       |",
-            "| 44    | d       | y       |",
-            "| 44    | d       | x       |",
-            "| 44    | d       | w       |",
-            "+-------+---------+---------+",
-        ];
-
-        assert_batches_eq!(expected, &actual);
-
-        // Two partitions (from UNION) on the left
-        let sql = "SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL 
SELECT t1_id, t1_name FROM t1) AS t1 CROSS JOIN t2";
-        let actual = execute(&ctx, sql).await;
-
-        assert_eq!(4 * 4 * 2, actual.len());
-
-        // Two partitions (from UNION) on the right
-        let sql = "SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN (SELECT 
t2_name FROM t2 UNION ALL SELECT t2_name FROM t2) AS t2";
-        let actual = execute(&ctx, sql).await;
-
-        assert_eq!(4 * 4 * 2, actual.len());
-    }
-}
-
-#[tokio::test]
-async fn cross_join_unbalanced() {
-    // the t1_id is larger than t2_id so the join_selection optimizer should 
kick in
-    let ctx = create_join_context_unbalanced("t1_id", "t2_id").unwrap();
-
-    // the order of the values is not determinisitic, so we need to sort to 
check the values
-    let sql =
-        "SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id, 
t1_name, t2_name";
-    let actual = execute_to_batches(&ctx, sql).await;
-    let expected = vec![
-        "+-------+---------+---------+",
-        "| t1_id | t1_name | t2_name |",
-        "+-------+---------+---------+",
-        "| 11    | a       | w       |",
-        "| 11    | a       | x       |",
-        "| 11    | a       | y       |",
-        "| 11    | a       | z       |",
-        "| 22    | b       | w       |",
-        "| 22    | b       | x       |",
-        "| 22    | b       | y       |",
-        "| 22    | b       | z       |",
-        "| 33    | c       | w       |",
-        "| 33    | c       | x       |",
-        "| 33    | c       | y       |",
-        "| 33    | c       | z       |",
-        "| 44    | d       | w       |",
-        "| 44    | d       | x       |",
-        "| 44    | d       | y       |",
-        "| 44    | d       | z       |",
-        "| 77    | e       | w       |",
-        "| 77    | e       | x       |",
-        "| 77    | e       | y       |",
-        "| 77    | e       | z       |",
-        "+-------+---------+---------+",
-    ];
-    assert_batches_eq!(expected, &actual);
-}
-
 #[tokio::test]
 async fn test_join_timestamp() -> Result<()> {
     let ctx = SessionContext::new();
@@ -595,35 +209,6 @@ async fn nestedjoin_without_alias() -> Result<()> {
     Ok(())
 }
 
-#[tokio::test]
-async fn issue_3002() -> Result<()> {
-    // repro case for https://github.com/apache/arrow-datafusion/issues/3002
-    let sql = "select a.a, b.b from a join b on a.a = b.b";
-    let expected = vec!["++", "++"];
-    let ctx = create_join_context_qualified("a", "b")?;
-    let actual = execute_to_batches(&ctx, sql).await;
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn inner_join_nulls() {
-    let sql = "SELECT * FROM (SELECT null AS id1) t1
-            INNER JOIN (SELECT null AS id2) t2 ON id1 = id2";
-
-    #[rustfmt::skip]
-    let expected = vec![
-        "++",
-        "++",
-    ];
-
-    let ctx = create_join_context_qualified("t1", "t2").unwrap();
-    let actual = execute_to_batches(&ctx, sql).await;
-
-    // left and right shouldn't match anything
-    assert_batches_eq!(expected, &actual);
-}
-
 #[tokio::test]
 async fn join_tables_with_duplicated_column_name_not_in_on_constraint() -> 
Result<()> {
     let ctx = SessionContext::new();
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index bd9e213dfe..7041cf7a1f 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -520,100 +520,6 @@ fn create_hashjoin_datatype_context() -> 
Result<SessionContext> {
     Ok(ctx)
 }
 
-/// the table column_left has more rows than the table column_right
-fn create_join_context_unbalanced(
-    column_left: &str,
-    column_right: &str,
-) -> Result<SessionContext> {
-    let ctx = SessionContext::new();
-
-    let t1_schema = Arc::new(Schema::new(vec![
-        Field::new(column_left, DataType::UInt32, true),
-        Field::new("t1_name", DataType::Utf8, true),
-    ]));
-    let t1_data = RecordBatch::try_new(
-        t1_schema,
-        vec![
-            Arc::new(UInt32Array::from_slice([11, 22, 33, 44, 77])),
-            Arc::new(StringArray::from(vec![
-                Some("a"),
-                Some("b"),
-                Some("c"),
-                Some("d"),
-                Some("e"),
-            ])),
-        ],
-    )?;
-    ctx.register_batch("t1", t1_data)?;
-
-    let t2_schema = Arc::new(Schema::new(vec![
-        Field::new(column_right, DataType::UInt32, true),
-        Field::new("t2_name", DataType::Utf8, true),
-    ]));
-    let t2_data = RecordBatch::try_new(
-        t2_schema,
-        vec![
-            Arc::new(UInt32Array::from_slice([11, 22, 44, 55])),
-            Arc::new(StringArray::from(vec![
-                Some("z"),
-                Some("y"),
-                Some("x"),
-                Some("w"),
-            ])),
-        ],
-    )?;
-    ctx.register_batch("t2", t2_data)?;
-
-    Ok(ctx)
-}
-
-// Create memory tables with nulls
-fn create_join_context_with_nulls() -> Result<SessionContext> {
-    let ctx = SessionContext::new();
-
-    let t1_schema = Arc::new(Schema::new(vec![
-        Field::new("t1_id", DataType::UInt32, true),
-        Field::new("t1_name", DataType::Utf8, true),
-    ]));
-    let t1_data = RecordBatch::try_new(
-        t1_schema,
-        vec![
-            Arc::new(UInt32Array::from(vec![11, 22, 33, 44, 77, 88, 99])),
-            Arc::new(StringArray::from(vec![
-                Some("a"),
-                Some("b"),
-                Some("c"),
-                Some("d"),
-                Some("e"),
-                None,
-                None,
-            ])),
-        ],
-    )?;
-    ctx.register_batch("t1", t1_data)?;
-
-    let t2_schema = Arc::new(Schema::new(vec![
-        Field::new("t2_id", DataType::UInt32, true),
-        Field::new("t2_name", DataType::Utf8, true),
-    ]));
-    let t2_data = RecordBatch::try_new(
-        t2_schema,
-        vec![
-            Arc::new(UInt32Array::from(vec![11, 22, 44, 55, 99])),
-            Arc::new(StringArray::from(vec![
-                Some("z"),
-                None,
-                Some("x"),
-                Some("w"),
-                Some("u"),
-            ])),
-        ],
-    )?;
-    ctx.register_batch("t2", t2_data)?;
-
-    Ok(ctx)
-}
-
 fn create_sort_merge_join_context(
     column_left: &str,
     column_right: &str,
diff --git a/datafusion/core/tests/sqllogictests/test_files/joins.slt 
b/datafusion/core/tests/sqllogictests/test_files/joins.slt
new file mode 100644
index 0000000000..eb8f72470c
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/joins.slt
@@ -0,0 +1,624 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##########
+## Joins Tests
+##########
+
+# create table t1
+statement ok
+CREATE TABLE t1(a INT, b INT, c INT) AS VALUES
+(1, 10, 50),
+(2, 20, 60),
+(3, 30, 70),
+(4, 40, 80)
+
+# create table t2
+statement ok
+CREATE TABLE t2(a INT, b INT, c INT) AS VALUES
+(1, 100, 500),
+(2, 200, 600),
+(9, 300, 700),
+(4, 400, 800)
+
+# equijoin
+query II nosort
+SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t1.a = t2.a ORDER BY t1.a
+----
+1 100
+2 200
+4 400
+
+query II nosort
+SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t2.a = t1.a ORDER BY t1.a
+----
+1 100
+2 200
+4 400
+
+# inner_join_nulls
+query ??
+SELECT * FROM (SELECT null AS id1) t1
+INNER JOIN (SELECT null AS id2) t2 ON id1 = id2
+----
+
+statement ok
+DROP TABLE t1
+
+statement ok
+DROP TABLE t2
+
+
+# create table a
+statement ok
+CREATE TABLE a(a INT, b INT, c INT) AS VALUES
+(1, 10, 50),
+(2, 20, 60),
+(3, 30, 70),
+(4, 40, 80)
+
+# create table b
+statement ok
+CREATE TABLE b(a INT, b INT, c INT) AS VALUES
+(1, 100, 500),
+(2, 200, 600),
+(9, 300, 700),
+(4, 400, 800)
+
+# issue_3002
+#    // repro case for https://github.com/apache/arrow-datafusion/issues/3002
+
+query II
+select a.a, b.b from a join b on a.a = b.b
+----
+
+statement ok
+DROP TABLE a
+
+statement ok
+DROP TABLE b
+
+# create table t1
+statement ok
+CREATE TABLE t1(t1_id INT, t1_name VARCHAR) AS VALUES
+(11, 'a'),
+(22, 'b'),
+(33, 'c'),
+(44, 'd'),
+(77, 'e')
+
+# create table t2
+statement ok
+CREATE TABLE t2(t2_id INT, t2_name VARCHAR) AS VALUES
+(11, 'z'),
+(22, 'y'),
+(44, 'x'),
+(55, 'w')
+
+# left_join_unbalanced
+#     // the t1_id is larger than t2_id so the join_selection optimizer should 
kick in
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id ORDER BY 
t1_id
+----
+11 a z
+22 b y
+33 c NULL
+44 d x
+77 e NULL
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t2_id = t1_id ORDER BY 
t1_id
+----
+11 a z
+22 b y
+33 c NULL
+44 d x
+77 e NULL
+
+
+# cross_join_unbalanced
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id, t1_name, 
t2_name
+----
+11 a w
+11 a x
+11 a y
+11 a z
+22 b w
+22 b x
+22 b y
+22 b z
+33 c w
+33 c x
+33 c y
+33 c z
+44 d w
+44 d x
+44 d y
+44 d z
+77 e w
+77 e x
+77 e y
+77 e z
+
+statement ok
+DROP TABLE t1
+
+statement ok
+DROP TABLE t2
+
+# create table t1
+statement ok
+CREATE TABLE t1(t1_id INT, t1_name VARCHAR) AS VALUES
+(11, 'a'),
+(22, 'b'),
+(33, 'c'),
+(44, 'd'),
+(77, 'e'),
+(88, NULL),
+(99, NULL)
+
+# create table t2
+statement ok
+CREATE TABLE t2(t2_id INT, t2_name VARCHAR) AS VALUES
+(11, 'z'),
+(22, NULL),
+(44, 'x'),
+(55, 'w'),
+(99, 'u')
+
+# left_join_null_filter
+#    // Since t2 is the non-preserved side of the join, we cannot push down a 
NULL filter.
+#   // Note that this is only true because IS NULL does not remove nulls. For 
filters that
+#   // remove nulls, we can rewrite the join as an inner join and then push 
down the filter.
+query IIT nosort
+SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE 
t2_name IS NULL ORDER BY t1_id
+----
+22 22 NULL
+33 NULL NULL
+77 NULL NULL
+88 NULL NULL
+
+# left_join_null_filter_on_join_column
+#    // Again, since t2 is the non-preserved side of the join, we cannot push 
down a NULL filter.
+query IIT nosort
+SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_id 
IS NULL ORDER BY t1_id
+----
+33 NULL NULL
+77 NULL NULL
+88 NULL NULL
+
+# left_join_not_null_filter
+query IIT nosort
+SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE 
t2_name IS NOT NULL ORDER BY t1_id
+----
+11 11 z
+44 44 x
+99 99 u
+
+# left_join_not_null_filter_on_join_column
+query IIT nosort
+SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_id 
IS NOT NULL ORDER BY t1_id
+----
+11 11 z
+22 22 NULL
+44 44 x
+99 99 u
+
+# self_join_non_equijoin
+query II nosort
+SELECT x.t1_id, y.t1_id FROM t1 x JOIN t1 y ON x.t1_id = 11 AND y.t1_id = 44
+----
+11 44
+
+# right_join_null_filter
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE 
t1_name IS NULL ORDER BY t2_id
+----
+NULL NULL 55
+99 NULL 99
+
+# right_join_null_filter_on_join_column
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE 
t1_id IS NULL ORDER BY t2_id
+----
+NULL NULL 55
+
+# right_join_not_null_filter
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE 
t1_name IS NOT NULL ORDER BY t2_id
+----
+11 a 11
+22 b 22
+44 d 44
+
+# right_join_not_null_filter_on_join_column
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE 
t1_id IS NOT NULL ORDER BY t2_id
+----
+11 a 11
+22 b 22
+44 d 44
+99 NULL 99
+
+# full_join_null_filter
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON t1_id = t2_id WHERE 
t1_name IS NULL ORDER BY t1_id
+----
+88 NULL NULL
+99 NULL 99
+NULL NULL 55
+
+# full_join_not_null_filter
+query ITI nosort
+SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON t1_id = t2_id WHERE 
t1_name IS NOT NULL ORDER BY t1_id
+----
+11 a 11
+22 b 22
+33 c NULL
+44 d 44
+77 e NULL
+
+statement ok
+DROP TABLE t1
+
+statement ok
+DROP TABLE t2
+
+# create table t1
+statement ok
+CREATE TABLE t1(id INT, t1_name VARCHAR, t1_int INT) AS VALUES
+(11, 'a', 1),
+(22, 'b', 2),
+(33, 'c', 3),
+(44, 'd', 4)
+
+# create table t2
+statement ok
+CREATE TABLE t2(id INT, t2_name VARCHAR, t2_int INT) AS VALUES
+(11, 'z', 3),
+(22, 'y', 1),
+(44, 'x', 3),
+(55, 'w', 3)
+
+# left_join_using
+
+# set repartition_joins to true
+statement ok
+set datafusion.optimizer.repartition_joins = true
+
+query ITT nosort
+SELECT id, t1_name, t2_name FROM t1 LEFT JOIN t2 USING (id) ORDER BY id
+----
+11 a z
+22 b y
+33 c NULL
+44 d x
+
+# set repartition_joins to false
+statement ok
+set datafusion.optimizer.repartition_joins = false
+
+query ITT nosort
+SELECT id, t1_name, t2_name FROM t1 LEFT JOIN t2 USING (id) ORDER BY id
+----
+11 a z
+22 b y
+33 c NULL
+44 d x
+
+statement ok
+DROP TABLE t1
+
+statement ok
+DROP TABLE t2
+
+# create table t1
+statement ok
+CREATE TABLE t1(t1_id INT, t1_name VARCHAR, t1_int INT) AS VALUES
+(11, 'a', 1),
+(22, 'b', 2),
+(33, 'c', 3),
+(44, 'd', 4)
+
+# create table t2
+statement ok
+CREATE TABLE t2(t2_id INT, t2_name VARCHAR, t2_int INT) AS VALUES
+(11, 'z', 3),
+(22, 'y', 1),
+(44, 'x', 3),
+(55, 'w', 3)
+
+# cross_join
+
+# set repartition_joins to true
+statement ok
+set datafusion.optimizer.repartition_joins = true
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITITI rowsort
+SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL SELECT t1_id, t1_name 
FROM t1) AS t1 CROSS JOIN t2
+----
+11 a 11 z 3
+11 a 11 z 3
+11 a 22 y 1
+11 a 22 y 1
+11 a 44 x 3
+11 a 44 x 3
+11 a 55 w 3
+11 a 55 w 3
+22 b 11 z 3
+22 b 11 z 3
+22 b 22 y 1
+22 b 22 y 1
+22 b 44 x 3
+22 b 44 x 3
+22 b 55 w 3
+22 b 55 w 3
+33 c 11 z 3
+33 c 11 z 3
+33 c 22 y 1
+33 c 22 y 1
+33 c 44 x 3
+33 c 44 x 3
+33 c 55 w 3
+33 c 55 w 3
+44 d 11 z 3
+44 d 11 z 3
+44 d 22 y 1
+44 d 22 y 1
+44 d 44 x 3
+44 d 44 x 3
+44 d 55 w 3
+44 d 55 w 3
+
+query ITT rowsort
+SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN (SELECT t2_name FROM t2 
UNION ALL SELECT t2_name FROM t2) AS t2_data
+----
+11 a w
+11 a w
+11 a x
+11 a x
+11 a y
+11 a y
+11 a z
+11 a z
+22 b w
+22 b w
+22 b x
+22 b x
+22 b y
+22 b y
+22 b z
+22 b z
+33 c w
+33 c w
+33 c x
+33 c x
+33 c y
+33 c y
+33 c z
+33 c z
+44 d w
+44 d w
+44 d x
+44 d x
+44 d y
+44 d y
+44 d z
+44 d z
+
+# set repartition_joins to true
+statement ok
+set datafusion.optimizer.repartition_joins = false
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITT nosort
+SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id
+----
+11 a z
+11 a y
+11 a x
+11 a w
+22 b z
+22 b y
+22 b x
+22 b w
+33 c z
+33 c y
+33 c x
+33 c w
+44 d z
+44 d y
+44 d x
+44 d w
+
+query ITITI rowsort
+SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL SELECT t1_id, t1_name 
FROM t1) AS t1 CROSS JOIN t2
+----
+11 a 11 z 3
+11 a 11 z 3
+11 a 22 y 1
+11 a 22 y 1
+11 a 44 x 3
+11 a 44 x 3
+11 a 55 w 3
+11 a 55 w 3
+22 b 11 z 3
+22 b 11 z 3
+22 b 22 y 1
+22 b 22 y 1
+22 b 44 x 3
+22 b 44 x 3
+22 b 55 w 3
+22 b 55 w 3
+33 c 11 z 3
+33 c 11 z 3
+33 c 22 y 1
+33 c 22 y 1
+33 c 44 x 3
+33 c 44 x 3
+33 c 55 w 3
+33 c 55 w 3
+44 d 11 z 3
+44 d 11 z 3
+44 d 22 y 1
+44 d 22 y 1
+44 d 44 x 3
+44 d 44 x 3
+44 d 55 w 3
+44 d 55 w 3
+
+query ITT rowsort
+SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN (SELECT t2_name FROM t2 
UNION ALL SELECT t2_name FROM t2) AS t2_data
+----
+11 a w
+11 a w
+11 a x
+11 a x
+11 a y
+11 a y
+11 a z
+11 a z
+22 b w
+22 b w
+22 b x
+22 b x
+22 b y
+22 b y
+22 b z
+22 b z
+33 c w
+33 c w
+33 c x
+33 c x
+33 c y
+33 c y
+33 c z
+33 c z
+44 d w
+44 d w
+44 d x
+44 d x
+44 d y
+44 d y
+44 d z
+44 d z
+
+statement ok
+DROP TABLE t1
+
+statement ok
+DROP TABLE t2

Reply via email to