This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 03e39da62e fix: Implicitly plan `UNNEST` as lateral (#13695)
03e39da62e is described below
commit 03e39da62e403e064d21b57e9d6c200464c03749
Author: Rohan Krishnaswamy <[email protected]>
AuthorDate: Fri Dec 13 07:22:24 2024 -0800
fix: Implicitly plan `UNNEST` as lateral (#13695)
* plan implicit lateral if table factor is UNNEST
* check for outer references in `create_relation_subquery`
* add sqllogictest
* fix lateral constant test to not expect a subquery node
* replace sqllogictest in favor of logical plan test
* update lateral join sqllogictests
* add sqllogictests
* fix logical plan test
---
datafusion/sql/src/relation/join.rs | 1 +
datafusion/sql/src/relation/mod.rs | 6 ++++
datafusion/sql/tests/sql_integration.rs | 20 +++++++++++--
datafusion/sqllogictest/test_files/joins.slt | 20 +++++++------
datafusion/sqllogictest/test_files/unnest.slt | 41 +++++++++++++++++++++++++++
5 files changed, 77 insertions(+), 11 deletions(-)
diff --git a/datafusion/sql/src/relation/join.rs
b/datafusion/sql/src/relation/join.rs
index 2ed1197e8f..75f39792bc 100644
--- a/datafusion/sql/src/relation/join.rs
+++ b/datafusion/sql/src/relation/join.rs
@@ -163,6 +163,7 @@ pub(crate) fn is_lateral(factor: &TableFactor) -> bool {
match factor {
TableFactor::Derived { lateral, .. } => *lateral,
TableFactor::Function { lateral, .. } => *lateral,
+ TableFactor::UNNEST { .. } => true,
_ => false,
}
}
diff --git a/datafusion/sql/src/relation/mod.rs
b/datafusion/sql/src/relation/mod.rs
index 45a617daae..8915b00692 100644
--- a/datafusion/sql/src/relation/mod.rs
+++ b/datafusion/sql/src/relation/mod.rs
@@ -188,6 +188,12 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
planner_context.set_outer_query_schema(old_query_schema);
planner_context.set_outer_from_schema(Some(old_from_schema));
+ // We can omit the subquery wrapper if there are no columns
+ // referencing the outer scope.
+ if outer_ref_columns.is_empty() {
+ return Ok(plan);
+ }
+
match plan {
LogicalPlan::SubqueryAlias(SubqueryAlias { input, alias, .. }) => {
subquery_alias(
diff --git a/datafusion/sql/tests/sql_integration.rs
b/datafusion/sql/tests/sql_integration.rs
index 8c2d8ebad4..9363d16c9f 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -3129,9 +3129,8 @@ fn lateral_constant() {
\n Cross Join: \
\n TableScan: j1\
\n SubqueryAlias: j2\
- \n Subquery:\
- \n Projection: Int64(1)\
- \n EmptyRelation";
+ \n Projection: Int64(1)\
+ \n EmptyRelation";
quick_test(sql, expected);
}
@@ -3230,6 +3229,21 @@ fn lateral_nested_left_join() {
quick_test(sql, expected);
}
+#[test]
+fn lateral_unnest() {
+ let sql = "SELECT * from unnest_table u, unnest(u.array_col)";
+ let expected = "Projection: *\
+ \n Cross Join: \
+ \n SubqueryAlias: u\
+ \n TableScan: unnest_table\
+ \n Subquery:\
+ \n Projection:
__unnest_placeholder(outer_ref(u.array_col),depth=1) AS
UNNEST(outer_ref(u.array_col))\
+ \n Unnest:
lists[__unnest_placeholder(outer_ref(u.array_col))|depth=1] structs[]\
+ \n Projection: outer_ref(u.array_col) AS
__unnest_placeholder(outer_ref(u.array_col))\
+ \n EmptyRelation";
+ quick_test(sql, expected);
+}
+
#[test]
fn hive_aggregate_with_filter() -> Result<()> {
let dialect = &HiveDialect {};
diff --git a/datafusion/sqllogictest/test_files/joins.slt
b/datafusion/sqllogictest/test_files/joins.slt
index 49aaa877ca..68426f180d 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -4058,10 +4058,12 @@ logical_plan
03)----TableScan: join_t1 projection=[t1_id, t1_name]
04)--SubqueryAlias: series
05)----Subquery:
-06)------Projection:
__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)),depth=1) AS
i
-07)--------Unnest:
lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))|depth=1]
structs[]
-08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t1.t1_int)
AS Int64)) AS
__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))
-09)------------EmptyRelation
+06)------Projection: UNNEST(generate_series(Int64(1),outer_ref(t1.t1_int))) AS
i
+07)--------Subquery:
+08)----------Projection:
__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)),depth=1) AS
UNNEST(generate_series(Int64(1),outer_ref(t1.t1_int)))
+09)------------Unnest:
lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))|depth=1]
structs[]
+10)--------------Projection: generate_series(Int64(1),
CAST(outer_ref(t1.t1_int) AS Int64)) AS
__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))
+11)----------------EmptyRelation
physical_plan_error This feature is not implemented: Physical plan does not
support logical expression OuterReferenceColumn(UInt32, Column { relation:
Some(Bare { table: "t1" }), name: "t1_int" })
@@ -4081,10 +4083,12 @@ logical_plan
03)----TableScan: join_t1 projection=[t1_id, t1_name]
04)--SubqueryAlias: series
05)----Subquery:
-06)------Projection:
__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)),depth=1) AS
i
-07)--------Unnest:
lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))|depth=1]
structs[]
-08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t2.t1_int)
AS Int64)) AS
__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))
-09)------------EmptyRelation
+06)------Projection: UNNEST(generate_series(Int64(1),outer_ref(t2.t1_int))) AS
i
+07)--------Subquery:
+08)----------Projection:
__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)),depth=1) AS
UNNEST(generate_series(Int64(1),outer_ref(t2.t1_int)))
+09)------------Unnest:
lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))|depth=1]
structs[]
+10)--------------Projection: generate_series(Int64(1),
CAST(outer_ref(t2.t1_int) AS Int64)) AS
__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))
+11)----------------EmptyRelation
physical_plan_error This feature is not implemented: Physical plan does not
support logical expression OuterReferenceColumn(UInt32, Column { relation:
Some(Bare { table: "t2" }), name: "t1_int" })
diff --git a/datafusion/sqllogictest/test_files/unnest.slt
b/datafusion/sqllogictest/test_files/unnest.slt
index 1c54006bd2..2685e18427 100644
--- a/datafusion/sqllogictest/test_files/unnest.slt
+++ b/datafusion/sqllogictest/test_files/unnest.slt
@@ -860,6 +860,47 @@ select count(*) from (select unnest(range(0, 100000)) id)
t inner join (select u
----
100000
+# Test implicit LATERAL support for UNNEST
+# Issue: https://github.com/apache/datafusion/issues/13659
+# TODO: https://github.com/apache/datafusion/issues/10048
+query error DataFusion error: This feature is not implemented: Physical plan
does not support logical expression OuterReferenceColumn\(List\(Field \{ name:
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), Column \{ relation: Some\(Bare \{ table: "u" \}\), name:
"column1" \}\)
+select * from unnest_table u, unnest(u.column1);
+
+# Test implicit LATERAL support for UNNEST (INNER JOIN)
+query error DataFusion error: This feature is not implemented: Physical plan
does not support logical expression OuterReferenceColumn\(List\(Field \{ name:
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), Column \{ relation: Some\(Bare \{ table: "u" \}\), name:
"column1" \}\)
+select * from unnest_table u INNER JOIN unnest(u.column1) AS t(column1) ON
u.column3 = t.column1;
+
+# Test implicit LATERAL planning for UNNEST
+query TT
+explain select * from unnest_table u, unnest(u.column1);
+----
+logical_plan
+01)Cross Join:
+02)--SubqueryAlias: u
+03)----TableScan: unnest_table projection=[column1, column2, column3, column4,
column5]
+04)--Subquery:
+05)----Projection: __unnest_placeholder(outer_ref(u.column1),depth=1) AS
UNNEST(outer_ref(u.column1))
+06)------Unnest: lists[__unnest_placeholder(outer_ref(u.column1))|depth=1]
structs[]
+07)--------Projection: outer_ref(u.column1) AS
__unnest_placeholder(outer_ref(u.column1))
+08)----------EmptyRelation
+physical_plan_error This feature is not implemented: Physical plan does not
support logical expression OuterReferenceColumn(List(Field { name: "item",
data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata:
{} }), Column { relation: Some(Bare { table: "u" }), name: "column1" })
+
+# Test implicit LATERAL planning for UNNEST (INNER JOIN)
+query TT
+explain select * from unnest_table u INNER JOIN unnest(u.column1) AS
t(column1) ON u.column3 = t.column1;
+----
+logical_plan
+01)Inner Join: u.column3 = t.column1
+02)--SubqueryAlias: u
+03)----TableScan: unnest_table projection=[column1, column2, column3, column4,
column5]
+04)--SubqueryAlias: t
+05)----Subquery:
+06)------Projection: __unnest_placeholder(outer_ref(u.column1),depth=1) AS
column1
+07)--------Unnest: lists[__unnest_placeholder(outer_ref(u.column1))|depth=1]
structs[]
+08)----------Projection: outer_ref(u.column1) AS
__unnest_placeholder(outer_ref(u.column1))
+09)------------EmptyRelation
+physical_plan_error This feature is not implemented: Physical plan does not
support logical expression OuterReferenceColumn(List(Field { name: "item",
data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata:
{} }), Column { relation: Some(Bare { table: "u" }), name: "column1" })
+
## Unnest in subquery
query IIII
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]