This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 6d3521738b Push scalar functions into cross join (#11528)
6d3521738b is described below
commit 6d3521738b5b3cb6f02ed3c9266431b714647330
Author: 张林伟 <[email protected]>
AuthorDate: Wed Jul 24 01:57:02 2024 +0800
Push scalar functions into cross join (#11528)
* UDF can be join condition
* Fix test
---
datafusion/optimizer/src/push_down_filter.rs | 6 +++---
datafusion/sqllogictest/test_files/joins.slt | 23 ++++++++++-------------
2 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs
index a22f2e83e2..ad9be449d9 100644
--- a/datafusion/optimizer/src/push_down_filter.rs
+++ b/datafusion/optimizer/src/push_down_filter.rs
@@ -261,8 +261,7 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result<bool> {
| Expr::InSubquery(_)
| Expr::ScalarSubquery(_)
| Expr::OuterReferenceColumn(_, _)
- | Expr::Unnest(_)
- | Expr::ScalarFunction(_) => {
+ | Expr::Unnest(_) => {
is_evaluate = false;
Ok(TreeNodeRecursion::Stop)
}
@@ -284,7 +283,8 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result<bool> {
| Expr::Case(_)
| Expr::Cast(_)
| Expr::TryCast(_)
- | Expr::InList { .. } => Ok(TreeNodeRecursion::Continue),
+ | Expr::InList { .. }
+ | Expr::ScalarFunction(_) => Ok(TreeNodeRecursion::Continue),
Expr::Sort(_)
| Expr::AggregateFunction(_)
| Expr::WindowFunction(_)
diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt
index b9897f81a1..441ccb7d99 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -3532,7 +3532,7 @@ physical_plan
03)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
04)----CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST],
has_header=true
-# Currently datafusion cannot pushdown filter conditions with scalar UDF into
+# Currently datafusion can pushdown filter conditions with scalar UDF into
# cross join.
query TT
EXPLAIN SELECT *
@@ -3540,19 +3540,16 @@ FROM annotated_data as t1, annotated_data as t2
WHERE EXAMPLE(t1.a, t2.a) > 3
----
logical_plan
-01)Filter: example(CAST(t1.a AS Float64), CAST(t2.a AS Float64)) > Float64(3)
-02)--CrossJoin:
-03)----SubqueryAlias: t1
-04)------TableScan: annotated_data projection=[a0, a, b, c, d]
-05)----SubqueryAlias: t2
-06)------TableScan: annotated_data projection=[a0, a, b, c, d]
+01)Inner Join: Filter: example(CAST(t1.a AS Float64), CAST(t2.a AS Float64)) > Float64(3)
+02)--SubqueryAlias: t1
+03)----TableScan: annotated_data projection=[a0, a, b, c, d]
+04)--SubqueryAlias: t2
+05)----TableScan: annotated_data projection=[a0, a, b, c, d]
physical_plan
-01)CoalesceBatchesExec: target_batch_size=2
-02)--FilterExec: example(CAST(a@1 AS Float64), CAST(a@6 AS Float64)) > 3
-03)----CrossJoinExec
-04)------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST],
has_header=true
-05)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-06)--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST],
has_header=true
+01)NestedLoopJoinExec: join_type=Inner, filter=example(CAST(a@0 AS Float64), CAST(a@1 AS Float64)) > 3
+02)--CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST],
has_header=true
+03)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+04)----CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST],
has_header=true
####
# Config teardown
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]