This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 71dbdf6f19 fix: skip predicates on struct unnest in PushDownFilter
(#16790)
71dbdf6f19 is described below
commit 71dbdf6f19a6b802d8261f62817c43e1a9a4cc3d
Author: Andrey Koshchiy <[email protected]>
AuthorDate: Sun Jul 27 13:34:19 2025 +0300
fix: skip predicates on struct unnest in PushDownFilter (#16790)
* fix: skip predicates on struct unnest in FilterPushdown
* doc comments
* fix
---
datafusion/optimizer/src/push_down_filter.rs | 30 +++++++++++++++++++---
.../sqllogictest/test_files/push_down_filter.slt | 21 ++++++++++++++-
docs/source/user-guide/sql/special_functions.md | 13 +++++-----
3 files changed, 54 insertions(+), 10 deletions(-)
diff --git a/datafusion/optimizer/src/push_down_filter.rs
b/datafusion/optimizer/src/push_down_filter.rs
index bcb867f6e7..35ec7d074d 100644
--- a/datafusion/optimizer/src/push_down_filter.rs
+++ b/datafusion/optimizer/src/push_down_filter.rs
@@ -20,6 +20,7 @@
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
+use arrow::datatypes::DataType;
use indexmap::IndexSet;
use itertools::Itertools;
@@ -875,14 +876,37 @@ impl OptimizerRule for PushDownFilter {
let predicates =
split_conjunction_owned(filter.predicate.clone());
let mut non_unnest_predicates = vec![];
let mut unnest_predicates = vec![];
+ let mut unnest_struct_columns = vec![];
+
+ for idx in &unnest.struct_type_columns {
+ let (sub_qualifier, field) =
+ unnest.input.schema().qualified_field(*idx);
+ let field_name = field.name().clone();
+
+ if let DataType::Struct(children) = field.data_type() {
+ for child in children {
+ let child_name = child.name().clone();
+ unnest_struct_columns.push(Column::new(
+ sub_qualifier.cloned(),
+ format!("{field_name}.{child_name}"),
+ ));
+ }
+ }
+ }
+
for predicate in predicates {
// collect all the Expr::Column in predicate recursively
let mut accum: HashSet<Column> = HashSet::new();
expr_to_columns(&predicate, &mut accum)?;
- if unnest.list_type_columns.iter().any(|(_, unnest_list)| {
- accum.contains(&unnest_list.output_column)
- }) {
+ let contains_list_columns =
+ unnest.list_type_columns.iter().any(|(_, unnest_list)|
{
+ accum.contains(&unnest_list.output_column)
+ });
+ let contains_struct_columns =
+ unnest_struct_columns.iter().any(|c|
accum.contains(c));
+
+ if contains_list_columns || contains_struct_columns {
unnest_predicates.push(predicate);
} else {
non_unnest_predicates.push(predicate);
diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt
b/datafusion/sqllogictest/test_files/push_down_filter.slt
index f6d71cad60..6e2972d96a 100644
--- a/datafusion/sqllogictest/test_files/push_down_filter.slt
+++ b/datafusion/sqllogictest/test_files/push_down_filter.slt
@@ -128,12 +128,31 @@ physical_plan
06)----------ProjectionExec: expr=[column1@0 as column1, column2@1 as
__unnest_placeholder(d.column2)]
07)------------DataSourceExec: partitions=1, partition_sizes=[1]
+statement ok
+drop table d;
+statement ok
+CREATE TABLE d AS VALUES (named_struct('a', 1, 'b', 2)), (named_struct('a', 3,
'b', 4)), (named_struct('a', 5, 'b', 6));
+
+query II
+select * from (select unnest(column1) from d) where
"__unnest_placeholder(d.column1).b" > 5;
+----
+5 6
+
+query TT
+explain select * from (select unnest(column1) from d) where
"__unnest_placeholder(d.column1).b" > 5;
+----
+physical_plan
+01)CoalesceBatchesExec: target_batch_size=8192
+02)--FilterExec: __unnest_placeholder(d.column1).b@1 > 5
+03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+04)------UnnestExec
+05)--------ProjectionExec: expr=[column1@0 as __unnest_placeholder(d.column1)]
+06)----------DataSourceExec: partitions=1, partition_sizes=[1]
statement ok
drop table d;
-
# Test push down filter with limit for parquet
statement ok
set datafusion.execution.parquet.pushdown_filters = true;
diff --git a/docs/source/user-guide/sql/special_functions.md
b/docs/source/user-guide/sql/special_functions.md
index 7c9efbb662..4f2a39f642 100644
--- a/docs/source/user-guide/sql/special_functions.md
+++ b/docs/source/user-guide/sql/special_functions.md
@@ -69,6 +69,7 @@ Expands an array or map into rows.
### `unnest (struct)`
Expand a struct fields into individual columns.
+Each field of the struct will be prefixed with `__unnest_placeholder` and
could be accessed via `"__unnest_placeholder(<struct>).<field>"`.
#### Arguments
@@ -91,10 +92,10 @@ Expand a struct fields into individual columns.
+---------------------------+
> select unnest(struct_column) from foov;
-+------------------------------------------+------------------------------------------+
-| unnest_placeholder(foov.struct_column).a |
unnest_placeholder(foov.struct_column).b |
-+------------------------------------------+------------------------------------------+
-| 5 | a string
|
-| 6 | another string
|
-+------------------------------------------+------------------------------------------+
++--------------------------------------------+--------------------------------------------+
+| __unnest_placeholder(foov.struct_column).a |
__unnest_placeholder(foov.struct_column).b |
++--------------------------------------------+--------------------------------------------+
+| 5 | a string
|
+| 6 | another string
|
++--------------------------------------------+--------------------------------------------+
```
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]