This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 71dbdf6f19 fix: skip predicates on struct unnest in PushDownFilter 
(#16790)
71dbdf6f19 is described below

commit 71dbdf6f19a6b802d8261f62817c43e1a9a4cc3d
Author: Andrey Koshchiy <[email protected]>
AuthorDate: Sun Jul 27 13:34:19 2025 +0300

    fix: skip predicates on struct unnest in PushDownFilter (#16790)
    
    * fix: skip predicates on struct unnest in FilterPushdown
    
    * doc comments
    
    * fix
---
 datafusion/optimizer/src/push_down_filter.rs       | 30 +++++++++++++++++++---
 .../sqllogictest/test_files/push_down_filter.slt   | 21 ++++++++++++++-
 docs/source/user-guide/sql/special_functions.md    | 13 +++++-----
 3 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/datafusion/optimizer/src/push_down_filter.rs 
b/datafusion/optimizer/src/push_down_filter.rs
index bcb867f6e7..35ec7d074d 100644
--- a/datafusion/optimizer/src/push_down_filter.rs
+++ b/datafusion/optimizer/src/push_down_filter.rs
@@ -20,6 +20,7 @@
 use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 
+use arrow::datatypes::DataType;
 use indexmap::IndexSet;
 use itertools::Itertools;
 
@@ -875,14 +876,37 @@ impl OptimizerRule for PushDownFilter {
                 let predicates = 
split_conjunction_owned(filter.predicate.clone());
                 let mut non_unnest_predicates = vec![];
                 let mut unnest_predicates = vec![];
+                let mut unnest_struct_columns = vec![];
+
+                for idx in &unnest.struct_type_columns {
+                    let (sub_qualifier, field) =
+                        unnest.input.schema().qualified_field(*idx);
+                    let field_name = field.name().clone();
+
+                    if let DataType::Struct(children) = field.data_type() {
+                        for child in children {
+                            let child_name = child.name().clone();
+                            unnest_struct_columns.push(Column::new(
+                                sub_qualifier.cloned(),
+                                format!("{field_name}.{child_name}"),
+                            ));
+                        }
+                    }
+                }
+
                 for predicate in predicates {
                     // collect all the Expr::Column in predicate recursively
                     let mut accum: HashSet<Column> = HashSet::new();
                     expr_to_columns(&predicate, &mut accum)?;
 
-                    if unnest.list_type_columns.iter().any(|(_, unnest_list)| {
-                        accum.contains(&unnest_list.output_column)
-                    }) {
+                    let contains_list_columns =
+                        unnest.list_type_columns.iter().any(|(_, unnest_list)| 
{
+                            accum.contains(&unnest_list.output_column)
+                        });
+                    let contains_struct_columns =
+                        unnest_struct_columns.iter().any(|c| 
accum.contains(c));
+
+                    if contains_list_columns || contains_struct_columns {
                         unnest_predicates.push(predicate);
                     } else {
                         non_unnest_predicates.push(predicate);
diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt 
b/datafusion/sqllogictest/test_files/push_down_filter.slt
index f6d71cad60..6e2972d96a 100644
--- a/datafusion/sqllogictest/test_files/push_down_filter.slt
+++ b/datafusion/sqllogictest/test_files/push_down_filter.slt
@@ -128,12 +128,31 @@ physical_plan
 06)----------ProjectionExec: expr=[column1@0 as column1, column2@1 as 
__unnest_placeholder(d.column2)]
 07)------------DataSourceExec: partitions=1, partition_sizes=[1]
 
+statement ok
+drop table d;
 
+statement ok
+CREATE TABLE d AS VALUES (named_struct('a', 1, 'b', 2)), (named_struct('a', 3, 
'b', 4)), (named_struct('a', 5, 'b', 6));
+
+query II
+select * from (select unnest(column1) from d) where 
"__unnest_placeholder(d.column1).b" > 5;
+----
+5 6
+
+query TT
+explain select * from (select unnest(column1) from d) where 
"__unnest_placeholder(d.column1).b" > 5;
+----
+physical_plan
+01)CoalesceBatchesExec: target_batch_size=8192
+02)--FilterExec: __unnest_placeholder(d.column1).b@1 > 5
+03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+04)------UnnestExec
+05)--------ProjectionExec: expr=[column1@0 as __unnest_placeholder(d.column1)]
+06)----------DataSourceExec: partitions=1, partition_sizes=[1]
 
 statement ok
 drop table d;
 
-
 # Test push down filter with limit for parquet
 statement ok
 set datafusion.execution.parquet.pushdown_filters = true;
diff --git a/docs/source/user-guide/sql/special_functions.md 
b/docs/source/user-guide/sql/special_functions.md
index 7c9efbb662..4f2a39f642 100644
--- a/docs/source/user-guide/sql/special_functions.md
+++ b/docs/source/user-guide/sql/special_functions.md
@@ -69,6 +69,7 @@ Expands an array or map into rows.
 ### `unnest (struct)`
 
 Expand a struct fields into individual columns.
+Each field of the struct will be prefixed with `__unnest_placeholder` and 
could be accessed via `"__unnest_placeholder(<struct>).<field>"`.
 
 #### Arguments
 
@@ -91,10 +92,10 @@ Expand a struct fields into individual columns.
 +---------------------------+
 
 > select unnest(struct_column) from foov;
-+------------------------------------------+------------------------------------------+
-| unnest_placeholder(foov.struct_column).a | 
unnest_placeholder(foov.struct_column).b |
-+------------------------------------------+------------------------------------------+
-| 5                                        | a string                          
       |
-| 6                                        | another string                    
       |
-+------------------------------------------+------------------------------------------+
++--------------------------------------------+--------------------------------------------+
+| __unnest_placeholder(foov.struct_column).a | 
__unnest_placeholder(foov.struct_column).b |
++--------------------------------------------+--------------------------------------------+
+| 5                                          | a string                        
           |
+| 6                                          | another string                  
           |
++--------------------------------------------+--------------------------------------------+
 ```


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to