kosiew commented on code in PR #16589:
URL: https://github.com/apache/datafusion/pull/16589#discussion_r2177077207


##########
datafusion/physical-expr-adapter/README.md:
##########
@@ -0,0 +1,14 @@
+# DataFusion Physical Expression Adapter
+
+This crate provides physical expression schema adaptation utilities for 
DataFusion that allow adapting a `PhysicalExpr` to different schema types.
+This handles cases such as `lit(SclarValue::Int32(123)) = int64_column` by 
rewriting it to `lit(SclarValue::Int32(123)) = cast(int64_column, 'Int32')`

Review Comment:
   ```suggestion
   This handles cases such as `lit(ScalarValue::Int32(123)) = int64_column` by rewriting it to `lit(ScalarValue::Int32(123)) = cast(int64_column, 'Int32')`
   ```



##########
datafusion/physical-expr-adapter/Cargo.toml:
##########
@@ -0,0 +1,30 @@
+[package]
+name = "datafusion-physical-expr-adapter"
+description = "Physical expression schema adaptation utilities for DataFusion"
+keywords = ["datafusion", "query", "sql"]
+readme = "README.md"
+version = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
+license = { workspace = true }
+authors = { workspace = true }
+rust-version = { workspace = true }
+
+[lib]
+name = "datafusion_physical_expr_adapter"
+path = "src/lib.rs"
+
+[dependencies]
+arrow = { workspace = true }
+datafusion-common = { workspace = true }
+datafusion-expr = { workspace = true }
+datafusion-functions = { workspace = true }
+datafusion-physical-expr = { workspace = true }
+datafusion-physical-expr-common = { workspace = true }
+itertools = { workspace = true }
+
+[dev-dependencies]
+datafusion-expr = { workspace = true }

Review Comment:
   This `datafusion-expr` entry under `[dev-dependencies]` is not required, as the crate is already listed under `[dependencies]`.



##########
datafusion/physical-expr-adapter/src/schema_rewriter.rs:
##########
@@ -97,13 +101,111 @@ impl<'a> PhysicalExprSchemaRewriter<'a> {
         &self,
         expr: Arc<dyn PhysicalExpr>,
     ) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
+        if let Some(transformed) = 
self.try_rewrite_struct_field_access(&expr)? {
+            return Ok(Transformed::yes(transformed));
+        }
+
         if let Some(column) = expr.as_any().downcast_ref::<Column>() {
             return self.rewrite_column(Arc::clone(&expr), column);
         }
 
         Ok(Transformed::no(expr))
     }
 
+    fn try_rewrite_struct_field_access(

Review Comment:
   The new `try_rewrite_struct_field_access` handles missing struct fields by 
returning Null. I tried adding a test case for nested structs (e.g., `a.b.c`) to 
check that this also works recursively, but the test below failed with the error shown after it.
   
   ```rust
   #[test]
       fn test_rewrite_nested_struct_missing_field() {
           let physical_schema = Schema::new(vec![Field::new(
               "nested",
               DataType::Struct(
                   vec![Field::new(
                       "a",
                       DataType::Struct(vec![Field::new("b", DataType::Utf8, 
true)].into()),
                       true,
                   )]
                   .into(),
               ),
               true,
           )]);
   
           let logical_schema = Schema::new(vec![Field::new(
               "nested",
               DataType::Struct(
                   vec![Field::new(
                       "a",
                       DataType::Struct(
                           vec![
                               Field::new("b", DataType::Utf8, true),
                               Field::new("c", DataType::Int32, true),
                           ]
                           .into(),
                       ),
                       true,
                   )]
                   .into(),
               ),
               true,
           )]);
   
           let rewriter = PhysicalExprSchemaRewriter::new(&physical_schema, 
&logical_schema);
   
           let column_expr = Arc::new(Column::new("nested", 0));
   
           let result = rewriter.rewrite(column_expr).unwrap();
   
           let expected = Arc::new(CastExpr::new(
               Arc::new(Column::new("nested", 0)),
               DataType::Struct(
                   vec![Field::new(
                       "a",
                       DataType::Struct(
                           vec![
                               Field::new("b", DataType::Utf8, true),
                               Field::new("c", DataType::Int32, true),
                           ]
                           .into(),
                       ),
                       true,
                   )]
                   .into(),
               ),
               None,
           )) as Arc<dyn PhysicalExpr>;
   
           assert_eq!(result.to_string(), expected.to_string());
       }
   ```
   
   ```text
   called `Result::unwrap()` on an `Err` value: Execution("Cannot cast column 
'nested' from 'Struct(a Struct(b Utf8))' (physical data type) to 'Struct(a 
Struct(b Utf8, c Int32))' (logical data type)")
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to