kosiew commented on code in PR #20814:
URL: https://github.com/apache/datafusion/pull/20814#discussion_r2903601416


##########
datafusion/physical-expr-adapter/src/schema_rewriter.rs:
##########
@@ -404,71 +404,69 @@ impl DefaultPhysicalExprAdapterRewriter {
             }
         };
 
-        // Check if the column exists in the physical schema
-        let physical_column_index = match self
-            .physical_file_schema
-            .index_of(column.name())
-        {
-            Ok(index) => index,
-            Err(_) => {
-                if !logical_field.is_nullable() {
-                    return exec_err!(
-                        "Non-nullable column '{}' is missing from the physical 
schema",
-                        column.name()
-                    );
-                }
-                // If the column is missing from the physical schema fill it 
in with nulls.
-                // For a different behavior, provide a custom 
`PhysicalExprAdapter` implementation.
-                let null_value = 
ScalarValue::Null.cast_to(logical_field.data_type())?;
-                return Ok(Transformed::yes(Arc::new(
-                    expressions::Literal::new_with_metadata(
-                        null_value,
-                        Some(FieldMetadata::from(logical_field)),
-                    ),
-                )));
+        let Some((resolved_column, physical_field)) =
+            self.resolve_physical_column(column)?
+        else {
+            if !logical_field.is_nullable() {
+                return exec_err!(
+                    "Non-nullable column '{}' is missing from the physical 
schema",
+                    column.name()
+                );
             }
+            // If the column is missing from the physical schema fill it in 
with nulls.
+            // For a different behavior, provide a custom 
`PhysicalExprAdapter` implementation.
+            let null_value = 
ScalarValue::Null.cast_to(logical_field.data_type())?;
+            return Ok(Transformed::yes(Arc::new(
+                expressions::Literal::new_with_metadata(
+                    null_value,
+                    Some(FieldMetadata::from(logical_field)),
+                ),
+            )));
         };
-        let physical_field = 
self.physical_file_schema.field(physical_column_index);
 
-        if column.index() == physical_column_index && logical_field == 
physical_field {
+        if resolved_column.index() == column.index()
+            && logical_field == physical_field.as_ref()
+        {
             return Ok(Transformed::no(expr));
         }
 
-        let column = self.resolve_column(column, physical_column_index)?;
-
-        if logical_field == physical_field {
+        if logical_field == physical_field.as_ref() {
             // If the fields match (including metadata/nullability), we can 
use the column as is
-            return Ok(Transformed::yes(Arc::new(column)));
+            return Ok(Transformed::yes(Arc::new(resolved_column)));
         }
 
-        if logical_field.data_type() == physical_field.data_type() {
-            // The data type matches, but the field metadata / nullability 
differs.
-            // Emit a CastColumnExpr so downstream schema construction uses 
the logical field.
-            return self.create_cast_column_expr(column, logical_field);
-        }
-
-        // We need to cast the column to the logical data type
+        // We need a cast expression whenever the logical and physical fields 
differ,
+        // whether that difference is only metadata/nullability or also data 
type.
         // TODO: add optimization to move the cast from the column to literal 
expressions in the case of `col = 123`
         // since that's much cheaper to evalaute.
         // See 
https://github.com/apache/datafusion/issues/15780#issuecomment-2824716928
-        self.create_cast_column_expr(column, logical_field)
+        self.create_cast_column_expr(resolved_column, physical_field, 
logical_field)
     }
 
-    /// Resolves a column expression, handling index and type mismatches.
-    ///
-    /// Returns the appropriate Column expression when the column's index or 
data type
-    /// don't match the physical schema. Assumes that the early-exit case 
(both index
-    /// and type match) has already been checked by the caller.
-    fn resolve_column(
+    /// Resolves a logical column to the corresponding physical column and 
field.
+    fn resolve_physical_column(
         &self,
         column: &Column,
-        physical_column_index: usize,
-    ) -> Result<Column> {
-        if column.index() == physical_column_index {
-            Ok(column.clone())
+    ) -> Result<Option<(Column, FieldRef)>> {
+        let Ok(physical_column_index) = 
self.physical_file_schema.index_of(column.name())

Review Comment:
   Good catch. 
   
   The refactor moved the name-based physical-schema lookup into 
`resolve_physical_column()`, and that dropped the one place where we explained 
why we intentionally resolve by name first instead of trusting the incoming 
index. 
   
   I’ll add that explanation back on `resolve_physical_column()` so readers 
still have the context in the place where the lookup now happens.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to