alamb commented on code in PR #9007:
URL: https://github.com/apache/arrow-rs/pull/9007#discussion_r2624692184


##########
arrow-cast/src/cast/mod.rs:
##########
@@ -1255,7 +1263,25 @@ pub fn cast_with_options(
                         let column = array.column(from_field_idx);
                         cast_with_options(column, to_field.data_type(), 
cast_options)
                     })
-                    .collect::<Result<Vec<ArrayRef>, ArrowError>>()?
+                    .collect::<Result<Vec<ArrayRef>, ArrowError>>()
+                {
+                    Ok(casted_fields) => casted_fields,
+                    Err(e) => {
+                        // If it's Field not found, we cast field by field
+                        if !e.to_string().starts_with("Field '")

Review Comment:
   I came up with this. But it requires an additional cast 🤔 
   
   This type of thing is perhaps why @tustvold initially suggested keeping the 
struct casting in arrow-rs simple and building something more sophisticated in 
DataFusion (which could handle this, plus other schema evolution cases) 🤔
   
   ```rust
   /// Casts a struct array to a struct with the `to_fields` schema.
   ///
   /// The child columns are converted by `cast_struct_inner`; the result is
   /// assembled into a new `StructArray` that uses `to_fields` as its schema
   /// and a clone of the source array's null buffer (if it has one).
   ///
   /// # Errors
   /// Returns any error produced while casting the children or while building
   /// the resulting `StructArray`.
   fn cast_struct(
       array: &StructArray,
       from_fields: &Fields,
       to_fields: &Fields,
       cast_options: &CastOptions,
   ) -> Result<ArrayRef, ArrowError> {
       let casted_columns =
           cast_struct_inner(array.columns(), from_fields, to_fields, cast_options)?;
       // Struct-level validity is carried over unchanged from the source.
       let validity = array.nulls().cloned();
       let casted = StructArray::try_new(to_fields.clone(), casted_columns, validity)?;
       Ok(Arc::new(casted) as ArrayRef)
   }
   
   fn cast_struct_inner(
           columns: &[ArrayRef],
           from_fields: &Fields,
           to_fields: &Fields,
           cast_options: &CastOptions,
       ) -> Result<Vec<ArrayRef>, ArrowError> {
   
           // Fast path: if field names are in the same order, we can just zip 
and cast
       let fields_match_order = from_fields.len() == to_fields.len()
           && from_fields
           .iter()
           .zip(to_fields.iter())
           .all(|(f1, f2)| f1.name() == f2.name());
   
       if fields_match_order {
           // Fast path: cast columns in order
           return columns
               .iter()
               .zip(to_fields.iter())
               .map(|(column, field)| {
                   cast_with_options(column, field.data_type(), cast_options)
               })
               .collect();
       }
   
       // Slow path 1: match fields by name and reorder
       let mut result = Vec::with_capacity(to_fields.len());
       let mut first_missing_field = None;
       for to_field in to_fields {
           let Some(from_field_idx) = from_fields
               .iter()
               .position(|from_field| from_field.name() == to_field.name()) 
else {
               first_missing_field = Some(to_field);
               break;
           };
           result.push(cast_with_options(&columns[from_field_idx], 
to_field.data_type(), cast_options)?);
       }
   
       let Some(first_missing_field) = first_missing_field else {
           return Ok(result);
       };
   
       // slow path 2: field names don't match, try to cast field by field
       // TODO avoid recasting fields that were already casted
       result.clear();
       for (l, field) in columns.iter().zip(to_fields) {
           let cast_field = cast_with_options(l, field.data_type(), 
cast_options)
               .map_err(|e| ArrowError::CastError(
                   format!("Field '{}' not found in source struct, and failed 
position casting of '{}': {}",
                       first_missing_field.name(),
                       field.name(),
                           e)))?;
           result.push(cast_field)
       }
       Ok(result)
   }
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to