alamb commented on code in PR #9007:
URL: https://github.com/apache/arrow-rs/pull/9007#discussion_r2624692184
##########
arrow-cast/src/cast/mod.rs:
##########
@@ -1255,7 +1263,25 @@ pub fn cast_with_options(
let column = array.column(from_field_idx);
cast_with_options(column, to_field.data_type(),
cast_options)
})
- .collect::<Result<Vec<ArrayRef>, ArrowError>>()?
+ .collect::<Result<Vec<ArrayRef>, ArrowError>>()
+ {
+ Ok(casted_fields) => casted_fields,
+ Err(e) => {
+ // If it's Field not found, we cast field by field
+ if !e.to_string().starts_with("Field '")
Review Comment:
I came up with this, but it requires an additional cast 🤔
This type of thing is perhaps why @tustvold initially suggested keeping the
struct casting in arrow-rs simple and building something more sophisticated in
DataFusion (which could handle this, along with more schema evolution
cases) 🤔
```rust
/// Casts a struct array to a struct with the layout described by `to_fields`.
///
/// The child columns are converted by `cast_struct_inner`; the converted
/// columns are then assembled into a new `StructArray` that uses a clone of
/// `to_fields` and a clone of whatever `array.nulls()` returns for the input.
///
/// # Errors
///
/// Propagates any error from `cast_struct_inner` or from
/// `StructArray::try_new`.
fn cast_struct(
    array: &StructArray,
    from_fields: &Fields,
    to_fields: &Fields,
    cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError> {
    let casted_columns =
        cast_struct_inner(array.columns(), from_fields, to_fields, cast_options)?;
    let validity = array.nulls().cloned();
    StructArray::try_new(to_fields.clone(), casted_columns, validity)
        .map(|casted| Arc::new(casted) as ArrayRef)
}
fn cast_struct_inner(
columns: &[ArrayRef],
from_fields: &Fields,
to_fields: &Fields,
cast_options: &CastOptions,
) -> Result<Vec<ArrayRef>, ArrowError> {
// Fast path: if field names are in the same order, we can just zip
and cast
let fields_match_order = from_fields.len() == to_fields.len()
&& from_fields
.iter()
.zip(to_fields.iter())
.all(|(f1, f2)| f1.name() == f2.name());
if fields_match_order {
// Fast path: cast columns in order
return columns
.iter()
.zip(to_fields.iter())
.map(|(column, field)| {
cast_with_options(column, field.data_type(), cast_options)
})
.collect();
}
// Slow path 1: match fields by name and reorder
let mut result = Vec::with_capacity(to_fields.len());
let mut first_missing_field = None;
for to_field in to_fields {
let Some(from_field_idx) = from_fields
.iter()
.position(|from_field| from_field.name() == to_field.name())
else {
first_missing_field = Some(to_field);
break;
};
result.push(cast_with_options(&columns[from_field_idx],
to_field.data_type(), cast_options)?);
}
let Some(first_missing_field) = first_missing_field else {
return Ok(result);
};
// slow path 2: field names don't match, try to cast field by field
// TODO avoid recasting fields that were already casted
result.clear();
for (l, field) in columns.iter().zip(to_fields) {
let cast_field = cast_with_options(l, field.data_type(),
cast_options)
.map_err(|e| ArrowError::CastError(
format!("Field '{}' not found in source struct, and failed
position casting of '{}': {}",
first_missing_field.name(),
field.name(),
e)))?;
result.push(cast_field)
}
Ok(result)
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]