etseidl commented on code in PR #7402:
URL: https://github.com/apache/arrow-rs/pull/7402#discussion_r2037847313
##########
arrow-array/src/record_batch.rs:
##########
@@ -1637,4 +1653,57 @@ mod tests {
"bar"
);
}
+
+ #[test]
+ fn test_batch_with_force_schema() {
Review Comment:
Could you please also add a check where the forced schema succeeds?
##########
arrow-array/src/record_batch.rs:
##########
@@ -359,6 +359,20 @@ impl RecordBatch {
})
}
+ /// Forcibly overrides the schema of this [`RecordBatch`]
+ /// without additional schema checks however bringing all the schema
compatibility responsibilities
+ /// to the caller site.
+ ///
+ /// If provided schema is not compatible with this [`RecordBatch`] columns
the runtime behavior
+ /// is undefined
+ pub fn with_schema_force(self, schema: SchemaRef) -> Result<Self,
ArrowError> {
Review Comment:
Maybe `with_schema_unchecked` would be better?
##########
arrow-array/src/record_batch.rs:
##########
@@ -359,6 +359,20 @@ impl RecordBatch {
})
}
+ /// Forcibly overrides the schema of this [`RecordBatch`]
+ /// without additional schema checks however bringing all the schema
compatibility responsibilities
+ /// to the caller site.
Review Comment:
```suggestion
/// Overrides the schema of this [`RecordBatch`]
/// without additional schema checks. Note, however, that this pushes
all the schema compatibility responsibilities
/// to the caller site. In particular, the caller guarantees that
`schema` is a superset
/// of the current schema as determined by [`Schema::contains`].
```
##########
arrow-array/src/record_batch.rs:
##########
@@ -1637,4 +1653,57 @@ mod tests {
"bar"
);
}
+
+ #[test]
+ fn test_batch_with_force_schema() {
+ fn force_schema_and_get_err_from_batch(
+ record_batch: &RecordBatch,
+ schema_ref: SchemaRef,
+ idx: usize,
+ ) -> Option<ArrowError> {
+ record_batch
+ .clone()
+ .with_schema_force(schema_ref)
+ .unwrap()
+ .project(&[idx])
+ .err()
+ }
+
+ let c: ArrayRef = Arc::new(StringArray::from(vec!["d", "e", "f"]));
+
+ let record_batch =
+ RecordBatch::try_from_iter(vec![("c", c.clone())]).expect("valid
conversion");
+
+ // Test empty schema for non-empty schema batch
+ let invalid_schema_empty = Schema::empty();
+ assert_eq!(
+ force_schema_and_get_err_from_batch(&record_batch,
invalid_schema_empty.into(), 0)
+ .unwrap()
+ .to_string(),
+ "Schema error: project index 0 out of bounds, max field 0"
+ );
+
+ // Wrong number of columns
+ let invalid_schema_more_cols = Schema::new(vec![
+ Field::new("a", DataType::Utf8, false),
+ Field::new("a", DataType::Int32, false),
Review Comment:
```suggestion
Field::new("b", DataType::Int32, false),
```
?. This triggers my OCD 😅
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]