tustvold commented on code in PR #4959:
URL: https://github.com/apache/arrow-rs/pull/4959#discussion_r1366866144
##########
arrow-array/src/record_batch.rs:
##########
@@ -334,6 +335,46 @@ impl RecordBatch {
&self.columns[..]
}
+ /// Remove column by index and return it.
+ ///
+ /// Return `Some(ArrayRef)` if the column is removed, otherwise return
`None`.
+ /// - Return `None` if the `index` is out of bounds
+ /// - Return `None` if the `index` is in bounds but the schema is shared
(i.e. ref count > 1)
+ ///
+ /// ```
+ /// use std::sync::Arc;
+ /// use arrow_array::{BooleanArray, Int32Array, RecordBatch};
+ /// use arrow_schema::{DataType, Field, Schema};
+ /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ /// let bool_array = BooleanArray::from(vec![true, false, false, true,
true]);
+ /// let schema = Schema::new(vec![
+ /// Field::new("id", DataType::Int32, false),
+ /// Field::new("bool", DataType::Boolean, false),
+ /// ]);
+ ///
+ /// let mut batch = RecordBatch::try_new(Arc::new(schema),
vec![Arc::new(id_array), Arc::new(bool_array)]).unwrap();
+ ///
+ /// let removed_column = batch.remove_column(0).unwrap();
+ ///
assert_eq!(removed_column.as_any().downcast_ref::<Int32Array>().unwrap(),
&Int32Array::from(vec![1, 2, 3, 4, 5]));
+ /// assert_eq!(batch.num_columns(), 1);
+ /// ```
+ pub fn remove_column(&mut self, index: usize) -> Option<ArrayRef> {
+ if index < self.num_columns() {
+ let new_schema = mem::replace(&mut self.schema,
Arc::new(Schema::empty()));
Review Comment:
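This approach isn't panic-safe: if anything panics after the `mem::replace`
and before the real schema is put back, the batch is left holding an empty
schema.
Given that the `FieldRef`s are themselves reference counted, what do you
think of just creating a new `Fields`? We are going to have to allocate
regardless.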
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]