jorgecarleitao commented on a change in pull request #9235: URL: https://github.com/apache/arrow/pull/9235#discussion_r559729105
########## File path: rust/arrow/src/buffer.rs ########## @@ -758,39 +793,54 @@ impl MutableBuffer { } } - /// Ensures that this buffer has at least `capacity` slots in this buffer. This will - /// also ensure the new capacity will be a multiple of 64 bytes. - /// - /// Returns the new capacity for this buffer. - pub fn reserve(&mut self, capacity: usize) -> usize { - if capacity > self.capacity { - let new_capacity = bit_util::round_upto_multiple_of_64(capacity); - let new_capacity = cmp::max(new_capacity, self.capacity * 2); - self.data = - unsafe { memory::reallocate(self.data, self.capacity, new_capacity) }; - self.capacity = new_capacity; + fn reserve_at_least(&mut self, capacity: usize) { + let new_capacity = bit_util::round_upto_multiple_of_64(capacity); + let new_capacity = std::cmp::max(new_capacity, self.capacity * 2); + self.data = unsafe { memory::reallocate(self.data, self.capacity, new_capacity) }; + self.capacity = new_capacity; + } + + /// Ensures that this buffer has at least `self.len + additional` bytes. This re-allocates iff + /// `self.len + additional > capacity`. + /// # Example + /// ``` + /// # use arrow::buffer::{Buffer, MutableBuffer}; + /// let mut buffer = MutableBuffer::new(0); + /// buffer.reserve(253); // allocates for the first time + /// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation + /// let buffer: Buffer = buffer.into(); + /// assert_eq!(buffer.len(), 253); + /// ``` + // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just + // exits. + #[inline(always)] + pub fn reserve(&mut self, additional: usize) { + let required_cap = self.len + additional; + if required_cap > self.capacity { + self.reserve_at_least(required_cap) } - self.capacity } - /// Resizes the buffer so that the `len` will equal to the `new_len`. - /// - /// If `new_len` is greater than `len`, the buffer's length is simply adjusted to be - /// the former, optionally extending the capacity. The data between `len` and - /// `new_len` will be zeroed out. - /// - /// If `new_len` is less than `len`, the buffer will be truncated. - pub fn resize(&mut self, new_len: usize) { + /// Resizes the buffer, either truncating its contents (with no change in capacity), or Review comment: I opted for not doing so for the following reasons: * `Vec::resize` has no such argument * `Vec::resize` does not reallocate itself when `new_len < len` I.e. my general goal is to have `MutableBuffer`'s API as close as possible to `Vec` so that it can serve as a drop-in replacement that people can easily relate to. What could make sense would be to offer a `shrink_to_fit` method (which typically triggers a reallocation). I haven't seen a use case where `shrink_to_fit` was needed on a `MutableBuffer`, though. My understanding is that because Arrow buffers are immutable, people only use `MutableBuffer` to build new arrays, and, when building arrays, there is typically no use-case to shrink the buffer (i.e. in 100% of the cases in the crates so far we just grow them as needed). My understanding is that such memory pressure typically emerges in long-living mutable containers. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org