This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 663a6374b4 Improve documentation for MutableArrayData (#6272)
663a6374b4 is described below

commit 663a6374b4fec72aa458ad05f2e76bde7c2e3cec
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Aug 20 06:41:55 2024 -0400

    Improve documentation for MutableArrayData (#6272)
---
 arrow-data/src/transform/mod.rs | 128 ++++++++++++++++++++++++++++------------
 1 file changed, 91 insertions(+), 37 deletions(-)

diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs
index 1e43bf488c..c6d834aea0 100644
--- a/arrow-data/src/transform/mod.rs
+++ b/arrow-data/src/transform/mod.rs
@@ -97,40 +97,78 @@ fn build_extend_null_bits(array: &ArrayData, use_nulls: 
bool) -> ExtendNullBits
     }
 }
 
-/// Struct to efficiently and interactively create an [ArrayData] from an 
existing [ArrayData] by
+/// Efficiently create an [ArrayData] from one or more existing [ArrayData]s by
 /// copying chunks.
 ///
-/// The main use case of this struct is to perform unary operations to arrays 
of arbitrary types,
-/// such as `filter` and `take`.
+/// The main use case of this struct is to perform unary operations on arrays 
of
+/// arbitrary types, such as `filter` and `take`.
+///
+/// # Example
+/// ```
+/// use arrow_buffer::Buffer;
+/// use arrow_data::ArrayData;
+/// use arrow_data::transform::MutableArrayData;
+/// use arrow_schema::DataType;
+/// fn i32_array(values: &[i32]) -> ArrayData {
+///   ArrayData::try_new(DataType::Int32, values.len(), None, 0, 
vec![Buffer::from_slice_ref(values)], vec![]).unwrap()
+/// }
+/// let arr1  = i32_array(&[1, 2, 3, 4, 5]);
+/// let arr2  = i32_array(&[6, 7, 8, 9, 10]);
+/// // Create a mutable array for copying values from arr1 and arr2, with a 
capacity for 6 elements
+/// let capacity = 3 * size_of::<i32>();
+/// let mut mutable = MutableArrayData::new(vec![&arr1, &arr2], false, 10);
+/// // Copy the first 3 elements from arr1
+/// mutable.extend(0, 0, 3);
+/// // Copy the last 3 elements from arr2 (indices 2..5; `end` is exclusive)
+/// mutable.extend(1, 2, 5);
+/// // Complete the MutableArrayData into a new ArrayData
+/// let frozen = mutable.freeze();
+/// assert_eq!(frozen, i32_array(&[1, 2, 3, 8, 9, 10]));
+/// ```
 pub struct MutableArrayData<'a> {
+    /// Input arrays: the data being read FROM.
+    ///
+    /// Note this is "dead code" because all actual references to the arrays 
are
+    /// stored in closures for extending values and nulls.
     #[allow(dead_code)]
     arrays: Vec<&'a ArrayData>,
-    /// The attributes in [_MutableArrayData] cannot be in [MutableArrayData] 
due to
-    /// mutability invariants (interior mutability):
-    /// [MutableArrayData] contains a function that can only mutate 
[_MutableArrayData], not
-    /// [MutableArrayData] itself
+
+    /// In progress output array: The data being written TO
+    ///
+    /// Note these fields are in a separate struct, [_MutableArrayData], as 
they
+    /// cannot be in [MutableArrayData] itself due to mutability invariants 
(interior
+    /// mutability): [MutableArrayData] contains a function that can only 
mutate
+    /// [_MutableArrayData], not [MutableArrayData] itself
     data: _MutableArrayData<'a>,
 
-    /// the child data of the `Array` in Dictionary arrays.
-    /// This is not stored in `MutableArrayData` because these values constant 
and only needed
-    /// at the end, when freezing [_MutableArrayData].
+    /// The child data of the `Array` in Dictionary arrays.
+    ///
+    /// This is not stored in `_MutableArrayData` because these values are
+    /// constant and only needed at the end, when freezing [_MutableArrayData].
     dictionary: Option<ArrayData>,
 
-    /// Variadic data buffers referenced by views
-    /// This is not stored in `MutableArrayData` because these values constant 
 and only needed
-    /// at the end, when freezing [_MutableArrayData]
+    /// Variadic data buffers referenced by views.
+    ///
+    /// Note this is not stored in `_MutableArrayData` because these 
values
+    /// are constant and only needed at the end, when freezing
+    /// [_MutableArrayData]
     variadic_data_buffers: Vec<Buffer>,
 
-    /// function used to extend values from arrays. This function's lifetime 
is bound to the array
-    /// because it reads values from it.
+    /// Function used to extend the output array with values from the input arrays.
+    ///
+    /// This function's lifetime is bound to the input arrays because it reads
+    /// values from them.
     extend_values: Vec<Extend<'a>>,
 
-    /// function used to extend nulls from arrays. This function's lifetime is 
bound to the array
-    /// because it reads nulls from it.
+    /// Function used to extend the output array with nulls from the input arrays.
+    ///
+    /// This function's lifetime is bound to the input arrays because it reads
+    /// nulls from them.
     extend_null_bits: Vec<ExtendNullBits<'a>>,
 
-    /// function used to extend nulls.
-    /// this is independent of the arrays and therefore has no lifetime.
+    /// Function used to extend the output array with null elements.
+    ///
+    /// This function is independent of the input arrays and therefore has no 
lifetime parameter.
     extend_nulls: ExtendNulls,
 }
 
@@ -307,47 +345,63 @@ fn preallocate_offset_and_binary_buffer<Offset: 
ArrowNativeType + Integer>(
     ]
 }
 
-/// Define capacities of child data or data buffers.
+/// Define capacities to pre-allocate for child data or data buffers.
 #[derive(Debug, Clone)]
 pub enum Capacities {
     /// Binary, Utf8 and LargeUtf8 data types
-    /// Define
+    ///
+    /// Defines
     /// * the capacity of the array offsets
     /// * the capacity of the binary/ str buffer
     Binary(usize, Option<usize>),
     /// List and LargeList data types
-    /// Define
+    ///
+    /// Defines
     /// * the capacity of the array offsets
     /// * the capacity of the child data
     List(usize, Option<Box<Capacities>>),
     /// Struct type
+    ///
+    /// Defines
     /// * the capacity of the array
     /// * the capacities of the fields
     Struct(usize, Option<Vec<Capacities>>),
     /// Dictionary type
+    ///
+    /// Defines
     /// * the capacity of the array/keys
     /// * the capacity of the values
     Dictionary(usize, Option<Box<Capacities>>),
     /// Don't preallocate inner buffers and rely on array growth strategy
     Array(usize),
 }
+
 impl<'a> MutableArrayData<'a> {
-    /// returns a new [MutableArrayData] with capacity to `capacity` slots and 
specialized to create an
-    /// [ArrayData] from multiple `arrays`.
+    /// Returns a new [MutableArrayData] with capacity for `capacity` slots and
+    /// specialized to create an [ArrayData] from multiple `arrays`.
     ///
-    /// `use_nulls` is a flag used to optimize insertions. It should be 
`false` if the only source of nulls
-    /// are the arrays themselves and `true` if the user plans to call 
[MutableArrayData::extend_nulls].
-    /// In other words, if `use_nulls` is `false`, calling 
[MutableArrayData::extend_nulls] should not be used.
+    /// # Arguments
+    /// * `arrays` - the source arrays to copy from
+    /// * `use_nulls` - a flag used to optimize insertions
+    ///   - `false` if the only source of nulls are the arrays themselves
+    ///   - `true` if the user plans to call [MutableArrayData::extend_nulls].
+    /// * `capacity` - the preallocated capacity of the output array, in bytes
+    ///
+    /// Thus, if `use_nulls` is `false`,
+    /// [MutableArrayData::extend_nulls] should not be called.
     pub fn new(arrays: Vec<&'a ArrayData>, use_nulls: bool, capacity: usize) 
-> Self {
         Self::with_capacities(arrays, use_nulls, Capacities::Array(capacity))
     }
 
-    /// Similar to [MutableArrayData::new], but lets users define the 
preallocated capacities of the array.
-    /// See also [MutableArrayData::new] for more information on the arguments.
+    /// Similar to [MutableArrayData::new], but lets users define the
+    /// preallocated capacities of the array with more granularity.
     ///
-    /// # Panic
-    /// This function panics if the given `capacities` don't match the data 
type of `arrays`. Or when
-    /// a [Capacities] variant is not yet supported.
+    /// See [MutableArrayData::new] for more information on the arguments.
+    ///
+    /// # Panics
+    ///
+    /// This function panics if the given `capacities` don't match the data 
type
+    /// of `arrays`, or when a [Capacities] variant is not yet supported.
     pub fn with_capacities(
         arrays: Vec<&'a ArrayData>,
         use_nulls: bool,
@@ -646,7 +700,7 @@ impl<'a> MutableArrayData<'a> {
         }
     }
 
-    /// Extends this array with a chunk of its source arrays
+    /// Extends the in progress array with a region of the input arrays
     ///
     /// # Arguments
     /// * `index` - the index of array that you what to copy values from
@@ -664,12 +718,11 @@ impl<'a> MutableArrayData<'a> {
         self.data.len += len;
     }
 
-    /// Extends this [MutableArrayData] with null elements, disregarding the 
bound arrays
+    /// Extends the in progress array with null elements, ignoring the input 
arrays.
     ///
     /// # Panics
     ///
     /// Panics if [`MutableArrayData`] not created with `use_nulls` or 
nullable source arrays
-    ///
     pub fn extend_nulls(&mut self, len: usize) {
         self.data.len += len;
         let bit_len = bit_util::ceil(self.data.len, 8);
@@ -697,12 +750,13 @@ impl<'a> MutableArrayData<'a> {
         self.data.null_count
     }
 
-    /// Creates a [ArrayData] from the pushed regions up to this point, 
consuming `self`.
+    /// Creates an [ArrayData] from the in progress array, consuming `self`.
     pub fn freeze(self) -> ArrayData {
         unsafe { self.into_builder().build_unchecked() }
     }
 
-    /// Creates a [ArrayDataBuilder] from the pushed regions up to this point, 
consuming `self`.
+    /// Consumes `self` and returns the in progress array as an [`ArrayDataBuilder`].
+    ///
     /// This is useful for extending the default behavior of MutableArrayData.
     pub fn into_builder(self) -> ArrayDataBuilder {
         let data = self.data;

Reply via email to