This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 56e8208612 Improve documentation on `ArrayData::offset` (#7385)
56e8208612 is described below

commit 56e820861286dff422c90644d69fd48adc83ff76
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Apr 6 07:38:41 2025 -0400

    Improve documentation on `ArrayData::offset` (#7385)
    
    * Improve documentation on ArrayData::offsets
    
    * Apply suggestions from code review
    
    Co-authored-by: Weston Pace <[email protected]>
    
    ---------
    
    Co-authored-by: Weston Pace <[email protected]>
---
 arrow-data/src/data.rs | 45 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 10b954777d..4c117184de 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -201,26 +201,50 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
 
 #[derive(Debug, Clone)]
 pub struct ArrayData {
-    /// The data type for this array data
+    /// The data type
     data_type: DataType,
 
-    /// The number of elements in this array data
+    /// The number of elements
     len: usize,
 
-    /// The offset into this array data, in number of items
+    /// The offset in number of items (not bytes).
+    ///
+    /// The offset applies to [`Self::child_data`] and [`Self::buffers`]. It
+    /// does NOT apply to [`Self::nulls`].
     offset: usize,
 
-    /// The buffers for this array data. Note that depending on the array types, this
-    /// could hold different kinds of buffers (e.g., value buffer, value offset buffer)
-    /// at different positions.
+    /// The buffers that store the actual data for this array, as defined
+    /// in the [Arrow Spec].
+    ///
+    /// Depending on the array types, [`Self::buffers`] can hold different
+    /// kinds of buffers (e.g., value buffer, value offset buffer) at different
+    /// positions.
+    ///
+    /// The buffer may be larger than needed.  Some items at the beginning may be skipped if
+    /// there is an `offset`.  Some items at the end may be skipped if the buffer is longer than
+    /// we need to satisfy `len`.
+    ///
+    /// [Arrow Spec]: https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout
     buffers: Vec<Buffer>,
 
-    /// The child(ren) of this array. Only non-empty for nested types, currently
-    /// `ListArray` and `StructArray`.
+    /// The child(ren) of this array.
+    ///
+    /// Only non-empty for nested types, such as `ListArray` and
+    /// `StructArray`.
+    ///
+    /// The first logical element in each child element begins at `offset`.
+    ///
+    /// If the child element also has an offset then these offsets are
+    /// cumulative.
     child_data: Vec<ArrayData>,
 
-    /// The null bitmap. A `None` value for this indicates all values are non-null in
-    /// this array.
+    /// The null bitmap.
+    ///
+    /// `None` indicates all values are non-null in this array.
+    ///
+    /// [`Self::offset`] does not apply to the null bitmap. While the
+    /// BooleanBuffer may be sliced (have its own offset) internally, this
+    /// `NullBuffer` always represents exactly `len` elements.
     nulls: Option<NullBuffer>,
 }
 
@@ -555,6 +579,7 @@ impl ArrayData {
     }
 
     /// Returns the `buffer` as a slice of type `T` starting at self.offset
+    ///
     /// # Panics
     /// This function panics if:
     /// * the buffer is not byte-aligned with type T, or

Reply via email to