This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 56e8208612 Improve documentation on `ArrayData::offset` (#7385)
56e8208612 is described below
commit 56e820861286dff422c90644d69fd48adc83ff76
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Apr 6 07:38:41 2025 -0400
Improve documentation on `ArrayData::offset` (#7385)
* Improve documentation on ArrayData::offsets
* Apply suggestions from code review
Co-authored-by: Weston Pace <[email protected]>
---------
Co-authored-by: Weston Pace <[email protected]>
---
arrow-data/src/data.rs | 45 +++++++++++++++++++++++++++++++++++----------
1 file changed, 35 insertions(+), 10 deletions(-)
diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 10b954777d..4c117184de 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -201,26 +201,50 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
#[derive(Debug, Clone)]
pub struct ArrayData {
- /// The data type for this array data
+ /// The data type
data_type: DataType,
- /// The number of elements in this array data
+ /// The number of elements
len: usize,
- /// The offset into this array data, in number of items
+ /// The offset in number of items (not bytes).
+ ///
+ /// The offset applies to [`Self::child_data`] and [`Self::buffers`]. It
+ /// does NOT apply to [`Self::nulls`].
offset: usize,
- /// The buffers for this array data. Note that depending on the array types, this
- /// could hold different kinds of buffers (e.g., value buffer, value offset buffer)
- /// at different positions.
+ /// The buffers that store the actual data for this array, as defined
+ /// in the [Arrow Spec].
+ ///
+ /// Depending on the array types, [`Self::buffers`] can hold different
+ /// kinds of buffers (e.g., value buffer, value offset buffer) at different
+ /// positions.
+ ///
+ /// The buffer may be larger than needed. Some items at the beginning may be skipped if
+ /// there is an `offset`. Some items at the end may be skipped if the buffer is longer than
+ /// we need to satisfy `len`.
+ ///
+ /// [Arrow Spec]: https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout
buffers: Vec<Buffer>,
- /// The child(ren) of this array. Only non-empty for nested types, currently
- /// `ListArray` and `StructArray`.
+ /// The child(ren) of this array.
+ ///
+ /// Only non-empty for nested types, such as `ListArray` and
+ /// `StructArray`.
+ ///
+ /// The first logical element in each child element begins at `offset`.
+ ///
+ /// If the child element also has an offset then these offsets are
+ /// cumulative.
child_data: Vec<ArrayData>,
- /// The null bitmap. A `None` value for this indicates all values are non-null in
- /// this array.
+ /// The null bitmap.
+ ///
+ /// `None` indicates all values are non-null in this array.
+ ///
+ /// [`Self::offset`] does not apply to the null bitmap. While the
+ /// BooleanBuffer may be sliced (have its own offset) internally, this
+ /// `NullBuffer` always represents exactly `len` elements.
nulls: Option<NullBuffer>,
}
@@ -555,6 +579,7 @@ impl ArrayData {
}
/// Returns the `buffer` as a slice of type `T` starting at self.offset
+ ///
/// # Panics
/// This function panics if:
/// * the buffer is not byte-aligned with type T, or