This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 231ae9b31 Return Buffers from ArrayData::buffers instead of slice
(#1799) (#3783)
231ae9b31 is described below
commit 231ae9b31769b62da368b9f1eb355a840540cb06
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Mar 2 16:07:26 2023 +0000
Return Buffers from ArrayData::buffers instead of slice (#1799) (#3783)
* Return Buffers from ArrayData::buffers instead of slice (#1799)
* Clippy
---
arrow-arith/src/boolean.rs | 2 +-
arrow-array/src/array/primitive_array.rs | 4 +-
arrow-array/src/array/union_array.rs | 30 +++++-----
arrow-csv/src/reader/mod.rs | 2 +-
arrow-data/src/data/buffers.rs | 97 ++++++++++++++++++++++++++++++++
arrow-data/src/data/mod.rs | 10 +++-
arrow-json/src/reader.rs | 6 +-
arrow-select/src/filter.rs | 6 +-
arrow-select/src/nullif.rs | 2 +-
arrow/src/ffi.rs | 4 +-
10 files changed, 132 insertions(+), 31 deletions(-)
diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
index 5bd39a673..61942dc90 100644
--- a/arrow-arith/src/boolean.rs
+++ b/arrow-arith/src/boolean.rs
@@ -353,7 +353,7 @@ pub fn not(left: &BooleanArray) -> Result<BooleanArray,
ArrowError> {
let data = left.data_ref();
let null_bit_buffer = data.nulls().map(|b| b.inner().sliced());
- let values = buffer_unary_not(&data.buffers()[0], left_offset, len);
+ let values = buffer_unary_not(data.buffers()[0], left_offset, len);
let data = unsafe {
ArrayData::new_unchecked(
diff --git a/arrow-array/src/array/primitive_array.rs
b/arrow-array/src/array/primitive_array.rs
index 0e28060b2..408f0c4ae 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -1234,7 +1234,7 @@ mod tests {
fn test_primitive_array_from_vec() {
let buf = Buffer::from_slice_ref([0, 1, 2, 3, 4]);
let arr = Int32Array::from(vec![0, 1, 2, 3, 4]);
- assert_eq!(buf, arr.data.buffers()[0]);
+ assert_eq!(buf, *arr.data.buffers()[0]);
assert_eq!(5, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(0, arr.null_count());
@@ -1740,7 +1740,7 @@ mod tests {
.build()
.unwrap();
let arr = Int32Array::from(data);
- assert_eq!(buf2, arr.data.buffers()[0]);
+ assert_eq!(buf2, *arr.data.buffers()[0]);
assert_eq!(5, arr.len());
assert_eq!(0, arr.null_count());
for i in 0..3 {
diff --git a/arrow-array/src/array/union_array.rs
b/arrow-array/src/array/union_array.rs
index f215fb0de..867eb8d59 100644
--- a/arrow-array/src/array/union_array.rs
+++ b/arrow-array/src/array/union_array.rs
@@ -409,7 +409,7 @@ mod tests {
// Check type ids
assert_eq!(
- union.data().buffers()[0],
+ *union.data().buffers()[0],
Buffer::from_slice_ref(&expected_type_ids)
);
for (i, id) in expected_type_ids.iter().enumerate() {
@@ -418,7 +418,7 @@ mod tests {
// Check offsets
assert_eq!(
- union.data().buffers()[1],
+ *union.data().buffers()[1],
Buffer::from_slice_ref(&expected_value_offsets)
);
for (i, id) in expected_value_offsets.iter().enumerate() {
@@ -427,15 +427,15 @@ mod tests {
// Check data
assert_eq!(
- union.data().child_data()[0].buffers()[0],
+ *union.data().child_data()[0].buffers()[0],
Buffer::from_slice_ref([1_i32, 4, 6])
);
assert_eq!(
- union.data().child_data()[1].buffers()[0],
+ *union.data().child_data()[1].buffers()[0],
Buffer::from_slice_ref([2_i32, 7])
);
assert_eq!(
- union.data().child_data()[2].buffers()[0],
+ *union.data().child_data()[2].buffers()[0],
Buffer::from_slice_ref([3_i32, 5]),
);
@@ -467,7 +467,7 @@ mod tests {
// Check type ids
assert_eq!(
- union.data().buffers()[0],
+ *union.data().buffers()[0],
Buffer::from_slice_ref(&expected_type_ids)
);
for (i, id) in expected_type_ids.iter().enumerate() {
@@ -476,7 +476,7 @@ mod tests {
// Check offsets
assert_eq!(
- union.data().buffers()[1],
+ *union.data().buffers()[1],
Buffer::from_slice_ref(&expected_value_offsets)
);
for (i, id) in expected_value_offsets.iter().enumerate() {
@@ -660,7 +660,7 @@ mod tests {
.unwrap();
// Check type ids
- assert_eq!(Buffer::from_slice_ref(type_ids),
array.data().buffers()[0]);
+ assert_eq!(Buffer::from_slice_ref(type_ids),
*array.data().buffers()[0]);
for (i, id) in type_ids.iter().enumerate() {
assert_eq!(id, &array.type_id(i));
}
@@ -668,7 +668,7 @@ mod tests {
// Check offsets
assert_eq!(
Buffer::from_slice_ref(value_offsets),
- array.data().buffers()[1]
+ *array.data().buffers()[1]
);
for (i, id) in value_offsets.iter().enumerate() {
assert_eq!(id, &array.value_offset(i));
@@ -736,7 +736,7 @@ mod tests {
// Check type ids
assert_eq!(
Buffer::from_slice_ref(&expected_type_ids),
- union.data().buffers()[0]
+ *union.data().buffers()[0]
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &union.type_id(i));
@@ -747,16 +747,16 @@ mod tests {
// Check data
assert_eq!(
- union.data().child_data()[0].buffers()[0],
+ *union.data().child_data()[0].buffers()[0],
Buffer::from_slice_ref([1_i32, 0, 0, 4, 0, 6, 0]),
);
assert_eq!(
Buffer::from_slice_ref([0_i32, 2_i32, 0, 0, 0, 0, 7]),
- union.data().child_data()[1].buffers()[0]
+ *union.data().child_data()[1].buffers()[0]
);
assert_eq!(
Buffer::from_slice_ref([0_i32, 0, 3_i32, 0, 5, 0, 0]),
- union.data().child_data()[2].buffers()[0]
+ *union.data().child_data()[2].buffers()[0]
);
assert_eq!(expected_array_values.len(), union.len());
@@ -785,7 +785,7 @@ mod tests {
// Check type ids
assert_eq!(
Buffer::from_slice_ref(&expected_type_ids),
- union.data().buffers()[0]
+ *union.data().buffers()[0]
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &union.type_id(i));
@@ -847,7 +847,7 @@ mod tests {
// Check type ids
assert_eq!(
Buffer::from_slice_ref(&expected_type_ids),
- union.data().buffers()[0]
+ *union.data().buffers()[0]
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &union.type_id(i));
diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index e78f2d0ba..84d55c4ae 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -2482,7 +2482,7 @@ mod tests {
for v in *values {
t.update(v, None)
}
- assert_eq!(&t.get(), expected, "{:?}", values)
+ assert_eq!(&t.get(), expected, "{values:?}")
}
}
}
diff --git a/arrow-data/src/data/buffers.rs b/arrow-data/src/data/buffers.rs
new file mode 100644
index 000000000..3b57bfe0e
--- /dev/null
+++ b/arrow-data/src/data/buffers.rs
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_buffer::Buffer;
+use std::iter::Chain;
+use std::ops::Index;
+
+/// A collection of [`Buffer`]
+#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
+pub struct Buffers<'a>([Option<&'a Buffer>; 2]);
+
+impl<'a> Buffers<'a> {
+ /// Temporary will be removed once ArrayData does not store `Vec<Buffer>`
directly (#3769)
+ #[inline]
+ pub(crate) fn from_slice(a: &'a [Buffer]) -> Self {
+ match a.len() {
+ 0 => Self([None, None]),
+ 1 => Self([Some(&a[0]), None]),
+ _ => Self([Some(&a[0]), Some(&a[1])]),
+ }
+ }
+
+ /// Returns the number of [`Buffer`] in this collection
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.0[0].is_some() as usize + self.0[1].is_some() as usize
+ }
+
+ /// Returns `true` if this collection is empty
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.0[0].is_none() && self.0[1].is_none()
+ }
+
+ #[inline]
+ pub fn iter(&self) -> IntoIter<'a> {
+ self.into_iter()
+ }
+
+ /// Converts this [`Buffers`] to a `Vec<Buffer>`
+ #[inline]
+ pub fn to_vec(&self) -> Vec<Buffer> {
+ self.iter().cloned().collect()
+ }
+}
+
+impl<'a> Index<usize> for Buffers<'a> {
+ type Output = &'a Buffer;
+
+ #[inline]
+ fn index(&self, index: usize) -> &Self::Output {
+ self.0[index].as_ref().unwrap()
+ }
+}
+
+impl<'a> IntoIterator for Buffers<'a> {
+ type Item = &'a Buffer;
+ type IntoIter = IntoIter<'a>;
+
+ #[inline]
+ fn into_iter(self) -> Self::IntoIter {
+ IntoIter(self.0[0].into_iter().chain(self.0[1].into_iter()))
+ }
+}
+
+type OptionIter<'a> = std::option::IntoIter<&'a Buffer>;
+
+/// [`Iterator`] for [`Buffers`]
+pub struct IntoIter<'a>(Chain<OptionIter<'a>, OptionIter<'a>>);
+
+impl<'a> Iterator for IntoIter<'a> {
+ type Item = &'a Buffer;
+
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ self.0.next()
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.0.size_hint()
+ }
+}
diff --git a/arrow-data/src/data/mod.rs b/arrow-data/src/data/mod.rs
index d76cb9eb1..051deef07 100644
--- a/arrow-data/src/data/mod.rs
+++ b/arrow-data/src/data/mod.rs
@@ -30,6 +30,9 @@ use std::sync::Arc;
use crate::equal;
+mod buffers;
+pub use buffers::*;
+
#[allow(unused)] // Private until ready (#1176)
mod bytes;
#[allow(unused)] // Private until ready (#1176)
@@ -371,9 +374,10 @@ impl ArrayData {
&self.data_type
}
- /// Returns a slice of the [`Buffer`]s that hold the data.
- pub fn buffers(&self) -> &[Buffer] {
- &self.buffers[..]
+ /// Returns the [`Buffers`] storing data for this [`ArrayData`]
+ pub fn buffers(&self) -> Buffers<'_> {
+ // In future ArrayData won't store data contiguously as `Vec<Buffer>`
(#1799)
+ Buffers::from_slice(&self.buffers)
}
/// Returns a slice of children [`ArrayData`]. This will be non
diff --git a/arrow-json/src/reader.rs b/arrow-json/src/reader.rs
index 0ef438e36..3ac39c110 100644
--- a/arrow-json/src/reader.rs
+++ b/arrow-json/src/reader.rs
@@ -2210,7 +2210,7 @@ mod tests {
.unwrap();
// test that the list offsets are correct
assert_eq!(
- cc.data().buffers()[0],
+ *cc.data().buffers()[0],
Buffer::from_slice_ref([0i32, 2, 2, 4, 5])
);
let cc = as_boolean_array(cc.values());
@@ -2230,7 +2230,7 @@ mod tests {
.unwrap();
// test that the list offsets are correct
assert_eq!(
- dd.data().buffers()[0],
+ *dd.data().buffers()[0],
Buffer::from_slice_ref([0i32, 1, 1, 2, 6])
);
@@ -2374,7 +2374,7 @@ mod tests {
let read: &ListArray =
read.as_any().downcast_ref::<ListArray>().unwrap();
let expected = expected.as_any().downcast_ref::<ListArray>().unwrap();
assert_eq!(
- read.data().buffers()[0],
+ *read.data().buffers()[0],
Buffer::from_slice_ref([0i32, 2, 3, 6, 6, 6, 7])
);
// compare list null buffers
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index d8ea9fceb..6ba08746d 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -82,7 +82,7 @@ impl<'a> IndexIterator<'a> {
fn new(filter: &'a BooleanArray, remaining: usize) -> Self {
assert_eq!(filter.null_count(), 0);
let data = filter.data();
- let iter = BitIndexIterator::new(&data.buffers()[0], data.offset(),
data.len());
+ let iter = BitIndexIterator::new(data.buffers()[0], data.offset(),
data.len());
Self { remaining, iter }
}
}
@@ -470,7 +470,7 @@ fn filter_boolean(values: &BooleanArray, predicate:
&FilterPredicate) -> Boolean
assert_eq!(data.buffers().len(), 1);
assert_eq!(data.child_data().len(), 0);
- let values = filter_bits(&data.buffers()[0], data.offset(), predicate);
+ let values = filter_bits(data.buffers()[0], data.offset(), predicate);
let mut builder = ArrayDataBuilder::new(DataType::Boolean)
.len(predicate.count)
@@ -572,7 +572,7 @@ where
Self {
src_offsets: array.value_offsets(),
- src_values: &array.data().buffers()[1],
+ src_values: array.data().buffers()[1],
dst_offsets,
dst_values,
cur_offset,
diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs
index 4b052ce00..ea0c8e3d5 100644
--- a/arrow-select/src/nullif.rs
+++ b/arrow-select/src/nullif.rs
@@ -56,7 +56,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) ->
Result<ArrayRef, ArrowE
let (right, r_offset) = match right_data.nulls() {
Some(nulls) => (
buffer_bin_and(
- &right_data.buffers()[0],
+ right_data.buffers()[0],
right_data.offset(),
nulls.buffer(),
nulls.offset(),
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 4d62b9e7c..c767a69e6 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -1158,7 +1158,7 @@ mod tests {
// Check type ids
assert_eq!(
Buffer::from_slice_ref(&expected_type_ids),
- array.data().buffers()[0]
+ *array.data().buffers()[0]
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &array.type_id(i));
@@ -1222,7 +1222,7 @@ mod tests {
// Check type ids
assert_eq!(
Buffer::from_slice_ref(&expected_type_ids),
- array.data().buffers()[0]
+ *array.data().buffers()[0]
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &array.type_id(i));