Jefffrey commented on code in PR #18424:
URL: https://github.com/apache/datafusion/pull/18424#discussion_r2484795942
##########
datafusion/functions-nested/src/reverse.rs:
##########
@@ -183,6 +195,75 @@ fn general_array_reverse<O: OffsetSizeTrait +
TryFrom<i64>>(
)?))
}
+fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
+ array: &GenericListViewArray<O>,
+ field: &FieldRef,
+) -> Result<ArrayRef> {
+ let (_, offsets, sizes, values, nulls) = array.clone().into_parts();
+
+ // Construct indices, sizes and offsets for the reversed array by
iterating over
+ // the list view array in the logical order, and reversing the order of
the elements.
+ // We end up with a list view array where the elements are in order,
+ // even if the original array had elements out of order.
+ let mut indices: Vec<O> = Vec::with_capacity(values.len());
+ let mut new_sizes = Vec::with_capacity(sizes.len());
+ let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len());
+ let mut new_nulls =
+ Vec::with_capacity(nulls.clone().map(|nulls|
nulls.len()).unwrap_or(0));
+ new_offsets.push(O::zero());
+ let has_nulls = nulls.is_some();
+ for (i, offset) in offsets.iter().enumerate().take(offsets.len()) {
Review Comment:
```suggestion
for (i, offset) in offsets.iter().enumerate() {
```
##########
datafusion/functions-nested/src/reverse.rs:
##########
@@ -183,6 +195,75 @@ fn general_array_reverse<O: OffsetSizeTrait +
TryFrom<i64>>(
)?))
}
+fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
+ array: &GenericListViewArray<O>,
+ field: &FieldRef,
+) -> Result<ArrayRef> {
+ let (_, offsets, sizes, values, nulls) = array.clone().into_parts();
+
+ // Construct indices, sizes and offsets for the reversed array by
iterating over
+ // the list view array in the logical order, and reversing the order of
the elements.
+ // We end up with a list view array where the elements are in order,
+ // even if the original array had elements out of order.
+ let mut indices: Vec<O> = Vec::with_capacity(values.len());
+ let mut new_sizes = Vec::with_capacity(sizes.len());
+ let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len());
+ let mut new_nulls =
+ Vec::with_capacity(nulls.clone().map(|nulls|
nulls.len()).unwrap_or(0));
+ new_offsets.push(O::zero());
+ let has_nulls = nulls.is_some();
+ for (i, offset) in offsets.iter().enumerate().take(offsets.len()) {
+ // If this array is null, we set the new array to null with size 0 and
continue
+ if let Some(ref nulls) = nulls {
+ if nulls.is_null(i) {
+ new_nulls.push(false); // null
+ new_sizes.push(O::zero());
+ new_offsets.push(new_offsets[i]);
+ continue;
+ } else {
+ new_nulls.push(true); // valid
+ }
+ }
+
+ // Each array is located at [offset, offset + size), so we collect
indices in the reverse order
+ let array_start = offset.as_usize();
+ let array_end = array_start + sizes[i].as_usize();
+ for idx in (array_start..array_end).rev() {
+ indices.push(O::usize_as(idx));
+ }
+ new_sizes.push(sizes[i]);
+ if i < sizes.len() - 1 {
+ new_offsets.push(new_offsets[i] + sizes[i]);
+ }
+ }
+
+ // Materialize values from underlying array with take
+ let indices_array: ArrayRef = if O::IS_LARGE {
+ Arc::new(arrow::array::UInt64Array::from(
+ indices
+ .iter()
+ .map(|i| i.as_usize() as u64)
+ .collect::<Vec<_>>(),
+ ))
+ } else {
+ Arc::new(UInt32Array::from(
+ indices
+ .iter()
+ .map(|i| i.as_usize() as u32)
+ .collect::<Vec<_>>(),
+ ))
+ };
Review Comment:
Hmm I do wonder if there is a better way to do this, will keep thinking 🤔
##########
datafusion/functions-nested/src/reverse.rs:
##########
@@ -183,6 +195,75 @@ fn general_array_reverse<O: OffsetSizeTrait +
TryFrom<i64>>(
)?))
}
+fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
+ array: &GenericListViewArray<O>,
+ field: &FieldRef,
+) -> Result<ArrayRef> {
+ let (_, offsets, sizes, values, nulls) = array.clone().into_parts();
+
+ // Construct indices, sizes and offsets for the reversed array by
iterating over
+ // the list view array in the logical order, and reversing the order of
the elements.
+ // We end up with a list view array where the elements are in order,
+ // even if the original array had elements out of order.
+ let mut indices: Vec<O> = Vec::with_capacity(values.len());
+ let mut new_sizes = Vec::with_capacity(sizes.len());
+ let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len());
+ let mut new_nulls =
+ Vec::with_capacity(nulls.clone().map(|nulls|
nulls.len()).unwrap_or(0));
+ new_offsets.push(O::zero());
+ let has_nulls = nulls.is_some();
+ for (i, offset) in offsets.iter().enumerate().take(offsets.len()) {
+ // If this array is null, we set the new array to null with size 0 and
continue
+ if let Some(ref nulls) = nulls {
+ if nulls.is_null(i) {
+ new_nulls.push(false); // null
+ new_sizes.push(O::zero());
+ new_offsets.push(new_offsets[i]);
+ continue;
+ } else {
+ new_nulls.push(true); // valid
+ }
+ }
+
+ // Each array is located at [offset, offset + size), so we collect
indices in the reverse order
+ let array_start = offset.as_usize();
+ let array_end = array_start + sizes[i].as_usize();
+ for idx in (array_start..array_end).rev() {
+ indices.push(O::usize_as(idx));
+ }
+ new_sizes.push(sizes[i]);
+ if i < sizes.len() - 1 {
+ new_offsets.push(new_offsets[i] + sizes[i]);
+ }
Review Comment:
Could you help me understand the reasoning for this check?
##########
datafusion/functions-nested/src/reverse.rs:
##########
@@ -183,6 +195,75 @@ fn general_array_reverse<O: OffsetSizeTrait +
TryFrom<i64>>(
)?))
}
+fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
+ array: &GenericListViewArray<O>,
+ field: &FieldRef,
+) -> Result<ArrayRef> {
+ let (_, offsets, sizes, values, nulls) = array.clone().into_parts();
+
+ // Construct indices, sizes and offsets for the reversed array by
iterating over
+ // the list view array in the logical order, and reversing the order of
the elements.
+ // We end up with a list view array where the elements are in order,
+ // even if the original array had elements out of order.
+ let mut indices: Vec<O> = Vec::with_capacity(values.len());
+ let mut new_sizes = Vec::with_capacity(sizes.len());
+ let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len());
+ let mut new_nulls =
+ Vec::with_capacity(nulls.clone().map(|nulls|
nulls.len()).unwrap_or(0));
+ new_offsets.push(O::zero());
+ let has_nulls = nulls.is_some();
+ for (i, offset) in offsets.iter().enumerate().take(offsets.len()) {
+ // If this array is null, we set the new array to null with size 0 and
continue
+ if let Some(ref nulls) = nulls {
+ if nulls.is_null(i) {
+ new_nulls.push(false); // null
+ new_sizes.push(O::zero());
+ new_offsets.push(new_offsets[i]);
+ continue;
+ } else {
+ new_nulls.push(true); // valid
+ }
+ }
+
+ // Each array is located at [offset, offset + size), so we collect
indices in the reverse order
+ let array_start = offset.as_usize();
+ let array_end = array_start + sizes[i].as_usize();
+ for idx in (array_start..array_end).rev() {
+ indices.push(O::usize_as(idx));
+ }
+ new_sizes.push(sizes[i]);
+ if i < sizes.len() - 1 {
+ new_offsets.push(new_offsets[i] + sizes[i]);
+ }
+ }
+
+ // Materialize values from underlying array with take
+ let indices_array: ArrayRef = if O::IS_LARGE {
+ Arc::new(arrow::array::UInt64Array::from(
+ indices
+ .iter()
+ .map(|i| i.as_usize() as u64)
+ .collect::<Vec<_>>(),
+ ))
+ } else {
+ Arc::new(UInt32Array::from(
+ indices
+ .iter()
+ .map(|i| i.as_usize() as u32)
+ .collect::<Vec<_>>(),
+ ))
+ };
+ let values_reversed = take(&values, &indices_array, None)?;
+
+ Ok(Arc::new(GenericListViewArray::<O>::try_new(
+ Arc::clone(field),
+ ScalarBuffer::from(new_offsets),
+ ScalarBuffer::from(new_sizes),
+ values_reversed,
+ has_nulls.then_some(NullBuffer::from(new_nulls)),
Review Comment:
I think we can just directly grab the null buffer from the input array
(I know the other reverse functions don't do this, but I think it's a valid
approach instead of recreating the null buffer element by element)
```suggestion
array.nulls().cloned(),
```
##########
datafusion/functions-nested/src/reverse.rs:
##########
@@ -183,6 +195,75 @@ fn general_array_reverse<O: OffsetSizeTrait +
TryFrom<i64>>(
)?))
}
+fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
Review Comment:
```suggestion
fn list_view_reverse<O: OffsetSizeTrait>(
```
I don't think the extra bound is used
##########
datafusion/functions-nested/src/reverse.rs:
##########
@@ -183,6 +196,50 @@ fn general_array_reverse<O: OffsetSizeTrait +
TryFrom<i64>>(
)?))
}
+fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
Review Comment:
Using `take` makes sense to me here. If you have time you could try both
approaches and run a benchmark?
##########
datafusion/functions-nested/src/reverse.rs:
##########
@@ -183,6 +195,75 @@ fn general_array_reverse<O: OffsetSizeTrait +
TryFrom<i64>>(
)?))
}
+fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
+ array: &GenericListViewArray<O>,
+ field: &FieldRef,
+) -> Result<ArrayRef> {
+ let (_, offsets, sizes, values, nulls) = array.clone().into_parts();
Review Comment:
```suggestion
let offsets = array.offsets();
let values = array.values();
let sizes = array.sizes();
```
I don't think there's a strict need to clone and deconstruct the array, can
just grab constituent parts like so
##########
datafusion/functions-nested/src/reverse.rs:
##########
@@ -183,6 +195,75 @@ fn general_array_reverse<O: OffsetSizeTrait +
TryFrom<i64>>(
)?))
}
+fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
+ array: &GenericListViewArray<O>,
+ field: &FieldRef,
+) -> Result<ArrayRef> {
+ let (_, offsets, sizes, values, nulls) = array.clone().into_parts();
+
+ // Construct indices, sizes and offsets for the reversed array by
iterating over
+ // the list view array in the logical order, and reversing the order of
the elements.
+ // We end up with a list view array where the elements are in order,
+ // even if the original array had elements out of order.
+ let mut indices: Vec<O> = Vec::with_capacity(values.len());
+ let mut new_sizes = Vec::with_capacity(sizes.len());
+ let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len());
+ let mut new_nulls =
+ Vec::with_capacity(nulls.clone().map(|nulls|
nulls.len()).unwrap_or(0));
+ new_offsets.push(O::zero());
+ let has_nulls = nulls.is_some();
+ for (i, offset) in offsets.iter().enumerate().take(offsets.len()) {
+ // If this array is null, we set the new array to null with size 0 and
continue
+ if let Some(ref nulls) = nulls {
+ if nulls.is_null(i) {
+ new_nulls.push(false); // null
+ new_sizes.push(O::zero());
+ new_offsets.push(new_offsets[i]);
+ continue;
+ } else {
+ new_nulls.push(true); // valid
+ }
+ }
+
+ // Each array is located at [offset, offset + size), so we collect
indices in the reverse order
+ let array_start = offset.as_usize();
+ let array_end = array_start + sizes[i].as_usize();
+ for idx in (array_start..array_end).rev() {
+ indices.push(O::usize_as(idx));
+ }
Review Comment:
I feel this roundtripping from O -> usize -> O could be simplified, perhaps
if we use a loop instead of a for? Thoughts? 🤔
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]