mapleFU commented on code in PR #10025:
URL: https://github.com/apache/arrow-rs/pull/10025#discussion_r3311386076
##########
arrow-select/src/interleave.rs:
##########
@@ -373,13 +375,85 @@ fn interleave_struct(
Ok(Arc::new(struct_array))
}
+/// Specialized interleave for list child arrays that are primitive.
+/// Directly copies typed value slices and null bit ranges without
+/// going through MutableArrayData's function pointer indirection.
+fn interleave_list_primitive_child<O: OffsetSizeTrait, T: ArrowPrimitiveType>(
+ interleaved: &Interleave<'_, GenericListArray<O>>,
+ indices: &[(usize, usize)],
+ capacity: usize,
+) -> ArrayRef {
+ let child_arrays: Vec<&PrimitiveArray<T>> = interleaved
+ .arrays
+ .iter()
+ .map(|list| list.values().as_primitive::<T>())
+ .collect();
+
+ let has_child_nulls = child_arrays.iter().any(|a| a.null_count() > 0);
+
+ // Build values buffer by copying contiguous slices
+ let mut values: Vec<T::Native> = Vec::with_capacity(capacity);
+ for &(array, row) in indices {
+ let o = interleaved.arrays[array].value_offsets();
+ let start = o[row].as_usize();
+ let end = o[row + 1].as_usize();
+ if end > start {
+
values.extend_from_slice(&child_arrays[array].values()[start..end]);
+ }
+ }
+
+ // Build null buffer. Pre-allocate with 0x00 (all null), then:
+ // - Sources with nulls: set_bits ORs in valid bits from source.
+ // - Sources without nulls: set the bit range to all 1s directly.
+ let nulls = if has_child_nulls {
+ let null_byte_len = bit_util::ceil(capacity, 8);
+ let mut null_buf = MutableBuffer::new(null_byte_len);
+ null_buf.resize(null_byte_len, 0);
+
+ let mut offset_write = 0;
+ for &(array, row) in indices {
+ let o = interleaved.arrays[array].value_offsets();
+ let start = o[row].as_usize();
+ let end = o[row + 1].as_usize();
+ let len = end - start;
+ if len > 0 {
+ match child_arrays[array].nulls() {
+ Some(null_buffer) => {
+ set_bits(
+ null_buf.as_slice_mut(),
+ null_buffer.validity(),
+ offset_write,
+ null_buffer.offset() + start,
+ len,
+ );
+ }
+ None => {
+ // Slow path. For a non-nullable source, set the bit
range to all 1s directly.
Review Comment:
I think this rarely happens, so it uses the slow path.
##########
arrow-select/src/interleave.rs:
##########
@@ -373,13 +375,85 @@ fn interleave_struct(
Ok(Arc::new(struct_array))
}
+/// Specialized interleave for list child arrays that are primitive.
+/// Directly copies typed value slices and null bit ranges without
+/// going through MutableArrayData's function pointer indirection.
+fn interleave_list_primitive_child<O: OffsetSizeTrait, T: ArrowPrimitiveType>(
Review Comment:
I previously used `MutableArrayData`, but it was about 15% slower than this
implementation.
##########
arrow-select/src/interleave.rs:
##########
@@ -392,29 +466,41 @@ fn interleave_list<O: OffsetSizeTrait>(
);
}
- let mut child_indices = Vec::with_capacity(capacity);
- for (array, row) in indices {
- let list = interleaved.arrays[*array];
- let start = list.value_offsets()[*row].as_usize();
- let end = list.value_offsets()[*row + 1].as_usize();
- child_indices.extend((start..end).map(|i| (*array, i)));
+ // Step 2: build child values.
+ macro_rules! list_primitive_helper {
+ ($t:ty) => {
+ interleave_list_primitive_child::<O, $t>(&interleaved, indices,
capacity)
+ };
}
- let child_arrays: Vec<&dyn Array> = interleaved
- .arrays
- .iter()
- .map(|list| list.values().as_ref())
- .collect();
+ let child_values = downcast_primitive! {
+ // For primitive child types, directly copy typed value slices and
null bit
+ // ranges, avoiding both the intermediate child_indices Vec allocation
and
+ // MutableArrayData's function pointer indirection.
+ field.data_type() => (list_primitive_helper),
Review Comment:
I think this only covers types that can be copied quickly; for
`List<List<...>>`, we still need further optimizations.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]