Dandandan commented on code in PR #9535:
URL: https://github.com/apache/arrow-rs/pull/9535#discussion_r2945124235
##########
arrow-select/src/take.rs:
##########
@@ -723,46 +724,128 @@ fn take_fixed_size_binary<IndexType: ArrowPrimitiveType>(
ArrowError::InvalidArgumentError(format!("Cannot convert size '{}' to
usize", size))
})?;
- let values_buffer = values.values().as_slice();
- let mut values_buffer_builder = BufferBuilder::new(indices.len() *
size_usize);
-
- if indices.null_count() == 0 {
- let array_iter = indices.values().iter().map(|idx| {
- let offset = idx.as_usize() * size_usize;
- &values_buffer[offset..offset + size_usize]
- });
- for slice in array_iter {
- values_buffer_builder.append_slice(slice);
- }
- } else {
- // The indices nullability cannot be ignored here because the values
buffer may contain
- // nulls which should not cause a panic.
- let array_iter = indices.iter().map(|idx| {
- idx.map(|idx| {
- let offset = idx.as_usize() * size_usize;
- &values_buffer[offset..offset + size_usize]
- })
- });
- for slice in array_iter {
- match slice {
- None => values_buffer_builder.append_n(size_usize, 0),
- Some(slice) => values_buffer_builder.append_slice(slice),
- }
- }
- }
+ let result_buffer = match size_usize {
+ 1 => take_fixed_size::<IndexType, 1>(values.values(), indices),
+ 2 => take_fixed_size::<IndexType, 2>(values.values(), indices),
+ 4 => take_fixed_size::<IndexType, 4>(values.values(), indices),
+ 8 => take_fixed_size::<IndexType, 8>(values.values(), indices),
+ 12 => take_fixed_size::<IndexType, 12>(values.values(), indices),
+ 16 => take_fixed_size::<IndexType, 16>(values.values(), indices),
+ _ => take_fixed_size_binary_buffer_dynamic_length(values, indices,
size_usize),
+ };
- let values_buffer = values_buffer_builder.finish();
let value_nulls = take_nulls(values.nulls(), indices);
let final_nulls = NullBuffer::union(value_nulls.as_ref(), indices.nulls());
-
let array_data = ArrayDataBuilder::new(DataType::FixedSizeBinary(size))
.len(indices.len())
.nulls(final_nulls)
.offset(0)
- .add_buffer(values_buffer)
+ .add_buffer(result_buffer)
.build()?;
- Ok(FixedSizeBinaryArray::from(array_data))
+ return Ok(FixedSizeBinaryArray::from(array_data));
+
+ /// Implementation of the take kernel for fixed size binary arrays.
+ #[inline(never)]
+ fn take_fixed_size_binary_buffer_dynamic_length<IndexType:
ArrowPrimitiveType>(
+ values: &FixedSizeBinaryArray,
+ indices: &PrimitiveArray<IndexType>,
+ size_usize: usize,
+ ) -> Buffer {
+ let values_buffer = values.values().as_slice();
+ let mut values_buffer_builder = BufferBuilder::new(indices.len() *
size_usize);
+
+ if indices.null_count() == 0 {
+ let array_iter = indices.values().iter().map(|idx| {
+ let offset = idx.as_usize() * size_usize;
+ &values_buffer[offset..offset + size_usize]
+ });
+ for slice in array_iter {
+ values_buffer_builder.append_slice(slice);
+ }
+ } else {
+ // The indices nullability cannot be ignored here because the
values buffer may contain
+ // nulls which should not cause a panic.
+ let array_iter = indices.iter().map(|idx| {
+ idx.map(|idx| {
+ let offset = idx.as_usize() * size_usize;
+ &values_buffer[offset..offset + size_usize]
+ })
+ });
+ for slice in array_iter {
Review Comment:
Oh wait - the None / Some here is from the indices, not the values.
I think we can build the values / nulls separately like we do in other take
kernels.
The nulls we can just cheaply (ref) clone from `array_iter`, this avoids
rebuilding the entire null buffer from scratch.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]