Jefffrey commented on code in PR #8963:
URL: https://github.com/apache/arrow-rs/pull/8963#discussion_r2647047797
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
Review Comment:
I question this choice of holding a `GenericByteViewArray` itself here 🤔
Can we decompose it to just the buffers and view `u128` perhaps?
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_values_null(
+ len: usize,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times((0u128).to_byte_slice(), len);
+
+ (mutable.into(), vec![], Some(NullBuffer::new_null(len)))
+ }
+
+ fn get_scalar_buffers_and_nulls_for_single_non_nullable(
+ predicate: BooleanBuffer,
+ value: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let number_of_true = predicate.count_set_bits();
+ let number_of_values = predicate.len();
+
+ // Fast path for all nulls
+ if number_of_true == 0 {
+ // All values are null
+ return
Self::get_scalar_buffers_and_nulls_for_all_values_null(number_of_values);
+ }
+ let view = value.views()[0].to_byte_slice();
+ let mut bytes = MutableBuffer::with_capacity(0);
+ bytes.repeat_slice_n_times(view, number_of_values);
+
+ let bytes = Buffer::from(bytes);
+
+ // If a value is true we need the TRUTHY and the null buffer will have
1 (meaning not null)
+ // If a value is false we need the FALSY and the null buffer will have
0 (meaning null)
Review Comment:
These comments seem out of context or in the wrong place. This function
has no concept of `TRUTHY` and `FALSY`; both are collapsed into the single
`value` argument.
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_values_null(
+ len: usize,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times((0u128).to_byte_slice(), len);
+
+ (mutable.into(), vec![], Some(NullBuffer::new_null(len)))
+ }
+
+ fn get_scalar_buffers_and_nulls_for_single_non_nullable(
+ predicate: BooleanBuffer,
+ value: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let number_of_true = predicate.count_set_bits();
+ let number_of_values = predicate.len();
+
+ // Fast path for all nulls
+ if number_of_true == 0 {
+ // All values are null
+ return
Self::get_scalar_buffers_and_nulls_for_all_values_null(number_of_values);
+ }
+ let view = value.views()[0].to_byte_slice();
+ let mut bytes = MutableBuffer::with_capacity(0);
+ bytes.repeat_slice_n_times(view, number_of_values);
+
+ let bytes = Buffer::from(bytes);
+
+ // If a value is true we need the TRUTHY and the null buffer will have
1 (meaning not null)
+ // If a value is false we need the FALSY and the null buffer will have
0 (meaning null)
+ let nulls = NullBuffer::new(predicate);
+ (bytes.into(), value.data_buffers().into(), Some(nulls))
+ }
+
+ fn get_scalar_buffers_and_nulls_non_nullable(
+ predicate: BooleanBuffer,
+ truthy: &GenericByteViewArray<T>,
+ falsy: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let true_count = predicate.count_set_bits();
+ let view_truthy = truthy.views()[0].to_byte_slice();
+ let mut buffers: Vec<Buffer> = truthy.data_buffers().to_vec();
+
+ // if falsy has non-inlined values in the buffer,
+ // include the buffers and recalculate the view,
+ // otherwise, we simply use the view.
+ let view_falsy = if falsy.total_buffer_bytes_used() > 0 {
+ let byte_view_falsy = ByteView::from(falsy.views()[0]);
+ let new_index_falsy_buffers = buffers.len() as u32;
Review Comment:
```suggestion
let new_index_falsy_buffers = buffers.len() as u32 +
byte_view_falsy.buffer_index;
```
We can't assume falsy only has 1 buffer
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_values_null(
+ len: usize,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times((0u128).to_byte_slice(), len);
+
+ (mutable.into(), vec![], Some(NullBuffer::new_null(len)))
+ }
+
+ fn get_scalar_buffers_and_nulls_for_single_non_nullable(
+ predicate: BooleanBuffer,
+ value: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let number_of_true = predicate.count_set_bits();
+ let number_of_values = predicate.len();
+
+ // Fast path for all nulls
+ if number_of_true == 0 {
+ // All values are null
+ return
Self::get_scalar_buffers_and_nulls_for_all_values_null(number_of_values);
+ }
+ let view = value.views()[0].to_byte_slice();
+ let mut bytes = MutableBuffer::with_capacity(0);
+ bytes.repeat_slice_n_times(view, number_of_values);
+
+ let bytes = Buffer::from(bytes);
Review Comment:
```suggestion
let bytes = vec![value.views()[0]; number_of_values];
```
No need to use `MutableBuffer` since our values (views) are simple `u128`s
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_values_null(
+ len: usize,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times((0u128).to_byte_slice(), len);
+
+ (mutable.into(), vec![], Some(NullBuffer::new_null(len)))
+ }
+
+ fn get_scalar_buffers_and_nulls_for_single_non_nullable(
+ predicate: BooleanBuffer,
+ value: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let number_of_true = predicate.count_set_bits();
+ let number_of_values = predicate.len();
+
+ // Fast path for all nulls
+ if number_of_true == 0 {
+ // All values are null
+ return
Self::get_scalar_buffers_and_nulls_for_all_values_null(number_of_values);
+ }
+ let view = value.views()[0].to_byte_slice();
+ let mut bytes = MutableBuffer::with_capacity(0);
+ bytes.repeat_slice_n_times(view, number_of_values);
+
+ let bytes = Buffer::from(bytes);
+
+ // If a value is true we need the TRUTHY and the null buffer will have
1 (meaning not null)
+ // If a value is false we need the FALSY and the null buffer will have
0 (meaning null)
+ let nulls = NullBuffer::new(predicate);
+ (bytes.into(), value.data_buffers().into(), Some(nulls))
+ }
+
+ fn get_scalar_buffers_and_nulls_non_nullable(
+ predicate: BooleanBuffer,
+ truthy: &GenericByteViewArray<T>,
+ falsy: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let true_count = predicate.count_set_bits();
+ let view_truthy = truthy.views()[0].to_byte_slice();
+ let mut buffers: Vec<Buffer> = truthy.data_buffers().to_vec();
+
+ // if falsy has non-inlined values in the buffer,
+ // include the buffers and recalculate the view,
+ // otherwise, we simply use the view.
+ let view_falsy = if falsy.total_buffer_bytes_used() > 0 {
+ let byte_view_falsy = ByteView::from(falsy.views()[0]);
+ let new_index_falsy_buffers = buffers.len() as u32;
+ buffers.extend(falsy.data_buffers().to_vec());
+ let byte_view_falsy =
byte_view_falsy.with_buffer_index(new_index_falsy_buffers);
+ byte_view_falsy.as_u128()
+ } else {
+ falsy.views()[0]
+ };
+
+ let total_number_of_bytes = true_count * view_truthy.len()
+ + (predicate.len() - true_count) *
view_falsy.to_byte_slice().len();
Review Comment:
`view_truthy` and `view_falsy` both come from `u128` views, whose size is a
known constant (16 bytes); no need to call `len()` on their byte slices
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
Review Comment:
Personally I would inline this; there are a lot of functions here, so we
would benefit from removing simple ones like this
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_values_null(
+ len: usize,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times((0u128).to_byte_slice(), len);
+
+ (mutable.into(), vec![], Some(NullBuffer::new_null(len)))
+ }
+
+ fn get_scalar_buffers_and_nulls_for_single_non_nullable(
Review Comment:
```suggestion
fn get_view_parts_single_value(
```
These function names could do with improving; they aren't very readable
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_values_null(
+ len: usize,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times((0u128).to_byte_slice(), len);
+
+ (mutable.into(), vec![], Some(NullBuffer::new_null(len)))
+ }
+
+ fn get_scalar_buffers_and_nulls_for_single_non_nullable(
+ predicate: BooleanBuffer,
+ value: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let number_of_true = predicate.count_set_bits();
+ let number_of_values = predicate.len();
+
+ // Fast path for all nulls
+ if number_of_true == 0 {
+ // All values are null
+ return
Self::get_scalar_buffers_and_nulls_for_all_values_null(number_of_values);
+ }
+ let view = value.views()[0].to_byte_slice();
+ let mut bytes = MutableBuffer::with_capacity(0);
+ bytes.repeat_slice_n_times(view, number_of_values);
+
+ let bytes = Buffer::from(bytes);
+
+ // If a value is true we need the TRUTHY and the null buffer will have
1 (meaning not null)
+ // If a value is false we need the FALSY and the null buffer will have
0 (meaning null)
+ let nulls = NullBuffer::new(predicate);
+ (bytes.into(), value.data_buffers().into(), Some(nulls))
+ }
+
+ fn get_scalar_buffers_and_nulls_non_nullable(
+ predicate: BooleanBuffer,
+ truthy: &GenericByteViewArray<T>,
+ falsy: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let true_count = predicate.count_set_bits();
+ let view_truthy = truthy.views()[0].to_byte_slice();
+ let mut buffers: Vec<Buffer> = truthy.data_buffers().to_vec();
+
+ // if falsy has non-inlined values in the buffer,
+ // include the buffers and recalculate the view,
+ // otherwise, we simply use the view.
+ let view_falsy = if falsy.total_buffer_bytes_used() > 0 {
+ let byte_view_falsy = ByteView::from(falsy.views()[0]);
+ let new_index_falsy_buffers = buffers.len() as u32;
+ buffers.extend(falsy.data_buffers().to_vec());
+ let byte_view_falsy =
byte_view_falsy.with_buffer_index(new_index_falsy_buffers);
+ byte_view_falsy.as_u128()
+ } else {
+ falsy.views()[0]
+ };
+
+ let total_number_of_bytes = true_count * view_truthy.len()
+ + (predicate.len() - true_count) *
view_falsy.to_byte_slice().len();
+ let mut mutable = MutableBuffer::new(total_number_of_bytes);
+ let mut filled = 0;
+
+ SlicesIterator::from(&predicate).for_each(|(start, end)| {
+ if start > filled {
+ let false_repeat_count = start - filled;
+ mutable.repeat_slice_n_times(view_falsy.to_byte_slice(),
false_repeat_count);
+ }
+ let true_repeat_count = end - start;
+ mutable.repeat_slice_n_times(view_truthy, true_repeat_count);
+ filled = end;
+ });
+
+ if filled < predicate.len() {
+ let false_repeat_count = predicate.len() - filled;
+ mutable.repeat_slice_n_times(view_falsy.to_byte_slice(),
false_repeat_count);
+ }
+
+ let bytes = Buffer::from(mutable);
+
+ (
+ bytes.into(),
+ buffers,
+ Some(NullBuffer::new_valid(predicate.len())),
+ )
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_same_value(
+ length: usize,
+ value: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let (views, buffers, _) = value.clone().into_parts();
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times(views[0].to_byte_slice(), length);
+
+ let bytes = Buffer::from(mutable);
+
+ (bytes.into(), buffers, Some(NullBuffer::new_valid(length)))
Review Comment:
```suggestion
(vec![views[0]; length].into(), buffers, None)
```
No need for a null buffer if all values are valid.
The buffer creation can also be simplified.
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_values_null(
+ len: usize,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times((0u128).to_byte_slice(), len);
+
+ (mutable.into(), vec![], Some(NullBuffer::new_null(len)))
Review Comment:
```suggestion
(vec![0; len].into(), vec![], Some(NullBuffer::new_null(len)))
```
At this point it would be better to inline this instead of having this thin
wrapper function
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_values_null(
+ len: usize,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times((0u128).to_byte_slice(), len);
+
+ (mutable.into(), vec![], Some(NullBuffer::new_null(len)))
+ }
+
+ fn get_scalar_buffers_and_nulls_for_single_non_nullable(
+ predicate: BooleanBuffer,
+ value: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let number_of_true = predicate.count_set_bits();
+ let number_of_values = predicate.len();
+
+ // Fast path for all nulls
+ if number_of_true == 0 {
+ // All values are null
+ return
Self::get_scalar_buffers_and_nulls_for_all_values_null(number_of_values);
+ }
+ let view = value.views()[0].to_byte_slice();
+ let mut bytes = MutableBuffer::with_capacity(0);
+ bytes.repeat_slice_n_times(view, number_of_values);
+
+ let bytes = Buffer::from(bytes);
+
+ // If a value is true we need the TRUTHY and the null buffer will have
1 (meaning not null)
+ // If a value is false we need the FALSY and the null buffer will have
0 (meaning null)
+ let nulls = NullBuffer::new(predicate);
+ (bytes.into(), value.data_buffers().into(), Some(nulls))
+ }
+
+ fn get_scalar_buffers_and_nulls_non_nullable(
+ predicate: BooleanBuffer,
+ truthy: &GenericByteViewArray<T>,
+ falsy: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let true_count = predicate.count_set_bits();
+ let view_truthy = truthy.views()[0].to_byte_slice();
+ let mut buffers: Vec<Buffer> = truthy.data_buffers().to_vec();
+
+ // if falsy has non-inlined values in the buffer,
+ // include the buffers and recalculate the view,
+ // otherwise, we simply use the view.
+ let view_falsy = if falsy.total_buffer_bytes_used() > 0 {
+ let byte_view_falsy = ByteView::from(falsy.views()[0]);
+ let new_index_falsy_buffers = buffers.len() as u32;
+ buffers.extend(falsy.data_buffers().to_vec());
+ let byte_view_falsy =
byte_view_falsy.with_buffer_index(new_index_falsy_buffers);
+ byte_view_falsy.as_u128()
+ } else {
+ falsy.views()[0]
+ };
+
+ let total_number_of_bytes = true_count * view_truthy.len()
+ + (predicate.len() - true_count) *
view_falsy.to_byte_slice().len();
+ let mut mutable = MutableBuffer::new(total_number_of_bytes);
+ let mut filled = 0;
+
+ SlicesIterator::from(&predicate).for_each(|(start, end)| {
+ if start > filled {
+ let false_repeat_count = start - filled;
+ mutable.repeat_slice_n_times(view_falsy.to_byte_slice(),
false_repeat_count);
+ }
+ let true_repeat_count = end - start;
+ mutable.repeat_slice_n_times(view_truthy, true_repeat_count);
+ filled = end;
+ });
+
+ if filled < predicate.len() {
+ let false_repeat_count = predicate.len() - filled;
+ mutable.repeat_slice_n_times(view_falsy.to_byte_slice(),
false_repeat_count);
+ }
+
+ let bytes = Buffer::from(mutable);
+
+ (
+ bytes.into(),
+ buffers,
+ Some(NullBuffer::new_valid(predicate.len())),
Review Comment:
```suggestion
None,
```
All values are non-null so we don't need a null buffer
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray)
-> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_values_null(
+ len: usize,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times((0u128).to_byte_slice(), len);
+
+ (mutable.into(), vec![], Some(NullBuffer::new_null(len)))
+ }
+
+ fn get_scalar_buffers_and_nulls_for_single_non_nullable(
+ predicate: BooleanBuffer,
+ value: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let number_of_true = predicate.count_set_bits();
+ let number_of_values = predicate.len();
+
+ // Fast path for all nulls
+ if number_of_true == 0 {
+ // All values are null
+ return
Self::get_scalar_buffers_and_nulls_for_all_values_null(number_of_values);
+ }
+ let view = value.views()[0].to_byte_slice();
Review Comment:
Does this view have the right buffer offset?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]