alamb commented on code in PR #16877:
URL: https://github.com/apache/datafusion/pull/16877#discussion_r2228560541
##########
datafusion/functions/src/unicode/character_length.rs:
##########
@@ -136,56 +136,31 @@ where
// string is ASCII only is relatively cheap.
// If strings are ASCII only, count bytes instead.
let is_array_ascii_only = array.is_ascii();
- let array = if array.null_count() == 0 {
+ let nulls = array.nulls().cloned();
+ let array = {
if is_array_ascii_only {
let values: Vec<_> = (0..array.len())
.map(|i| {
- let value = array.value(i);
+ // Safety: we are iterating with array.len() so the index
is always valid
+ let value = unsafe { array.value_unchecked(i) };
T::Native::usize_as(value.len())
})
.collect();
- PrimitiveArray::<T>::new(values.into(), None)
+ PrimitiveArray::<T>::new(values.into(), nulls)
} else {
let values: Vec<_> = (0..array.len())
.map(|i| {
- let value = array.value(i);
+ // Safety: we are iterating with array.len() so the index
is always valid
Review Comment:
Another idea here: if you know the values are always ASCII, you can avoid
making a `&str` at all -- and instead simply compute the character lengths
based on the offsets array (for `StringArray` and `LargeStringArray`) or the
views (for `StringViewArray`).
##########
datafusion/functions/src/unicode/character_length.rs:
##########
@@ -136,56 +136,31 @@ where
// string is ASCII only is relatively cheap.
// If strings are ASCII only, count bytes instead.
let is_array_ascii_only = array.is_ascii();
- let array = if array.null_count() == 0 {
+ let nulls = array.nulls().cloned();
Review Comment:
Is the idea to remove the no-nulls optimization because it doesn't make
things faster?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]