neilconway commented on code in PR #20385:
URL: https://github.com/apache/datafusion/pull/20385#discussion_r2833497170
##########
datafusion/functions-nested/src/array_has.rs:
##########
@@ -476,6 +483,179 @@ fn array_has_any_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
array_has_all_and_any_inner(args, ComparisonType::Any)
}
+/// Fast path for `array_has_any` when exactly one argument is a scalar.
+fn array_has_any_with_scalar(
+ columnar_arg: &ColumnarValue,
+ scalar_arg: &ScalarValue,
+) -> Result<ColumnarValue> {
+ if scalar_arg.is_null() {
+ return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
+ }
+
+ // Convert the scalar to a 1-element ListArray, then extract the inner values
+ let scalar_array = scalar_arg.to_array_of_size(1)?;
+ let scalar_list: ArrayWrapper = scalar_array.as_ref().try_into()?;
+ let scalar_values = scalar_list.values();
+
+ // If scalar list is empty, result is always false
+ if scalar_values.is_empty() {
+ return match columnar_arg {
+ ColumnarValue::Array(arr) => {
+ let result = BooleanArray::from(BooleanBuffer::new_unset(arr.len()));
+ Ok(ColumnarValue::Array(Arc::new(result)))
+ }
+ ColumnarValue::Scalar(_) => {
+ Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))))
+ }
+ };
+ }
+
+ match scalar_values.data_type() {
+ DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
+ array_has_any_with_scalar_string(columnar_arg, scalar_values)
+ }
+ _ => array_has_any_with_scalar_general(columnar_arg, scalar_values),
+ }
+}
+
+/// When the scalar argument has more elements than this, the scalar fast path
+/// builds a HashSet for O(1) lookups. At or below this threshold, it falls
+/// back to a linear scan, since hashing every columnar element is more
+/// expensive than a linear scan over a short array.
+const SCALAR_SMALL_THRESHOLD: usize = 8;
+
+/// String-specialized scalar fast path for `array_has_any`.
+fn array_has_any_with_scalar_string(
+ columnar_arg: &ColumnarValue,
+ scalar_values: &ArrayRef,
+) -> Result<ColumnarValue> {
+ let scalar_strings = string_array_to_vec(scalar_values.as_ref());
Review Comment:
I tried this but it didn't seem to help the benchmarks, so I'll keep things
as they were for now.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]