comphead commented on code in PR #20770:
URL: https://github.com/apache/datafusion/pull/20770#discussion_r2942200695
##########
datafusion/functions-nested/src/position.rs:
##########
@@ -185,14 +140,64 @@ impl ScalarUDFImpl for ArrayPosition {
}
}
+/// Attempts the scalar-needle fast path for `array_position`.
+fn try_array_position_scalar(args: &[ColumnarValue]) -> Result<Option<ColumnarValue>> {
+ if args.len() < 2 || args.len() > 3 {
+ return exec_err!("array_position expects two or three arguments");
+ }
+
+ let scalar_needle = match &args[1] {
+ ColumnarValue::Scalar(s) => s,
+ ColumnarValue::Array(_) => return Ok(None),
+ };
+
+ // `not_distinct` doesn't support nested types (List, Struct, etc.),
+ // so fall back to the per-row path for those.
+ if scalar_needle.data_type().is_nested() {
+ return Ok(None);
+ }
+
+ // Determine batch length from whichever argument is columnar;
+ // if all inputs are scalar, batch length is 1.
+ let (num_rows, all_inputs_scalar) = match (&args[0], args.get(2)) {
+ (ColumnarValue::Array(a), _) => (a.len(), false),
+ (_, Some(ColumnarValue::Array(a))) => (a.len(), false),
+ _ => (1, true),
+ };
+
+ let needle = scalar_needle.to_array_of_size(1)?;
+ let haystack = args[0].to_array(num_rows)?;
+ let arr_from = resolve_start_from(args.get(2), num_rows)?;
+
+ let result = match haystack.data_type() {
+ List(_) => {
+ let list = as_list_array(&haystack)?;
+ array_position_scalar::<i32>(list, &needle, &arr_from)
+ }
+ LargeList(_) => {
+ let list = as_large_list_array(&haystack)?;
+ array_position_scalar::<i64>(list, &needle, &arr_from)
+ }
+ t => exec_err!("array_position does not support type '{t}'"),
+ }?;
+
+ if all_inputs_scalar {
+ Ok(Some(ColumnarValue::Scalar(ScalarValue::try_from_array(
+ &result, 0,
+ )?)))
+ } else {
+ Ok(Some(ColumnarValue::Array(result)))
+ }
+}
+
fn array_position_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() < 2 || args.len() > 3 {
return exec_err!("array_position expects two or three arguments");
}
match &args[0].data_type() {
List(_) => general_position_dispatch::<i32>(args),
LargeList(_) => general_position_dispatch::<i64>(args),
- array_type => exec_err!("array_position does not support type '{array_type}'."),
+ t => exec_err!("array_position does not support type '{t}'"),
Review Comment:
```suggestion
dt => exec_err!("array_position does not support type '{dt}'"),
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]