alamb commented on code in PR #14834:
URL: https://github.com/apache/datafusion/pull/14834#discussion_r1969585132
##########
datafusion/functions/src/math/gcd.rs:
##########
@@ -75,36 +77,73 @@ impl ScalarUDFImpl for GcdFunc {
}
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
- Ok(Int64)
+ Ok(DataType::Int64)
}
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
- make_scalar_function(gcd, vec![])(&args.args)
+ let args: [ColumnarValue; 2] = args.args.try_into().map_err(|_| {
+ internal_datafusion_err!("Expected 2 arguments for function gcd")
+ })?;
+
+ match args {
+ [ColumnarValue::Array(a), ColumnarValue::Array(b)] => {
+ compute_gcd_for_arrays(&a, &b)
+ }
+ [ColumnarValue::Scalar(ScalarValue::Int64(a)), ColumnarValue::Scalar(ScalarValue::Int64(b))] => {
+ match (a, b) {
+ (Some(a), Some(b)) => Ok(ColumnarValue::Scalar(ScalarValue::Int64(
+ Some(compute_gcd(a, b)?),
+ ))),
+ _ => Ok(ColumnarValue::Scalar(ScalarValue::Int64(None))),
+ }
+ }
+ [ColumnarValue::Array(a), ColumnarValue::Scalar(ScalarValue::Int64(b))] => {
+ compute_gcd_with_scalar(&a, b)
+ }
+ [ColumnarValue::Scalar(ScalarValue::Int64(a)), ColumnarValue::Array(b)] => {
+ compute_gcd_with_scalar(&b, a)
+ }
+ _ => exec_err!("Unsupported argument types for function gcd"),
+ }
}
fn documentation(&self) -> Option<&Documentation> {
self.doc()
}
}
-/// Gcd SQL function
-fn gcd(args: &[ArrayRef]) -> Result<ArrayRef> {
- match args[0].data_type() {
- Int64 => {
- let arg1 = downcast_named_arg!(&args[0], "x", Int64Array);
- let arg2 = downcast_named_arg!(&args[1], "y", Int64Array);
+fn compute_gcd_for_arrays(a: &ArrayRef, b: &ArrayRef) -> Result<ColumnarValue> {
+ let result: Result<Int64Array> = a
+ .as_primitive::<Int64Type>()
+ .iter()
+ .zip(b.as_primitive::<Int64Type>().iter())
+ .map(|(a, b)| match (a, b) {
+ (Some(a), Some(b)) => Ok(Some(compute_gcd(a, b)?)),
+ _ => Ok(None),
+ })
+ .collect();
+
+ result.map(|arr| ColumnarValue::Array(Arc::new(arr) as ArrayRef))
+}
Review Comment:
I think you can use `try_binary` here, which should make it even faster:
```suggestion
fn compute_gcd_for_arrays(a: &ArrayRef, b: &ArrayRef) -> Result<ColumnarValue> {
    let a = a.as_primitive::<Int64Type>();
    let b = b.as_primitive::<Int64Type>();
    // NOTE: `try_binary` expects the kernel to return `Result<i64, ArrowError>`,
    // so this assumes `compute_gcd` reports its errors as `ArrowError`.
    try_binary(a, b, compute_gcd)
        .map(|arr: PrimitiveArray<Int64Type>| {
            ColumnarValue::Array(Arc::new(arr) as ArrayRef)
        })
        .map_err(Into::into) // convert ArrowError to DataFusionError
}
```
I'll run some quick numbers with your new benchmark.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]