vibhatha commented on code in PR #12590:
URL: https://github.com/apache/arrow/pull/12590#discussion_r847841147
##########
python/pyarrow/_compute.pyx:
##########
@@ -2251,3 +2331,169 @@ cdef CExpression _bind(Expression filter, Schema
schema) except *:
return GetResultValue(filter.unwrap().Bind(
deref(pyarrow_unwrap_schema(schema).get())))
+
+
+cdef CFunctionDoc _make_function_doc(dict func_doc) except *:
+    """
+    Helper function to generate the FunctionDoc.
+
+    Parameters
+    ----------
+    func_doc : dict
+        Dictionary that is expected to contain the keys
+        "summary" (str), "description" (str) and
+        "arg_names" (List[str]).
+
+    Returns
+    -------
+    CFunctionDoc
+        Struct whose summary, description and arg_names are filled
+        from ``func_doc``. ``options_class`` is set to "None" and
+        ``options_required`` to False.
+
+    Raises
+    ------
+    ValueError
+        If ``func_doc`` holds fewer than two entries, or if any of
+        the required keys is missing.
+    """
+    cdef:
+        CFunctionDoc f_doc
+        vector[c_string] c_arg_names
+
+    # Coarse check first so that an empty or near-empty dict reports
+    # everything that is required in a single message.
+    if len(func_doc) <= 1:
+        raise ValueError(
+            "Function doc must contain a summary, a description and arg_names")
+
+    # Test membership on the dict itself (no ``.keys()`` view needed);
+    # each missing key keeps its own dedicated error message.
+    for key, label in (("summary", "a summary"),
+                       ("description", "a description"),
+                       ("arg_names", "arg_names")):
+        if key not in func_doc:
+            raise ValueError("Function doc must contain " + label)
+
+    f_doc.summary = tobytes(func_doc["summary"])
+    f_doc.description = tobytes(func_doc["description"])
+    # Only iteration is required of "arg_names"; every item is
+    # converted with tobytes before being stored.
+    for arg_name in func_doc["arg_names"]:
+        c_arg_names.push_back(tobytes(arg_name))
+    f_doc.arg_names = c_arg_names
+    # UDFOptions integration:
+    # TODO: https://issues.apache.org/jira/browse/ARROW-16041
+    f_doc.options_class = tobytes("None")
+    f_doc.options_required = False
+    return f_doc
+
+
+def register_scalar_function(func_name, function_doc, in_types,
+ out_type, function):
+ """
+ Register a user-defined-function.
+
+ Parameters
+ ----------
+ func_name : str
+ Name of the function. This name must be globally unique.
+ function_doc : dict
+ A dictionary object with keys "summary" (str),
+ and "description" (str).
+ in_types : Dict[str, InputType]
+ Dictionary containing items with input type name, InputType
+ objects which defines the input types for the function.
+ When defining a list of InputType for a varargs function,
+ the list only needs to contain the number of elements equal
+ to the num_args (which is the minimum number of required arguments).
+ out_type : DataType
+ Output type of the function.
+ function : callable
+ User-defined-function
+ function includes arguments equal to the number
+ of input_types defined. The return type of the
+ function is of the type defined as output_type.
+ The output should be an Array or a Scalar.
+
+ Example
+ -------
+
+ >>> import pyarrow.compute as pc
+ >>> from pyarrow.compute import register_scalar_function
+ >>> from pyarrow.compute import InputType
+ >>>
+ >>> func_doc = {}
+ >>> func_doc["summary"] = "simple udf"
+ >>> func_doc["description"] = "add a constant to a scalar"
+ >>>
+ >>> def add_constant(array):
+ ... return pc.call_function("add", [array, 1])
+ ...
+ >>>
+ >>> func_name = "py_add_func"
+ >>> in_types = [InputType.array(pa.int64())]
+ >>> out_type = pa.int64()
+ >>> register_function(func_name, func_doc,
+ ... in_types, out_type, add_constant)
+ >>>
+ >>> func = pc.get_function(func_name)
+ >>> func.name
+ 'py_add_func'
+ >>> ans = pc.call_function(func_name, [pa.array([20])])
+ >>> ans
+ <pyarrow.lib.Int64Array object at 0x10c22e700>
+ [
+ 21
+ ]
+ """
+ cdef:
+ c_string c_func_name
+ CArity* c_arity_ptr
+ CArity c_arity
+ CFunctionDoc c_func_doc
+ CInputType in_tmp
+ vector[CInputType] c_in_types
+ PyObject* c_function
+ shared_ptr[CDataType] c_type
+ COutputType* c_out_type
+ CScalarUdfBuilder* c_sc_builder
+ CStatus st
+ CScalarUdfOptions* c_options
+
+ c_func_name = tobytes(func_name)
+
+ if callable(function):
+ c_function = <PyObject*>function
+ else:
+ raise ValueError("Object must be a callable")
+
+ func_spec = inspect.getfullargspec(function)
+ num_args = -1
+ if isinstance(in_types, dict):
+ for in_type in in_types.values():
+ if isinstance(in_type, InputType):
+ in_tmp = (<InputType> in_type).input_type
+ c_in_types.push_back(in_tmp)
+ else:
+ raise ValueError("Expected an object of type InputType")
+ function_doc["arg_names"] = in_types.keys()
+ num_args = len(in_types)
+ else:
+ if num_args == -1:
+ raise ValueError(
+ "Input types must be an empty list or a List[InputType]")
+
+ if func_spec.varargs:
+ if num_args < 0:
Review Comment:
Yes, this value was passed by value, but we infer it now.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]