vibhatha commented on code in PR #12590:
URL: https://github.com/apache/arrow/pull/12590#discussion_r852572950


##########
python/pyarrow/_compute.pyx:
##########
@@ -2251,3 +2338,161 @@ cdef CExpression _bind(Expression filter, Schema 
schema) except *:
 
     return GetResultValue(filter.unwrap().Bind(
         deref(pyarrow_unwrap_schema(schema).get())))
+
+
+cdef CFunctionDoc _make_function_doc(dict func_doc) except *:
+    """
+    Helper function to generate the FunctionDoc
+    This function accepts a dictionary and expect the 
+    summary(str), description(str) and arg_names(List[str]) keys. 
+    """
+    cdef:
+        CFunctionDoc f_doc
+        vector[c_string] c_arg_names
+
+    if len(func_doc) <= 1:
+        raise ValueError(
+            "Function doc must contain a summary, a description and arg_names")
+
+    if not "summary" in func_doc.keys():
+        raise ValueError("Function doc must contain a summary")
+
+    if not "description" in func_doc.keys():
+        raise ValueError("Function doc must contain a description")
+
+    if not "arg_names" in func_doc.keys():
+        raise ValueError("Function doc must contain arg_names")
+
+    f_doc.summary = tobytes(func_doc["summary"])
+    f_doc.description = tobytes(func_doc["description"])
+    for arg_name in func_doc["arg_names"]:
+        c_arg_names.push_back(tobytes(arg_name))
+    f_doc.arg_names = c_arg_names
+    # UDFOptions integration:
+    # TODO: https://issues.apache.org/jira/browse/ARROW-16041
+    f_doc.options_class = tobytes("None")
+    f_doc.options_required = False
+    return f_doc
+
+
+def register_scalar_function(func_name, function_doc, in_types,
+                             out_type, function):
+    """
+    Register a user-defined scalar function. 
+
+    A scalar function is a function that executes elementwise
+    operations on arrays or scalars, and therefore whose results
+    generally do not depend on the order of the values in the
+    arguments. Accepts and returns arrays that are all of the
+    same size. These functions roughly correspond to the functions
+    used in SQL expressions.
+
+    Parameters
+    ----------
+    func_name : str
+        Name of the function. This name must be globally unique. 
+    function_doc : dict
+        A dictionary object with keys "summary" (str),
+        and "description" (str).
+    in_types : Dict[str, InputType]
+        Dictionary containing items with input label, InputType
+        objects which defines the arguments to the function.
+        The input label is a str that will be used to generate
+        documentation for the function. The number of arguments
+        specified here determines the function arity.
+    out_type : DataType
+        Output type of the function.
+    function : callable
+        A callable implementing the user-defined function.
+        It must take arguments equal to the number of
+        in_types defined. It must return an Array or Scalar
+        matching the out_type. It must return a Scalar if
+        all arguments are scalar, else it must return an array.
+
+        To define a varargs function, pass a callable that takes
+        varargs. The last in_type will be the type of the all
+        varargs arguments.
+
+    Example
+    -------
+
+    >>> import pyarrow.compute as pc
+    >>> from pyarrow.compute import register_scalar_function
+    >>> from pyarrow.compute import InputType
+    >>> 
+    >>> func_doc = {}
+    >>> func_doc["summary"] = "simple udf"
+    >>> func_doc["description"] = "add a constant to a scalar"
+    >>> 
+    >>> def add_constant(array):
+    ...     return pc.call_function("add", [array, 1])
+    ... 
+    >>> 
+    >>> func_name = "py_add_func"
+    >>> in_types = {"array": InputType.array(pa.int64())}
+    >>> out_type = pa.int64()
+    >>> register_function(func_name, func_doc,
+    ...                   in_types, out_type, add_constant)
+    >>> 
+    >>> func = pc.get_function(func_name)
+    >>> func.name
+    'py_add_func'
+    >>> ans = pc.call_function(func_name, [pa.array([20])])
+    >>> ans
+    <pyarrow.lib.Int64Array object at 0x10c22e700>
+    [
+    21
+    ]
+    """
+    cdef:
+        c_string c_func_name
+        CArity c_arity
+        CFunctionDoc c_func_doc
+        CInputType in_tmp
+        vector[CInputType] c_in_types
+        PyObject* c_function
+        shared_ptr[CDataType] c_type
+        COutputType* c_out_type
+        CStatus st
+        CScalarUdfOptions* c_options
+
+    c_func_name = tobytes(func_name)
+
+    if callable(function):
+        c_function = <PyObject*>function
+    else:
+        raise TypeError("Object must be a callable")
+
+    func_spec = inspect.getfullargspec(function)
+    num_args = -1
+    if isinstance(in_types, dict):
+        for in_type in in_types.values():
+            in_tmp = (<InputType> in_type).input_type
+            c_in_types.push_back(in_tmp)
+        function_doc["arg_names"] = in_types.keys()
+        num_args = len(in_types)
+    else:
+        raise TypeError(
+            "in_types must be a dictionary of InputType")
+
+    if func_spec.varargs:
+        c_arity = CArity.VarArgs(num_args)
+    else:
+        c_arity = CArity(num_args, False)
+
+    c_func_doc = _make_function_doc(function_doc)
+
+    if out_type:
+        c_type = pyarrow_unwrap_data_type(out_type)
+    else:
+        raise TypeError(
+            f"out_type must be a DataType, not {out_type!r}")
+
+    c_out_type = new COutputType(c_type)

Review Comment:
   `free(c_out_type)` we can do this right? But in Arrow Cython usage, we have 
avoided this type of usage right? 
   Is there a better way? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to