Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/19505#discussion_r144912405 --- Diff: python/pyspark/sql/functions.py --- @@ -2121,33 +2127,40 @@ def wrapper(*args): wrapper.func = self.func wrapper.returnType = self.returnType - wrapper.vectorized = self.vectorized + wrapper.pythonUdfType = self.pythonUdfType return wrapper -def _create_udf(f, returnType, vectorized): +def _create_udf(f, returnType, pythonUdfType): - def _udf(f, returnType=StringType(), vectorized=vectorized): - if vectorized: + def _udf(f, returnType=StringType(), pythonUdfType=pythonUdfType): + if pythonUdfType == PythonUdfType.PANDAS_UDF: import inspect argspec = inspect.getargspec(f) if len(argspec.args) == 0 and argspec.varargs is None: raise ValueError( "0-arg pandas_udfs are not supported. " "Instead, create a 1-arg pandas_udf and ignore the arg in your function." ) - udf_obj = UserDefinedFunction(f, returnType, vectorized=vectorized) + elif pythonUdfType == PythonUdfType.PANDAS_GROUPED_UDF: + import inspect + argspec = inspect.getargspec(f) + if len(argspec.args) != 1 and argspec.varargs is None: + raise ValueError("Only 1-arg pandas_grouped_udfs are supported.") + + udf_obj = UserDefinedFunction(f, returnType, pythonUdfType=pythonUdfType) return udf_obj._wrapped() # decorator @udf, @udf(), @udf(dataType()), or similar with @pandas_udf --- End diff -- Nit: update this comment
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org