[GitHub] spark pull request #20288: [SPARK-23122][PYTHON][SQL] Deprecate register* fo...

HyukjinKwon Wed, 17 Jan 2018 04:11:30 -0800

Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20288#discussion_r162031948
  
    --- Diff: python/pyspark/sql/context.py ---
    @@ -172,113 +173,34 @@ def range(self, start, end=None, step=1, 
numPartitions=None):
             """
             return self.sparkSession.range(start, end, step, numPartitions)
     
    -    @ignore_unicode_prefix
    -    @since(1.2)
         def registerFunction(self, name, f, returnType=None):
    -        """Registers a Python function (including lambda function) or a 
:class:`UserDefinedFunction`
    -        as a UDF. The registered UDF can be used in SQL statements.
    -
    -        :func:`spark.udf.register` is an alias for 
:func:`sqlContext.registerFunction`.
    -
    -        In addition to a name and the function itself, `returnType` can be 
optionally specified.
    -        1) When f is a Python function, `returnType` defaults to a string. 
The produced object must
    -        match the specified type. 2) When f is a 
:class:`UserDefinedFunction`, Spark uses the return
    -        type of the given UDF as the return type of the registered UDF. 
The input parameter
    -        `returnType` is None by default. If given by users, the value must 
be None.
    -
    -        :param name: name of the UDF in SQL statements.
    -        :param f: a Python function, or a wrapped/native 
UserDefinedFunction. The UDF can be either
    -            row-at-a-time or vectorized.
    -        :param returnType: the return type of the registered UDF.
    -        :return: a wrapped/native :class:`UserDefinedFunction`
    -
    -        >>> strlen = sqlContext.registerFunction("stringLengthString", 
lambda x: len(x))
    -        >>> sqlContext.sql("SELECT stringLengthString('test')").collect()
    -        [Row(stringLengthString(test)=u'4')]
    -
    -        >>> sqlContext.sql("SELECT 'foo' AS 
text").select(strlen("text")).collect()
    -        [Row(stringLengthString(text)=u'3')]
    -
    -        >>> from pyspark.sql.types import IntegerType
    -        >>> _ = sqlContext.registerFunction("stringLengthInt", lambda x: 
len(x), IntegerType())
    -        >>> sqlContext.sql("SELECT stringLengthInt('test')").collect()
    -        [Row(stringLengthInt(test)=4)]
    -
    -        >>> from pyspark.sql.types import IntegerType
    -        >>> _ = sqlContext.udf.register("stringLengthInt", lambda x: 
len(x), IntegerType())
    -        >>> sqlContext.sql("SELECT stringLengthInt('test')").collect()
    -        [Row(stringLengthInt(test)=4)]
    -
    -        >>> from pyspark.sql.types import IntegerType
    -        >>> from pyspark.sql.functions import udf
    -        >>> slen = udf(lambda s: len(s), IntegerType())
    -        >>> _ = sqlContext.udf.register("slen", slen)
    -        >>> sqlContext.sql("SELECT slen('test')").collect()
    -        [Row(slen(test)=4)]
    -
    -        >>> import random
    -        >>> from pyspark.sql.functions import udf
    -        >>> from pyspark.sql.types import IntegerType
    -        >>> random_udf = udf(lambda: random.randint(0, 100), 
IntegerType()).asNondeterministic()
    -        >>> new_random_udf = sqlContext.registerFunction("random_udf", 
random_udf)
    -        >>> sqlContext.sql("SELECT random_udf()").collect()  # doctest: 
+SKIP
    -        [Row(random_udf()=82)]
    -        >>> sqlContext.range(1).select(new_random_udf()).collect()  # 
doctest: +SKIP
    -        [Row(<lambda>()=26)]
    -
    -        >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
    -        >>> @pandas_udf("integer", PandasUDFType.SCALAR)  # doctest: +SKIP
    -        ... def add_one(x):
    -        ...     return x + 1
    -        ...
    -        >>> _ = sqlContext.udf.register("add_one", add_one)  # doctest: 
+SKIP
    -        >>> sqlContext.sql("SELECT add_one(id) FROM range(3)").collect()  
# doctest: +SKIP
    -        [Row(add_one(id)=1), Row(add_one(id)=2), Row(add_one(id)=3)]
    -        """
    -        return self.sparkSession.catalog.registerFunction(name, f, 
returnType)
    +        warnings.warn(
    +            "Deprecated in 2.3.0. Use spark.udf.register instead.",
    +            DeprecationWarning)
    +        return self.sparkSession.udf.register(name, f, returnType)
    +    # Reuse the docstring from UDFRegistration but with few notes.
    +    _register_doc = UDFRegistration.register.__doc__.strip()
    +    registerFunction.__doc__ = """%s
     
    -    @ignore_unicode_prefix
    -    @since(2.1)
    -    def registerJavaFunction(self, name, javaClassName, returnType=None):
    -        """Register a java UDF so it can be used in SQL statements.
    -
    -        In addition to a name and the function itself, the return type can 
be optionally specified.
    -        When the return type is not specified we would infer it via 
reflection.
    -        :param name:  name of the UDF
    -        :param javaClassName: fully qualified name of java class
    -        :param returnType: a :class:`pyspark.sql.types.DataType` object
    -
    -        >>> sqlContext.registerJavaFunction("javaStringLength",
    -        ...   "test.org.apache.spark.sql.JavaStringLength", IntegerType())
    -        >>> sqlContext.sql("SELECT javaStringLength('test')").collect()
    -        [Row(UDF:javaStringLength(test)=4)]
    -        >>> sqlContext.registerJavaFunction("javaStringLength2",
    -        ...   "test.org.apache.spark.sql.JavaStringLength")
    -        >>> sqlContext.sql("SELECT javaStringLength2('test')").collect()
    -        [Row(UDF:javaStringLength2(test)=4)]
    +        .. note:: :func:`sqlContext.registerFunction` is an alias for
    +            :func:`spark.udf.register`.
    +        .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.register` 
instead.
    +        .. versionadded:: 1.2
    +    """ % _register_doc[:_register_doc.rfind('versionadded::')]
     
    -        """
    -        jdt = None
    -        if returnType is not None:
    -            jdt = 
self.sparkSession._jsparkSession.parseDataType(returnType.json())
    -        self.sparkSession._jsparkSession.udf().registerJava(name, 
javaClassName, jdt)
    +    def registerJavaFunction(self, name, javaClassName, returnType=None):
    +        warnings.warn(
    +            "Deprecated in 2.3.0. Use spark.udf.registerJavaFunction 
instead.",
    +            DeprecationWarning)
    +        return self.sparkSession.udf.registerJavaFunction(name, 
javaClassName, returnType)
    +    _registerJavaFunction_doc = 
UDFRegistration.registerJavaFunction.__doc__.strip()
    +    registerJavaFunction.__doc__ = """%s
     
    -    @ignore_unicode_prefix
    -    @since(2.3)
    -    def registerJavaUDAF(self, name, javaClassName):
    --- End diff --
    
    We are fine to remove this one because this is added within 2.3.0 timeline 
- https://issues.apache.org/jira/browse/SPARK-19439



---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #20288: [SPARK-23122][PYTHON][SQL] Deprecate register* fo...

Reply via email to