This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 2583bd2c16a [SPARK-44958][PYTHON][CONNECT][TESTS] Add a test to validate the parity of functions 2583bd2c16a is described below commit 2583bd2c16a335747895c0843f438d0966f47ecd Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Fri Aug 25 18:43:07 2023 +0800 [SPARK-44958][PYTHON][CONNECT][TESTS] Add a test to validate the parity of functions ### What changes were proposed in this pull request? Add a test to validate the parity of functions. ### Why are the changes needed? There is [a test](https://github.com/apache/spark/blob/206554f127903f5239b20a7fe1e6e226fcb822ea/python/pyspark/sql/tests/test_functions.py#L37) that compares the functions between PySpark and the JVM side, but there is no test that compares PySpark with Spark Connect. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Added a unit test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42671 from zhengruifeng/py_function_parity. 
Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- python/pyspark/sql/connect/functions.py | 3 ++ .../sql/tests/connect/test_connect_function.py | 33 ++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py index ea95994c6df..ae28eb9346b 100644 --- a/python/pyspark/sql/connect/functions.py +++ b/python/pyspark/sql/connect/functions.py @@ -744,6 +744,9 @@ def pow(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) pow.__doc__ = pysparkfuncs.pow.__doc__ +power = pow + + def radians(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("radians", col) diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py b/python/pyspark/sql/tests/connect/test_connect_function.py index a5d330fe1a7..f73126f242f 100644 --- a/python/pyspark/sql/tests/connect/test_connect_function.py +++ b/python/pyspark/sql/tests/connect/test_connect_function.py @@ -16,6 +16,7 @@ # import os import unittest +from inspect import getmembers, isfunction from pyspark.errors import PySparkTypeError, PySparkValueError from pyspark.sql import SparkSession as PySparkSession @@ -2363,6 +2364,38 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S def test_pandas_udf_import(self): self.assert_eq(getattr(CF, "pandas_udf"), getattr(SF, "pandas_udf")) + def test_function_parity(self): + # This test compares the available list of functions in pyspark.sql.functions with those + # available in the Spark Connect Python Client in pyspark.sql.connect.functions + + sf_fn = {name for (name, value) in getmembers(SF, isfunction) if name[0] != "_"} + + cf_fn = {name for (name, value) in getmembers(CF, isfunction) if name[0] != "_"} + + # Functions in vanilla PySpark we do not expect to be available in Spark Connect + sf_excluded_fn = { + "get_active_spark_context", # internal helper function + 
"try_remote_functions", # internal helper function + "to_str", # internal helper function + } + + self.assertEqual( + sf_fn - cf_fn, + sf_excluded_fn, + "Missing functions in Spark Connect not as expected", + ) + + # Functions in Spark Connect we do not expect to be available in vanilla PySpark + cf_excluded_fn = { + "check_dependencies", # internal helper function + } + + self.assertEqual( + cf_fn - sf_fn, + cf_excluded_fn, + "Missing functions in vanilla PySpark not as expected", + ) + if __name__ == "__main__": import os --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org