This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 0ef86631e206 [SPARK-51838][PYTHON][TESTS] Add a test to check function wildcard import 0ef86631e206 is described below commit 0ef86631e2069da034ee7c6feb5fad3771ed86d0 Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Fri Apr 18 14:25:17 2025 +0800 [SPARK-51838][PYTHON][TESTS] Add a test to check function wildcard import ### What changes were proposed in this pull request? Add a test to check function wildcard import ### Why are the changes needed? to check what will be imported by `from pyspark.sql.functions import *` ### Does this PR introduce _any_ user-facing change? no, test-only ### How was this patch tested? ci ### Was this patch authored or co-authored using generative AI tooling? no Closes #50634 from zhengruifeng/test_function_all. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- .../sql/tests/connect/test_parity_functions.py | 4 ++ python/pyspark/sql/tests/test_functions.py | 73 +++++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/tests/connect/test_parity_functions.py b/python/pyspark/sql/tests/connect/test_parity_functions.py index 0a77c5531082..d23df4527133 100644 --- a/python/pyspark/sql/tests/connect/test_parity_functions.py +++ b/python/pyspark/sql/tests/connect/test_parity_functions.py @@ -34,6 +34,10 @@ class FunctionsParityTests(FunctionsTestsMixin, ReusedConnectTestCase): def test_input_file_name_reset_for_rdd(self): super().test_input_file_name_reset_for_rdd() + @unittest.skip("No need to test in Spark Connect.") + def test_wildcard_import(self): + super().test_wildcard_import() + if __name__ == "__main__": from pyspark.sql.tests.connect.test_parity_functions import * # noqa: F401 diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index b627bc793f05..01b2149bda45 100644 --- 
a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -18,7 +18,7 @@ from contextlib import redirect_stdout import datetime from enum import Enum -from inspect import getmembers, isfunction +from inspect import getmembers, isfunction, isclass import io from itertools import chain import math @@ -90,6 +90,77 @@ class FunctionsTestsMixin: expected_missing_in_py, missing_in_py, "Missing functions in pyspark not as expected" ) + def test_wildcard_import(self): + all_set = set(F.__all__) + + # { + # "abs", + # "acos", + # "acosh", + # "add_months", + # "aes_decrypt", + # "aes_encrypt", + # ..., + # } + fn_set = {name for (name, value) in getmembers(F, isfunction) if name[0] != "_"} + + expected_fn_all_diff = { + "approxCountDistinct", # deprecated + "bitwiseNOT", # deprecated + "cast", # typing + "countDistinct", # deprecated + "overload", # typing + "quote", # new function in 4.1 + "shiftLeft", # deprecated + "shiftRight", # deprecated + "shiftRightUnsigned", # deprecated + "sumDistinct", # deprecated + "toDegrees", # deprecated + "toRadians", # deprecated + } + + self.assertEqual( + expected_fn_all_diff, + fn_set - all_set, + "some functions are not registered in __all__", + ) + + # { + # "AnalyzeArgument", + # "AnalyzeResult", + # ..., + # "UserDefinedFunction", + # "UserDefinedTableFunction", + # } + clz_set = {name for (name, value) in getmembers(F, isclass) if name[0] != "_"} + + expected_clz_all_diff = { + "Any", # typing + "ArrayType", # should be imported from pyspark.sql.types + "ByteType", # should be imported from pyspark.sql.types + "Column", # should be imported from pyspark.sql + "DataType", # should be imported from pyspark.sql.types + "NumericType", # should be imported from pyspark.sql.types + "ParentDataFrame", # internal class + "PySparkTypeError", # should be imported from pyspark.errors + "PySparkValueError", # should be imported from pyspark.errors + "StringType", # should be imported from 
pyspark.sql.types + "StructType", # should be imported from pyspark.sql.types + } + + self.assertEqual( + expected_clz_all_diff, + clz_set - all_set, + "some classes are not registered in __all__", + ) + + unknown_set = all_set - (fn_set | clz_set) + self.assertEqual( + unknown_set, + set(), + "some unknown items are registered in __all__", + ) + def test_explode(self): d = [ Row(a=1, intlist=[1, 2, 3], mapfield={"a": "b"}), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org