This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0ef86631e206 [SPARK-51838][PYTHON][TESTS] Add a test to check function wildcard import
0ef86631e206 is described below

commit 0ef86631e2069da034ee7c6feb5fad3771ed86d0
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Fri Apr 18 14:25:17 2025 +0800

    [SPARK-51838][PYTHON][TESTS] Add a test to check function wildcard import
    
    ### What changes were proposed in this pull request?
    Add a test to check function wildcard import
    
    ### Why are the changes needed?
    to check what will be imported by `from pyspark.sql.functions import *`
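    
    As a rough illustration (not part of this patch; it assumes a local PySpark install), the wildcard form only brings in the names the module lists in `__all__`:
    
    ```python
    # Sketch only: `import *` exposes exactly the names listed in `__all__`.
    from pyspark.sql import functions as F
    
    print(len(F.__all__))           # how many names `import *` would bring in
    print("trim" in F.__all__)      # True: an ordinary public function is exported
    print("overload" in F.__all__)  # False: typing helper, deliberately left out
    ```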
    
    ### Does this PR introduce _any_ user-facing change?
    no, test-only
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #50634 from zhengruifeng/test_function_all.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 .../sql/tests/connect/test_parity_functions.py     |  4 ++
 python/pyspark/sql/tests/test_functions.py         | 73 +++++++++++++++++++++-
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/tests/connect/test_parity_functions.py b/python/pyspark/sql/tests/connect/test_parity_functions.py
index 0a77c5531082..d23df4527133 100644
--- a/python/pyspark/sql/tests/connect/test_parity_functions.py
+++ b/python/pyspark/sql/tests/connect/test_parity_functions.py
@@ -34,6 +34,10 @@ class FunctionsParityTests(FunctionsTestsMixin, ReusedConnectTestCase):
     def test_input_file_name_reset_for_rdd(self):
         super().test_input_file_name_reset_for_rdd()
 
+    @unittest.skip("No need to test in Spark Connect.")
+    def test_wildcard_import(self):
+        super().test_wildcard_import()
+
 
 if __name__ == "__main__":
     from pyspark.sql.tests.connect.test_parity_functions import *  # noqa: F401
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index b627bc793f05..01b2149bda45 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -18,7 +18,7 @@
 from contextlib import redirect_stdout
 import datetime
 from enum import Enum
-from inspect import getmembers, isfunction
+from inspect import getmembers, isfunction, isclass
 import io
 from itertools import chain
 import math
@@ -90,6 +90,77 @@ class FunctionsTestsMixin:
             expected_missing_in_py, missing_in_py, "Missing functions in pyspark not as expected"
         )
 
+    def test_wildcard_import(self):
+        all_set = set(F.__all__)
+
+        # {
+        #     "abs",
+        #     "acos",
+        #     "acosh",
+        #     "add_months",
+        #     "aes_decrypt",
+        #     "aes_encrypt",
+        #     ...,
+        # }
+        fn_set = {name for (name, value) in getmembers(F, isfunction) if name[0] != "_"}
+
+        expected_fn_all_diff = {
+            "approxCountDistinct",  # deprecated
+            "bitwiseNOT",  # deprecated
+            "cast",  # typing
+            "countDistinct",  # deprecated
+            "overload",  # typing
+            "quote",  # new function in 4.1
+            "shiftLeft",  # deprecated
+            "shiftRight",  # deprecated
+            "shiftRightUnsigned",  # deprecated
+            "sumDistinct",  # deprecated
+            "toDegrees",  # deprecated
+            "toRadians",  # deprecated
+        }
+
+        self.assertEqual(
+            expected_fn_all_diff,
+            fn_set - all_set,
+            "some functions are not registered in __all__",
+        )
+
+        # {
+        #     "AnalyzeArgument",
+        #     "AnalyzeResult",
+        #     ...,
+        #     "UserDefinedFunction",
+        #     "UserDefinedTableFunction",
+        # }
+        clz_set = {name for (name, value) in getmembers(F, isclass) if name[0] != "_"}
+
+        expected_clz_all_diff = {
+            "Any",  # typing
+            "ArrayType",  # should be imported from pyspark.sql.types
+            "ByteType",  # should be imported from pyspark.sql.types
+            "Column",  # should be imported from pyspark.sql
+            "DataType",  # should be imported from pyspark.sql.types
+            "NumericType",  # should be imported from pyspark.sql.types
+            "ParentDataFrame",  # internal class
+            "PySparkTypeError",  # should be imported from pyspark.errors
+            "PySparkValueError",  # should be imported from pyspark.errors
+            "StringType",  # should be imported from pyspark.sql.types
+            "StructType",  # should be imported from pyspark.sql.types
+        }
+
+        self.assertEqual(
+            expected_clz_all_diff,
+            clz_set - all_set,
+            "some classes are not registered in __all__",
+        )
+
+        unknown_set = all_set - (fn_set | clz_set)
+        self.assertEqual(
+            unknown_set,
+            set(),
+            "some unknown items are registered in __all__",
+        )
+
     def test_explode(self):
         d = [
             Row(a=1, intlist=[1, 2, 3], mapfield={"a": "b"}),
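
For reference, the check performed by the new test can be reproduced in a plain Python session; this is a sketch mirroring the diff above (it assumes a local PySpark installation), not part of the patch:

```python
# Sketch mirroring the new test: compare `__all__` against the module's
# public functions and classes.
from inspect import getmembers, isclass, isfunction

from pyspark.sql import functions as F

all_set = set(F.__all__)
fn_set = {name for name, _ in getmembers(F, isfunction) if not name.startswith("_")}
clz_set = {name for name, _ in getmembers(F, isclass) if not name.startswith("_")}

# Public names reachable on the module but hidden from `import *`
# (deprecated aliases, typing helpers, re-exported types, ...).
print(sorted(fn_set - all_set))
print(sorted(clz_set - all_set))

# Everything exported by `import *` should be a known function or class.
assert all_set <= (fn_set | clz_set)
```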


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
