This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c3bfc345191 [SPARK-44357][PYTHON] Add pyspark_testing module for GHA 
tests
c3bfc345191 is described below

commit c3bfc345191d645769d943127eccae91b23390a5
Author: Amanda Liu <amanda....@databricks.com>
AuthorDate: Tue Jul 11 12:51:16 2023 +0900

    [SPARK-44357][PYTHON] Add pyspark_testing module for GHA tests
    
    ### What changes were proposed in this pull request?
    This PR adds a new module and modifies GHA for `pyspark.testing.utils` 
doctests.
    
    ### Why are the changes needed?
    This change ensures that doctests are run by GHA, since 
`pyspark.testing.utils` now contains user-facing APIs with docstrings.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Existing unit tests
    
    Closes #41896 from asl3/add-pyspark-testing-module.
    
    Authored-by: Amanda Liu <amanda....@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .github/workflows/build_and_test.yml |  2 +-
 dev/sparktestsupport/modules.py      | 11 ++++++++++-
 dev/sparktestsupport/utils.py        |  8 ++++----
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml
index 23d5a94c320..0b184c6c248 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -345,7 +345,7 @@ jobs:
           - ${{ inputs.java }}
         modules:
           - >-
-            pyspark-sql, pyspark-mllib, pyspark-resource
+            pyspark-sql, pyspark-mllib, pyspark-resource, pyspark-testing
           - >-
             pyspark-core, pyspark-streaming, pyspark-ml
           - >-
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 2090546512f..72a5a6f6394 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -468,7 +468,6 @@ pyspark_sql = Module(
         "pyspark.sql.pandas.typehints",
         "pyspark.sql.pandas.utils",
         "pyspark.sql.observation",
-        "pyspark.testing.utils",
         # unittests
         "pyspark.sql.tests.test_arrow",
         "pyspark.sql.tests.test_arrow_python_udf",
@@ -508,6 +507,16 @@ pyspark_sql = Module(
     ],
 )
 
+pyspark_testing = Module(
+    name="pyspark-testing",
+    dependencies=[pyspark_core, pyspark_sql],
+    source_file_regexes=["python/pyspark/testing"],
+    python_test_goals=[
+        # doctests
+        "pyspark.testing.utils",
+    ],
+)
+
 pyspark_resource = Module(
     name="pyspark-resource",
     dependencies=[pyspark_core],
diff --git a/dev/sparktestsupport/utils.py b/dev/sparktestsupport/utils.py
index d07fc936f8f..339534bec25 100755
--- a/dev/sparktestsupport/utils.py
+++ b/dev/sparktestsupport/utils.py
@@ -114,14 +114,14 @@ def determine_modules_to_test(changed_modules, 
deduplicated=True):
     ['avro', 'connect', 'docker-integration-tests', 'examples', 'hive', 
'hive-thriftserver',
      'mllib', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-mllib', 
'pyspark-pandas',
      'pyspark-pandas-connect', 'pyspark-pandas-slow', 
'pyspark-pandas-slow-connect', 'pyspark-sql',
-     'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
+     'pyspark-testing', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
     >>> sorted([x.name for x in determine_modules_to_test(
     ...     [modules.sparkr, modules.sql], deduplicated=False)])
     ... # doctest: +NORMALIZE_WHITESPACE
     ['avro', 'connect', 'docker-integration-tests', 'examples', 'hive', 
'hive-thriftserver',
      'mllib', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-mllib', 
'pyspark-pandas',
      'pyspark-pandas-connect', 'pyspark-pandas-slow', 
'pyspark-pandas-slow-connect', 'pyspark-sql',
-     'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
+     'pyspark-testing', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
     >>> sorted([x.name for x in determine_modules_to_test(
     ...     [modules.sql, modules.core], deduplicated=False)])
     ... # doctest: +NORMALIZE_WHITESPACE
@@ -129,8 +129,8 @@ def determine_modules_to_test(changed_modules, 
deduplicated=True):
      'hive', 'hive-thriftserver', 'mllib', 'mllib-local', 'protobuf', 
'pyspark-connect',
      'pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 
'pyspark-pandas-connect',
      'pyspark-pandas-slow', 'pyspark-pandas-slow-connect', 'pyspark-resource', 
'pyspark-sql',
-     'pyspark-streaming', 'repl', 'root', 'sparkr', 'sql', 'sql-kafka-0-10', 
'streaming',
-     'streaming-kafka-0-10', 'streaming-kinesis-asl']
+     'pyspark-streaming', 'pyspark-testing', 'repl', 'root', 'sparkr', 'sql', 
'sql-kafka-0-10',
+     'streaming', 'streaming-kafka-0-10', 'streaming-kinesis-asl']
     """
     modules_to_test = set()
     for module in changed_modules:


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to