This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new c3bfc345191 [SPARK-44357][PYTHON] Add pyspark_testing module for GHA tests c3bfc345191 is described below commit c3bfc345191d645769d943127eccae91b23390a5 Author: Amanda Liu <amanda....@databricks.com> AuthorDate: Tue Jul 11 12:51:16 2023 +0900 [SPARK-44357][PYTHON] Add pyspark_testing module for GHA tests ### What changes were proposed in this pull request? This PR adds a new module and modifies GHA for `pyspark.testing.utils` doctests. ### Why are the changes needed? This change ensures that doctests are run by GHA, since `pyspark.testing.utils` now contains user-facing APIs with docstrings. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #41896 from asl3/add-pyspark-testing-module. Authored-by: Amanda Liu <amanda....@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .github/workflows/build_and_test.yml | 2 +- dev/sparktestsupport/modules.py | 11 ++++++++++- dev/sparktestsupport/utils.py | 8 ++++---- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 23d5a94c320..0b184c6c248 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -345,7 +345,7 @@ jobs: - ${{ inputs.java }} modules: - >- - pyspark-sql, pyspark-mllib, pyspark-resource + pyspark-sql, pyspark-mllib, pyspark-resource, pyspark-testing - >- pyspark-core, pyspark-streaming, pyspark-ml - >- diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 2090546512f..72a5a6f6394 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -468,7 +468,6 @@ pyspark_sql = Module( "pyspark.sql.pandas.typehints", "pyspark.sql.pandas.utils", "pyspark.sql.observation", - "pyspark.testing.utils", # unittests "pyspark.sql.tests.test_arrow", 
"pyspark.sql.tests.test_arrow_python_udf", @@ -508,6 +507,16 @@ pyspark_sql = Module( ], ) +pyspark_testing = Module( + name="pyspark-testing", + dependencies=[pyspark_core, pyspark_sql], + source_file_regexes=["python/pyspark/testing"], + python_test_goals=[ + # doctests + "pyspark.testing.utils", + ], +) + pyspark_resource = Module( name="pyspark-resource", dependencies=[pyspark_core], diff --git a/dev/sparktestsupport/utils.py b/dev/sparktestsupport/utils.py index d07fc936f8f..339534bec25 100755 --- a/dev/sparktestsupport/utils.py +++ b/dev/sparktestsupport/utils.py @@ -114,14 +114,14 @@ def determine_modules_to_test(changed_modules, deduplicated=True): ['avro', 'connect', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow', 'pyspark-pandas-slow-connect', 'pyspark-sql', - 'repl', 'sparkr', 'sql', 'sql-kafka-0-10'] + 'pyspark-testing', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10'] >>> sorted([x.name for x in determine_modules_to_test( ... [modules.sparkr, modules.sql], deduplicated=False)]) ... # doctest: +NORMALIZE_WHITESPACE ['avro', 'connect', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow', 'pyspark-pandas-slow-connect', 'pyspark-sql', - 'repl', 'sparkr', 'sql', 'sql-kafka-0-10'] + 'pyspark-testing', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10'] >>> sorted([x.name for x in determine_modules_to_test( ... [modules.sql, modules.core], deduplicated=False)]) ... 
# doctest: +NORMALIZE_WHITESPACE @@ -129,8 +129,8 @@ def determine_modules_to_test(changed_modules, deduplicated=True): 'hive', 'hive-thriftserver', 'mllib', 'mllib-local', 'protobuf', 'pyspark-connect', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-connect', 'pyspark-pandas-slow', 'pyspark-pandas-slow-connect', 'pyspark-resource', 'pyspark-sql', - 'pyspark-streaming', 'repl', 'root', 'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', - 'streaming-kafka-0-10', 'streaming-kinesis-asl'] + 'pyspark-streaming', 'pyspark-testing', 'repl', 'root', 'sparkr', 'sql', 'sql-kafka-0-10', + 'streaming', 'streaming-kafka-0-10', 'streaming-kinesis-asl'] """ modules_to_test = set() for module in changed_modules: --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org