This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 23ce9c46fa8 [SPARK-44964][ML][CONNECT][TESTS] Clean up pyspark.ml.connect.functions doctest 23ce9c46fa8 is described below commit 23ce9c46fa80a2256ebe06932bf2963a611d1a4d Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Sat Aug 26 20:26:46 2023 -0700 [SPARK-44964][ML][CONNECT][TESTS] Clean up pyspark.ml.connect.functions doctest ### What changes were proposed in this pull request? This PR proposes to clean up the `pyspark.ml.connect.functions` doctest. All of the tests in that module are currently being skipped. ### Why are the changes needed? To remove unused test code. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Manually ran the tests via: ```bash ./python/run-tests --python-executables=python3 --modules=pyspark-ml-connect ``` ### Was this patch authored or co-authored using generative AI tooling? No Closes #42679 from HyukjinKwon/SPARK-44964. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- dev/sparktestsupport/modules.py | 2 -- python/pyspark/ml/connect/__init__.py | 3 +++ python/pyspark/ml/connect/functions.py | 43 ---------------------------------- 3 files changed, 3 insertions(+), 45 deletions(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 64ccf600ef0..3c018ac7c83 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -886,8 +886,6 @@ pyspark_ml_connect = Module( "python/pyspark/ml/connect", ], python_test_goals=[ - # ml doctests - "pyspark.ml.connect.functions", # ml unittests "pyspark.ml.tests.connect.test_connect_function", "pyspark.ml.tests.connect.test_parity_torch_distributor", diff --git a/python/pyspark/ml/connect/__init__.py b/python/pyspark/ml/connect/__init__.py index 2ee152f6a38..fb92b4d81bf 100644 --- a/python/pyspark/ml/connect/__init__.py +++ b/python/pyspark/ml/connect/__init__.py @@ -16,6 +16,9 @@ #
"""Spark Connect Python Client - ML module""" +from pyspark.sql.connect.utils import check_dependencies + +check_dependencies(__name__) from pyspark.ml.connect.base import ( Estimator, diff --git a/python/pyspark/ml/connect/functions.py b/python/pyspark/ml/connect/functions.py index ab7e3ab3c9a..c681bf5926b 100644 --- a/python/pyspark/ml/connect/functions.py +++ b/python/pyspark/ml/connect/functions.py @@ -14,12 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from pyspark.sql.connect.utils import check_dependencies - -check_dependencies(__name__) - from pyspark.ml import functions as PyMLFunctions - from pyspark.sql.connect.column import Column from pyspark.sql.connect.functions import _invoke_function, _to_col, lit @@ -36,41 +31,3 @@ def array_to_vector(col: Column) -> Column: array_to_vector.__doc__ = PyMLFunctions.array_to_vector.__doc__ - - -def _test() -> None: - import sys - import doctest - from pyspark.sql import SparkSession as PySparkSession - import pyspark.ml.connect.functions - - globs = pyspark.ml.connect.functions.__dict__.copy() - - # TODO: split vector_to_array doctest since it includes .mllib vectors - del pyspark.ml.connect.functions.vector_to_array.__doc__ - - # TODO: spark.createDataFrame should support UDT - del pyspark.ml.connect.functions.array_to_vector.__doc__ - - globs["spark"] = ( - PySparkSession.builder.appName("ml.connect.functions tests") - .remote("local[4]") - .getOrCreate() - ) - - (failure_count, test_count) = doctest.testmod( - pyspark.ml.connect.functions, - globs=globs, - optionflags=doctest.ELLIPSIS - | doctest.NORMALIZE_WHITESPACE - | doctest.IGNORE_EXCEPTION_DETAIL, - ) - - globs["spark"].stop() - - if failure_count: - sys.exit(-1) - - -if __name__ == "__main__": - _test() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: 
commits-h...@spark.apache.org