This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 39569d5a8ac [SPARK-41657][CONNECT][DOCS][TESTS] Enable doctests in pyspark.sql.connect.session 39569d5a8ac is described below commit 39569d5a8ac0bf192748220d28f76dfe3fc357d3 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Mon Jan 2 19:58:46 2023 +0900 [SPARK-41657][CONNECT][DOCS][TESTS] Enable doctests in pyspark.sql.connect.session ### What changes were proposed in this pull request? This PR proposes to enable doctests in `pyspark.sql.connect.session`, which is virtually the same as `pyspark.sql.session`. ### Why are the changes needed? To ensure PySpark compatibility and test coverage. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? CI in this PR should test this out. Closes #39341 from HyukjinKwon/SPARK-41657. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- dev/sparktestsupport/modules.py | 1 + python/pyspark/sql/connect/session.py | 60 +++++++++++++++++++++++++++++++++++ python/pyspark/sql/session.py | 4 +-- 3 files changed, 63 insertions(+), 2 deletions(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index dff17792148..99f1cc6894f 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -506,6 +506,7 @@ pyspark_connect = Module( # doctests "pyspark.sql.connect.catalog", "pyspark.sql.connect.group", + "pyspark.sql.connect.session", "pyspark.sql.connect.window", "pyspark.sql.connect.column", # unittests diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index ae228317696..a461372c08c 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -22,6 +22,7 @@ import numpy as np import pandas as pd import pyarrow as pa +from pyspark import SparkContext, SparkConf from pyspark.sql.session import classproperty, SparkSession as 
PySparkSession from pyspark.sql.types import ( _infer_schema, @@ -311,3 +312,62 @@ class SparkSession: SparkSession.__doc__ = PySparkSession.__doc__ + + +def _test() -> None: + import os + import sys + import doctest + from pyspark.sql import SparkSession as PySparkSession + from pyspark.testing.connectutils import should_test_connect, connect_requirement_message + + os.chdir(os.environ["SPARK_HOME"]) + + if should_test_connect: + import pyspark.sql.connect.session + + globs = pyspark.sql.connect.session.__dict__.copy() + # Works around to create a regular Spark session + sc = SparkContext("local[4]", "sql.connect.session tests", conf=SparkConf()) + globs["_spark"] = PySparkSession( + sc, options={"spark.app.name": "sql.connect.session tests"} + ) + + # Creates a remote Spark session. + os.environ["SPARK_REMOTE"] = "sc://localhost" + globs["spark"] = PySparkSession.builder.remote("sc://localhost").getOrCreate() + + # Uses PySpark session to test builder. + globs["SparkSession"] = PySparkSession + # Spark Connect does not support to set master together. 
+ pyspark.sql.connect.session.SparkSession.__doc__ = None + del pyspark.sql.connect.session.SparkSession.Builder.master.__doc__ + + # TODO(SPARK-41746): SparkSession.createDataFrame does not respect the column names in + # dictionary + del pyspark.sql.connect.session.SparkSession.createDataFrame.__doc__ + del pyspark.sql.connect.session.SparkSession.read.__doc__ + # TODO(SPARK-41811): Implement SparkSession.sql's string formatter + del pyspark.sql.connect.session.SparkSession.sql.__doc__ + + (failure_count, test_count) = doctest.testmod( + pyspark.sql.connect.session, + globs=globs, + optionflags=doctest.ELLIPSIS + | doctest.NORMALIZE_WHITESPACE + | doctest.IGNORE_EXCEPTION_DETAIL, + ) + + globs["spark"].stop() + globs["_spark"].stop() + if failure_count: + sys.exit(-1) + else: + print( + f"Skipping pyspark.sql.connect.session doctests: {connect_requirement_message}", + file=sys.stderr, + ) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 3f99dc7ab84..1e4e6f5e3ad 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -676,7 +676,7 @@ class SparkSession(SparkConversionMixin): Examples -------- >>> spark.catalog - <pyspark.sql.catalog.Catalog object ...> + <...Catalog object ...> Create a temp view, show the list, and drop it. @@ -1460,7 +1460,7 @@ class SparkSession(SparkConversionMixin): Examples -------- >>> spark.read - <pyspark.sql.readwriter.DataFrameReader object ...> + <...DataFrameReader object ...> Write a DataFrame into a JSON file and read it back. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org