This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 48c5843ad4b  [SPARK-41655][CONNECT] Enable doctests in pyspark.sql.connect.column
48c5843ad4b is described below

commit 48c5843ad4b06e85e07d1db5b308a460209f6126
Author: Sandeep Singh <sand...@techaddict.me>
AuthorDate: Thu Dec 29 20:52:35 2022 +0900

    [SPARK-41655][CONNECT] Enable doctests in pyspark.sql.connect.column

    ### What changes were proposed in this pull request?

    This PR proposes to enable doctests in pyspark.sql.connect.column, which is virtually the same as pyspark.sql.column.

    ### Why are the changes needed?

    To ensure PySpark compatibility and test coverage.

    ### Does this PR introduce any user-facing change?

    No, doctests only.

    ### How was this patch tested?

    New doctests added.

    Closes #39249 from techaddict/SPARK-41655-pyspark.sql.connect.column.

    Lead-authored-by: Sandeep Singh <sand...@techaddict.me>
    Co-authored-by: Hyukjin Kwon <gurwls...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 dev/sparktestsupport/modules.py      |  1 +
 python/pyspark/sql/column.py         | 15 ++++++++-------
 python/pyspark/sql/connect/column.py | 60 ++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 558d058f3e5..df3a1f180fc 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -507,6 +507,7 @@ pyspark_connect = Module(
         "pyspark.sql.connect.catalog",
         "pyspark.sql.connect.group",
         "pyspark.sql.connect.window",
+        "pyspark.sql.connect.column",
         # unittests
         "pyspark.sql.tests.connect.test_connect_column_expressions",
         "pyspark.sql.tests.connect.test_connect_plan_only",
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 3bc49ef8031..5a0987b4cfe 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -182,6 +182,9 @@ def _reverse_op(
     return _
 
 
+# TODO(SPARK-41757): Compatibility of string representation for Column
+
+
 class Column:
 
     """
@@ -200,17 +203,16 @@ class Column:
     ...      [(2, "Alice"), (5, "Bob")], ["age", "name"])
 
     Select a column out of a DataFrame
-
-    >>> df.name
+    >>> df.name  # doctest: +SKIP
     Column<'name'>
-    >>> df["name"]
+    >>> df["name"]  # doctest: +SKIP
     Column<'name'>
 
     Create from an expression
 
-    >>> df.age + 1
+    >>> df.age + 1  # doctest: +SKIP
     Column<'(age + 1)'>
-    >>> 1 / df.age
+    >>> 1 / df.age  # doctest: +SKIP
     Column<'(1 / age)'>
     """
 
@@ -1258,8 +1260,7 @@ class Column:
         >>> from pyspark.sql import Window
         >>> window = Window.partitionBy("name").orderBy("age") \
                 .rowsBetween(Window.unboundedPreceding, Window.currentRow)
-        >>> from pyspark.sql.functions import rank, min
-        >>> from pyspark.sql.functions import desc
+        >>> from pyspark.sql.functions import rank, min, desc
         >>> df = spark.createDataFrame(
         ...      [(2, "Alice"), (5, "Bob")], ["age", "name"])
         >>> df.withColumn("rank", rank().over(window)) \
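A note on the `# doctest: +SKIP` directives in the column.py hunk above: the docstring is shared with the Spark Connect Column (see the connect diff below), whose string representation is not yet compatible (SPARK-41757), so those examples stay visible in the docs but are never executed. `+SKIP` is standard-library doctest behavior; a minimal self-contained sketch, using a hypothetical helper `make_column_repr` that is not part of this commit:

    import doctest


    def make_column_repr(name: str) -> str:
        """Hypothetical stand-in for an object whose repr differs by backend.

        The first example is executed and checked as usual:

        >>> make_column_repr("name")
        "Column<'name'>"

        The second is shown in the docs but never executed, so a
        backend-specific repr cannot fail the run:

        >>> make_column_repr("name")  # doctest: +SKIP
        Column<'name'> or some other backend-specific repr
        """
        return "Column<'name'>"


    if __name__ == "__main__":
        # Attempts only the first example; the skipped one is not run.
        print(doctest.testmod())

Running the sketch attempts a single example and reports zero failures; the skipped example is not counted.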
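The `_test()` harness in the connect/column.py diff below leans on two standard doctest behaviors: `doctest.testmod` only collects examples from docstrings it can find, so `del`-ing a method's `__doc__` (copied from the regular PySpark Column) drops its examples from the run, and the `ELLIPSIS`/`NORMALIZE_WHITESPACE` flags relax output matching. A minimal standalone sketch of the same exclusion pattern, with hypothetical names (`Greeter.hello`, `Greeter.broken`) that are not part of this commit:

    import doctest
    import sys


    class Greeter:
        def hello(self) -> str:
            """
            >>> Greeter().hello()
            'hi'
            """
            return "hi"

        def broken(self) -> str:
            """
            >>> Greeter().broken()
            'this would fail if it ran'
            """
            return "hi"


    # With __doc__ removed, doctest finds no examples for Greeter.broken,
    # mirroring the del statements in _test() below.
    del Greeter.broken.__doc__

    (failure_count, test_count) = doctest.testmod(
        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
    )
    if failure_count:
        sys.exit(-1)
    print(f"attempted {test_count} example(s), {failure_count} failure(s)")

Here `broken`'s failing example never runs because its docstring is gone, just as the unsupported Column methods are excluded until SPARK-41751, SPARK-41756, and SPARK-41758 land.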
[(2, "Alice"), (5, "Bob")], ["age", "name"]) >>> df.withColumn("rank", rank().over(window)) \ diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py index b873a757e41..58d86a3d389 100644 --- a/python/pyspark/sql/connect/column.py +++ b/python/pyspark/sql/connect/column.py @@ -28,6 +28,7 @@ from typing import ( Optional, ) +from pyspark import SparkContext, SparkConf from pyspark.sql.types import DataType from pyspark.sql.column import Column as PySparkColumn @@ -390,3 +391,62 @@ class Column: Column.__doc__ = PySparkColumn.__doc__ + + +def _test() -> None: + import os + import sys + import doctest + from pyspark.sql import SparkSession as PySparkSession + from pyspark.testing.connectutils import should_test_connect, connect_requirement_message + + os.chdir(os.environ["SPARK_HOME"]) + + if should_test_connect: + import pyspark.sql.connect.column + + globs = pyspark.sql.connect.column.__dict__.copy() + # Works around to create a regular Spark session + sc = SparkContext("local[4]", "sql.connect.column tests", conf=SparkConf()) + globs["_spark"] = PySparkSession(sc, options={"spark.app.name": "sql.connect.column tests"}) + + # Creates a remote Spark session. + os.environ["SPARK_REMOTE"] = "sc://localhost" + globs["spark"] = PySparkSession.builder.remote("sc://localhost").getOrCreate() + + # TODO(SPARK-41751): Support Column.bitwiseAND,bitwiseOR,bitwiseXOR,eqNullSafe,isNotNull, + # isNull,isin + del pyspark.sql.connect.column.Column.bitwiseAND.__doc__ + del pyspark.sql.connect.column.Column.bitwiseOR.__doc__ + del pyspark.sql.connect.column.Column.bitwiseXOR.__doc__ + del pyspark.sql.connect.column.Column.eqNullSafe.__doc__ + del pyspark.sql.connect.column.Column.isNotNull.__doc__ + del pyspark.sql.connect.column.Column.isNull.__doc__ + del pyspark.sql.connect.column.Column.isin.__doc__ + # TODO(SPARK-41756): Fix createDataFrame + del pyspark.sql.connect.column.Column.getField.__doc__ + del pyspark.sql.connect.column.Column.getItem.__doc__ + # TODO(SPARK-41758): Support Window functions + del pyspark.sql.connect.column.Column.over.__doc__ + + (failure_count, test_count) = doctest.testmod( + pyspark.sql.connect.column, + globs=globs, + optionflags=doctest.ELLIPSIS + | doctest.NORMALIZE_WHITESPACE + | doctest.IGNORE_EXCEPTION_DETAIL, + ) + + globs["spark"].stop() + globs["_spark"].stop() + if failure_count: + sys.exit(-1) + else: + print( + f"Skipping pyspark.sql.connect.column doctests: {connect_requirement_message}", + file=sys.stderr, + ) + + +if __name__ == "__main__": + _test() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org