This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 47457cb218c3 [SPARK-52352][PYTHON][DOCS] Update `pyspark.sql.functions.to_json` docstring to include `VariantType` as valid input 47457cb218c3 is described below commit 47457cb218c3f41a221026bec87ae4ce09468dac Author: Austin Warner <austin.richard.war...@gmail.com> AuthorDate: Wed Jun 4 10:45:30 2025 +0900 [SPARK-52352][PYTHON][DOCS] Update `pyspark.sql.functions.to_json` docstring to include `VariantType` as valid input ### What changes were proposed in this pull request? Updated the `pyspark.sql.functions.to_json` docstring to include `VariantType` as a valid input. This includes updates to the summary line, the `col` parameter description, and a new example. ### Why are the changes needed? With the release of Spark 4.0, users of the new Variant Type will sometimes need to save out the JSON string representation when using PySpark. Before this change, the API docs falsely imply that `to_json` cannot be used for VariantType columns. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No tests added (docs-only change) ### Was this patch authored or co-authored using generative AI tooling? No Closes #51064 from austinrwarner/SPARK-52352. 
Authored-by: Austin Warner <austin.richard.war...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/functions/builtin.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index dd4c363de636..37b65c3203da 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -20712,8 +20712,8 @@ def schema_of_variant_agg(v: "ColumnOrName") -> Column: @_try_remote_functions def to_json(col: "ColumnOrName", options: Optional[Mapping[str, str]] = None) -> Column: """ - Converts a column containing a :class:`StructType`, :class:`ArrayType` or a :class:`MapType` - into a JSON string. Throws an exception, in the case of an unsupported type. + Converts a column containing a :class:`StructType`, :class:`ArrayType`, :class:`MapType` + or a :class:`VariantType` into a JSON string. Throws an exception, in the case of an unsupported type. .. versionadded:: 2.1.0 @@ -20723,7 +20723,7 @@ def to_json(col: "ColumnOrName", options: Optional[Mapping[str, str]] = None) -> Parameters ---------- col : :class:`~pyspark.sql.Column` or str - name of column containing a struct, an array or a map. + name of column containing a struct, an array, a map, or a variant object. options : dict, optional options to control converting. accepts the same options as the JSON datasource. 
See `Data Source Option <https://spark.apache.org/docs/latest/sql-data-sources-json.html#data-source-option>`_ @@ -20777,7 +20777,18 @@ def to_json(col: "ColumnOrName", options: Optional[Mapping[str, str]] = None) -> |{"name":"Alice"}| +----------------+ - Example 4: Converting a nested MapType column to JSON + Example 4: Converting a VariantType column to JSON + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(1, '{"name": "Alice"}')], ("key", "value")) + >>> df.select(sf.to_json(sf.parse_json(df.value)).alias("json")).show(truncate=False) + +----------------+ + |json | + +----------------+ + |{"name":"Alice"}| + +----------------+ + + Example 5: Converting a nested MapType column to JSON >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([(1, [{"name": "Alice"}, {"name": "Bob"}])], ("key", "value")) @@ -20788,7 +20799,7 @@ def to_json(col: "ColumnOrName", options: Optional[Mapping[str, str]] = None) -> |[{"name":"Alice"},{"name":"Bob"}]| +---------------------------------+ - Example 5: Converting a simple ArrayType column to JSON + Example 6: Converting a simple ArrayType column to JSON >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([(1, ["Alice", "Bob"])], ("key", "value")) @@ -20799,7 +20810,7 @@ def to_json(col: "ColumnOrName", options: Optional[Mapping[str, str]] = None) -> |["Alice","Bob"]| +---------------+ - Example 6: Converting to JSON with specified options + Example 7: Converting to JSON with specified options >>> import pyspark.sql.functions as sf >>> df = spark.sql("SELECT (DATE('2022-02-22'), 1) AS date") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org