This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 1157ffddde97 [SPARK-45186][PYTHON][DOCS] Refine docstring of `schema_of_xml`
1157ffddde97 is described below

commit 1157ffddde9713cccfd6f8572171fff36e5aa3be
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Tue Nov 7 10:03:05 2023 -0800

    [SPARK-45186][PYTHON][DOCS] Refine docstring of `schema_of_xml`

    ### What changes were proposed in this pull request?

    This PR proposes to improve the docstring of `schema_of_xml`.

    ### Why are the changes needed?

    For end users, and better usability of PySpark.

    ### Does this PR introduce _any_ user-facing change?

    Yes, it fixes the user facing documentation.

    ### How was this patch tested?

    Manually tested.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #43681 from HyukjinKwon/SPARK-45186-1.

    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/functions.py | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index ef5c0ea073ab..95821deeec17 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -13755,14 +13755,45 @@ def schema_of_xml(xml: "ColumnOrName", options: Optional[Dict[str, str]] = None)
 
     Examples
     --------
+    Example 1: Parsing a simple XML with a single element
+
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.range(1)
-    >>> df.select(schema_of_xml(lit('<p><a>1</a></p>')).alias("xml")).collect()
+    >>> df.select(sf.schema_of_xml(sf.lit('<p><a>1</a></p>')).alias("xml")).collect()
     [Row(xml='STRUCT<a: BIGINT>')]
-    >>> df.select(schema_of_xml(lit('<p><a>1</a><a>2</a></p>')).alias("xml")).collect()
+
+    Example 2: Parsing an XML with multiple elements in an array
+
+    >>> from pyspark.sql import functions as sf
+    >>> df.select(sf.schema_of_xml(sf.lit('<p><a>1</a><a>2</a></p>')).alias("xml")).collect()
     [Row(xml='STRUCT<a: ARRAY<BIGINT>>')]
-    >>> schema = schema_of_xml('<p><a attr="2">1</a></p>', {'excludeAttribute':'true'})
+
+    Example 3: Parsing XML with options to exclude attributes
+
+    >>> from pyspark.sql import functions as sf
+    >>> schema = sf.schema_of_xml('<p><a attr="2">1</a></p>', {'excludeAttribute':'true'})
     >>> df.select(schema.alias("xml")).collect()
     [Row(xml='STRUCT<a: BIGINT>')]
+
+    Example 4: Parsing XML with complex structure
+
+    >>> from pyspark.sql import functions as sf
+    >>> df.select(
+    ...     sf.schema_of_xml(
+    ...         sf.lit('<root><person><name>Alice</name><age>30</age></person></root>')
+    ...     ).alias("xml")
+    ... ).collect()
+    [Row(xml='STRUCT<person: STRUCT<age: BIGINT, name: STRING>>')]
+
+    Example 5: Parsing XML with nested arrays
+
+    >>> from pyspark.sql import functions as sf
+    >>> df.select(
+    ...     sf.schema_of_xml(
+    ...         sf.lit('<data><values><value>1</value><value>2</value></values></data>')
+    ...     ).alias("xml")
+    ... ).collect()
+    [Row(xml='STRUCT<values: STRUCT<value: ARRAY<BIGINT>>>')]
     """
     if isinstance(xml, str):
         col = _create_column_from_literal(xml)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org