This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 90c6c2b36743 [SPARK-45222][PYTHON][DOCS] Refine docstring of `DataFrameReader.json` 90c6c2b36743 is described below commit 90c6c2b36743e64ecdeaebb34fe37aa348701370 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Tue Nov 7 09:58:50 2023 -0800 [SPARK-45222][PYTHON][DOCS] Refine docstring of `DataFrameReader.json` ### What changes were proposed in this pull request? This PR proposes to improve the docstring of `DataFrameReader.json`. ### Why are the changes needed? For end users, and better usability of PySpark. ### Does this PR introduce _any_ user-facing change? Yes, it fixes the user facing documentation. ### How was this patch tested? Manually tested. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43687 from HyukjinKwon/SPARK-45222. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/readwriter.py | 51 ++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 75faa13f02b3..b7e2c145f443 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -380,22 +380,59 @@ class DataFrameReader(OptionUtils): Examples -------- - Write a DataFrame into a JSON file and read it back. + Example 1: Write a DataFrame into a JSON file and read it back. >>> import tempfile >>> with tempfile.TemporaryDirectory() as d: ... # Write a DataFrame into a JSON file ... spark.createDataFrame( - ... [{"age": 100, "name": "Hyukjin Kwon"}] + ... [{"age": 100, "name": "Hyukjin"}] ... ).write.mode("overwrite").format("json").save(d) ... ... # Read the JSON file as a DataFrame. ... 
spark.read.json(d).show() - +---+------------+ - |age| name| - +---+------------+ - |100|Hyukjin Kwon| - +---+------------+ + +---+-------+ + |age| name| + +---+-------+ + |100|Hyukjin| + +---+-------+ + + Example 2: Read JSON from multiple directories + + >>> import tempfile + >>> with tempfile.TemporaryDirectory() as d1, tempfile.TemporaryDirectory() as d2: + ... # Write the first DataFrame as JSON into d1 + ... spark.createDataFrame( + ... [{"age": 30, "name": "Bob"}] + ... ).write.mode("overwrite").format("json").save(d1) + ... + ... # Write a second DataFrame as JSON into d2, then read both directories as one DataFrame. + ... spark.createDataFrame( + ... [{"age": 25, "name": "Alice"}] + ... ).write.mode("overwrite").format("json").save(d2) + ... spark.read.json([d1, d2]).show() + +---+-----+ + |age| name| + +---+-----+ + | 25|Alice| + | 30| Bob| + +---+-----+ + + Example 3: Read JSON with a custom schema + + >>> import tempfile + >>> with tempfile.TemporaryDirectory() as d: + ... # Write a DataFrame into a JSON file + ... spark.createDataFrame( + ... [{"age": 30, "name": "Bob"}] + ... ).write.mode("overwrite").format("json").save(d) + ... custom_schema = "name STRING, age INT" + ... spark.read.json(d, schema=custom_schema).show() + +----+---+ + |name|age| + +----+---+ + | Bob| 30| + +----+---+ """ self._set_opts( schema=schema, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org