This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 5259dcc667df [SPARK-47644][PYTHON][DOCS] Refine docstrings of try_*
5259dcc667df is described below
commit 5259dcc667df3d088877456b7cfa3c5da3d7bb78
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Fri Mar 29 16:28:34 2024 +0900
[SPARK-47644][PYTHON][DOCS] Refine docstrings of try_*
### What changes were proposed in this pull request?
This PR refines the docstrings of `try_*` functions with more descriptive
examples.
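For illustration, the added examples follow a common pattern: temporarily enable ANSI mode so the plain operation would raise, show that the `try_*` variant returns NULL instead, then restore the original setting. A minimal sketch of that pattern (not taken verbatim from the diff; it assumes a local PySpark session):

```python
from pyspark.sql import SparkSession
import pyspark.sql.functions as sf

spark = SparkSession.builder.getOrCreate()

# Save the current ANSI setting, force it on, and restore it afterwards
# so the example does not leak configuration into later doctests.
origin = spark.conf.get("spark.sql.ansi.enabled")
spark.conf.set("spark.sql.ansi.enabled", "true")
try:
    df = spark.range(1)
    # Plain division by zero would raise under ANSI mode;
    # try_divide returns NULL instead.
    df.select(sf.try_divide(df.id, sf.lit(0))).show()
finally:
    spark.conf.set("spark.sql.ansi.enabled", origin)
```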
### Why are the changes needed?
For better API reference documentation.
### Does this PR introduce _any_ user-facing change?
Yes, it fixes user-facing documentation.
### How was this patch tested?
Manually tested. GitHub Actions should verify them.
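For a quick local spot-check of the refined snippets (a sketch only; the CI doctest run remains the authoritative verification), one can replay an example against a throwaway local session:

```python
from pyspark.sql import SparkSession
import pyspark.sql.functions as sf

# Ad-hoc check: run one of the refined examples by hand and inspect the output.
spark = SparkSession.builder.master("local[1]").appName("docstring-check").getOrCreate()
df = spark.createDataFrame([("1997-02-28 10:30:00",)], ["t"])
df.select(sf.try_to_timestamp(df.t).alias("dt")).show()
spark.stop()
```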
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #45769 from HyukjinKwon/SPARK-47644.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/functions/builtin.py | 211 +++++++++++++++++++++++++++++---
1 file changed, 195 insertions(+), 16 deletions(-)
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index 47995a3d66fd..c8a9a8975159 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -511,6 +511,22 @@ def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+-----------------+
|INTERVAL '3' YEAR|
+-----------------+
+
+ Example 5: Overflow results in NULL when ANSI mode is on
+
+ >>> import sys
+ >>> import pyspark.sql.functions as sf
+ >>> origin = spark.conf.get("spark.sql.ansi.enabled")
+ >>> spark.conf.set("spark.sql.ansi.enabled", "true")
+ >>> try:
+ ... df = spark.range(1)
+ ... df.select(sf.try_add(sf.lit(sys.maxsize), sf.lit(sys.maxsize))).show()
+ ... finally:
+ ... spark.conf.set("spark.sql.ansi.enabled", origin)
+ +-------------------------------------------------+
+ |try_add(9223372036854775807, 9223372036854775807)|
+ +-------------------------------------------------+
+ | NULL|
+ +-------------------------------------------------+
"""
return _invoke_function_over_columns("try_add", left, right)
@@ -616,6 +632,22 @@ def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column:
| 6 months|
| 4 months|
+--------------------------------------------------+
+
+ Example 3: Exception during division, resulting in NULL when ANSI mode is on
+
+ >>> import pyspark.sql.functions as sf
+ >>> origin = spark.conf.get("spark.sql.ansi.enabled")
+ >>> spark.conf.set("spark.sql.ansi.enabled", "true")
+ >>> try:
+ ... df = spark.range(1)
+ ... df.select(sf.try_divide(df.id, sf.lit(0))).show()
+ ... finally:
+ ... spark.conf.set("spark.sql.ansi.enabled", origin)
+ +-----------------+
+ |try_divide(id, 0)|
+ +-----------------+
+ | NULL|
+ +-----------------+
"""
return _invoke_function_over_columns("try_divide", left, right)
@@ -666,6 +698,22 @@ def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column:
| 1 years|
| 1 years 3 months|
+----------------------------------------------------+
+
+ Example 3: Overflow results in NULL when ANSI mode is on
+
+ >>> import sys
+ >>> import pyspark.sql.functions as sf
+ >>> origin = spark.conf.get("spark.sql.ansi.enabled")
+ >>> spark.conf.set("spark.sql.ansi.enabled", "true")
+ >>> try:
+ ... df = spark.range(1)
+ ... df.select(sf.try_multiply(sf.lit(sys.maxsize), sf.lit(sys.maxsize))).show()
+ ... finally:
+ ... spark.conf.set("spark.sql.ansi.enabled", origin)
+ +------------------------------------------------------+
+ |try_multiply(9223372036854775807, 9223372036854775807)|
+ +------------------------------------------------------+
+ | NULL|
+ +------------------------------------------------------+
"""
return _invoke_function_over_columns("try_multiply", left, right)
@@ -733,6 +781,22 @@ def try_subtract(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+------------------+
|INTERVAL '-1' YEAR|
+------------------+
+
+ Example 5: Overflow results in NULL when ANSI mode is on
+
+ >>> import sys
+ >>> import pyspark.sql.functions as sf
+ >>> origin = spark.conf.get("spark.sql.ansi.enabled")
+ >>> spark.conf.set("spark.sql.ansi.enabled", "true")
+ >>> try:
+ ... df = spark.range(1)
+ ... df.select(sf.try_subtract(sf.lit(-sys.maxsize), sf.lit(sys.maxsize))).show()
+ ... finally:
+ ... spark.conf.set("spark.sql.ansi.enabled", origin)
+ +-------------------------------------------------------+
+ |try_subtract(-9223372036854775807, 9223372036854775807)|
+ +-------------------------------------------------------+
+ | NULL|
+ +-------------------------------------------------------+
"""
return _invoke_function_over_columns("try_subtract", left, right)
@@ -8492,13 +8556,27 @@ def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column:
Examples
--------
+ Example 1: Convert string to a timestamp
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
- >>> df.select(to_timestamp(df.t).alias('dt')).collect()
- [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
+ >>> df.select(sf.to_timestamp(df.t).alias('dt')).show()
+ +-------------------+
+ | dt|
+ +-------------------+
+ |1997-02-28 10:30:00|
+ +-------------------+
+
+ Example 2: Convert string to a timestamp with a format
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
- >>> df.select(to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).collect()
- [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
+ >>> df.select(sf.to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).show()
+ +-------------------+
+ | dt|
+ +-------------------+
+ |1997-02-28 10:30:00|
+ +-------------------+
"""
if format is None:
return _invoke_function_over_columns("to_timestamp", col)
@@ -8524,12 +8602,43 @@ def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = Non
Examples
--------
+ Example 1: Convert string to a timestamp
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
- >>> df.select(try_to_timestamp(df.t).alias('dt')).collect()
- [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
+ >>> df.select(sf.try_to_timestamp(df.t).alias('dt')).show()
+ +-------------------+
+ | dt|
+ +-------------------+
+ |1997-02-28 10:30:00|
+ +-------------------+
+
+ Example 2: Convert string to a timestamp with a format
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
+ >>> df.select(sf.try_to_timestamp(df.t, sf.lit('yyyy-MM-dd HH:mm:ss')).alias('dt')).show()
+ +-------------------+
+ | dt|
+ +-------------------+
+ |1997-02-28 10:30:00|
+ +-------------------+
+
+ Example 3: Conversion failure results in NULL when ANSI mode is on
- >>> df.select(try_to_timestamp(df.t, lit('yyyy-MM-dd HH:mm:ss')).alias('dt')).collect()
- [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
+
+ >>> import pyspark.sql.functions as sf
+ >>> origin = spark.conf.get("spark.sql.ansi.enabled")
+ >>> spark.conf.set("spark.sql.ansi.enabled", "true")
+ >>> try:
+ ... df = spark.createDataFrame([('malformed',)], ['t'])
+ ... df.select(sf.try_to_timestamp(df.t)).show()
+ ... finally:
+ ... spark.conf.set("spark.sql.ansi.enabled", origin)
+ +-------------------+
+ |try_to_timestamp(t)|
+ +-------------------+
+ | NULL|
+ +-------------------+
"""
if format is not None:
return _invoke_function_over_columns("try_to_timestamp", col, format)
@@ -11516,12 +11625,18 @@ def to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> C
Examples
--------
+ Example 1: Convert string to a binary with encoding specified
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([("abc",)], ["e"])
- >>> df.select(to_binary(df.e, lit("utf-8")).alias('r')).collect()
+ >>> df.select(sf.try_to_binary(df.e, sf.lit("utf-8")).alias('r')).collect()
[Row(r=bytearray(b'abc'))]
+ Example 2: Convert string to a binary without encoding specified
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([("414243",)], ["e"])
- >>> df.select(to_binary(df.e).alias('r')).collect()
+ >>> df.select(sf.to_binary(df.e).alias('r')).collect()
[Row(r=bytearray(b'ABC'))]
"""
if format is not None:
@@ -12310,13 +12425,35 @@ def try_to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None)
Examples
--------
+ Example 1: Convert string to a binary with encoding specified
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([("abc",)], ["e"])
- >>> df.select(try_to_binary(df.e, lit("utf-8")).alias('r')).collect()
+ >>> df.select(sf.try_to_binary(df.e, sf.lit("utf-8")).alias('r')).collect()
[Row(r=bytearray(b'abc'))]
+ Example 2: Convert string to a binary without encoding specified
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([("414243",)], ["e"])
- >>> df.select(try_to_binary(df.e).alias('r')).collect()
+ >>> df.select(sf.try_to_binary(df.e).alias('r')).collect()
[Row(r=bytearray(b'ABC'))]
+
+ Example 3: Conversion failure results in NULL when ANSI mode is on
+
+ >>> import pyspark.sql.functions as sf
+ >>> origin = spark.conf.get("spark.sql.ansi.enabled")
+ >>> spark.conf.set("spark.sql.ansi.enabled", "true")
+ >>> try:
+ ... df = spark.range(1)
+ ... df.select(sf.try_to_binary(sf.lit("malformed"),
sf.lit("hex"))).show()
+ ... finally:
+ ... spark.conf.set("spark.sql.ansi.enabled", origin)
+ +-----------------------------+
+ |try_to_binary(malformed, hex)|
+ +-----------------------------+
+ | NULL|
+ +-----------------------------+
"""
if format is not None:
return _invoke_function_over_columns("try_to_binary", col, format)
@@ -12342,9 +12479,32 @@ def try_to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column:
Examples
--------
+ Example 1: Convert a string to a number with a format specified
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([("$78.12",)], ["e"])
- >>> df.select(try_to_number(df.e, lit("$99.99")).alias('r')).collect()
- [Row(r=Decimal('78.12'))]
+ >>> df.select(sf.try_to_number(df.e, sf.lit("$99.99")).alias('r')).show()
+ +-----+
+ | r|
+ +-----+
+ |78.12|
+ +-----+
+
+ Example 2: Conversion failure results in NULL when ANSI mode is on
+
+ >>> import pyspark.sql.functions as sf
+ >>> origin = spark.conf.get("spark.sql.ansi.enabled")
+ >>> spark.conf.set("spark.sql.ansi.enabled", "true")
+ >>> try:
+ ... df = spark.range(1)
+ ... df.select(sf.try_to_number(sf.lit("77"),
sf.lit("$99.99")).alias('r')).show()
+ ... finally:
+ ... spark.conf.set("spark.sql.ansi.enabled", origin)
+ +----+
+ | r|
+ +----+
+ |NULL|
+ +----+
"""
return _invoke_function_over_columns("try_to_number", col, format)
@@ -19977,12 +20137,31 @@ def try_reflect(*cols: "ColumnOrName") -> Column:
Examples
--------
+ Example 1: Reflecting a method call with arguments
+
>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2",)], ["a"])
>>> df.select(
... sf.try_reflect(sf.lit("java.util.UUID"), sf.lit("fromString"),
df.a)
- ... ).collect()
- [Row(try_reflect(java.util.UUID, fromString, a)='a5cf6c42-0c85-418f-af6c-3e4e5b1328f2')]
+ ... ).show()
+ +------------------------------------------+
+ |try_reflect(java.util.UUID, fromString, a)|
+ +------------------------------------------+
+ | a5cf6c42-0c85-418...|
+ +------------------------------------------+
+
+ Example 2: Exception in the reflection call, resulting in null
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.range(1)
+ >>> df.select(
+ ... sf.try_reflect(sf.lit("scala.Predef"), sf.lit("require"),
sf.lit(False))
+ ... ).show()
+ +-----------------------------------------+
+ |try_reflect(scala.Predef, require, false)|
+ +-----------------------------------------+
+ | NULL|
+ +-----------------------------------------+
"""
return _invoke_function_over_seq_of_columns("try_reflect", cols)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]