kosiew commented on code in PR #1424:
URL:
https://github.com/apache/datafusion-python/pull/1424#discussion_r2930373855
##########
python/datafusion/functions.py:
##########
@@ -1254,69 +1316,198 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr)
-> Expr:
"""Converts a string and optional formats to a ``Timestamp`` in seconds.
See :py:func:`to_timestamp` for a description on how to use formatters.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
+ >>> result = df.select(
+ ... dfn.functions.to_timestamp_seconds(
+ ... dfn.col("a")
+ ... ).alias("ts")
+ ... )
+ >>> str(result.collect_column("ts")[0].as_py())
+ '2021-01-01 00:00:00'
"""
return Expr(f.to_timestamp_seconds(arg.expr, *_unwrap_exprs(formatters)))
def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
- """Converts a string and optional formats to a Unixtime."""
+ """Converts a string and optional formats to a Unixtime.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["1970-01-01T00:00:00"]})
+ >>> result = df.select(dfn.functions.to_unixtime(dfn.col("a")).alias("u"))
+ >>> result.collect_column("u")[0].as_py()
+ 0
+ """
return Expr(f.to_unixtime(string.expr, *_unwrap_exprs(format_arguments)))
def current_date() -> Expr:
- """Returns current UTC date as a Date32 value."""
+ """Returns current UTC date as a Date32 value.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.current_date().alias("d")
+ ... )
+ >>> result.collect_column("d")[0].as_py() is not None
+ True
+ """
return Expr(f.current_date())
today = current_date
def current_time() -> Expr:
- """Returns current UTC time as a Time64 value."""
+ """Returns current UTC time as a Time64 value.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.current_time().alias("t")
+ ... )
+
+ Use .value instead of .as_py() because nanosecond timestamps
+ require pandas to convert to Python datetime objects.
+
+ >>> result.collect_column("t")[0].value > 0
+ True
+ """
return Expr(f.current_time())
def datepart(part: Expr, date: Expr) -> Expr:
"""Return a specified part of a date.
This is an alias for :py:func:`date_part`.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
+ >>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
+ >>> result = df.select(
+ ... dfn.functions.datepart(dfn.lit("month"), dfn.col("a")).alias("m"))
+ >>> result.collect_column("m")[0].as_py()
+ 7
"""
return date_part(part, date)
def date_part(part: Expr, date: Expr) -> Expr:
- """Extracts a subfield from the date."""
+ """Extracts a subfield from the date.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
+ >>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
+ >>> result = df.select(
+ ... dfn.functions.date_part(dfn.lit("year"), dfn.col("a")).alias("y"))
+ >>> result.collect_column("y")[0].as_py()
+ 2021
+ """
return Expr(f.date_part(part.expr, date.expr))
def extract(part: Expr, date: Expr) -> Expr:
"""Extracts a subfield from the date.
This is an alias for :py:func:`date_part`.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
+ >>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
+ >>> result = df.select(
+ ... dfn.functions.extract(dfn.lit("day"), dfn.col("a")).alias("d"))
+ >>> result.collect_column("d")[0].as_py()
+ 15
"""
return date_part(part, date)
def date_trunc(part: Expr, date: Expr) -> Expr:
- """Truncates the date to a specified level of precision."""
+ """Truncates the date to a specified level of precision.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-07-15T12:34:56"]})
+ >>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
+ >>> result = df.select(
+ ... dfn.functions.date_trunc(
+ ... dfn.lit("month"), dfn.col("a")
+ ... ).alias("t")
+ ... )
+ >>> str(result.collect_column("t")[0].as_py())
+ '2021-07-01 00:00:00'
+ """
return Expr(f.date_trunc(part.expr, date.expr))
def datetrunc(part: Expr, date: Expr) -> Expr:
"""Truncates the date to a specified level of precision.
This is an alias for :py:func:`date_trunc`.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-07-15T12:34:56"]})
+ >>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
+ >>> result = df.select(
+ ... dfn.functions.datetrunc(
+ ... dfn.lit("year"), dfn.col("a")
+ ... ).alias("t")
+ ... )
+ >>> str(result.collect_column("t")[0].as_py())
+ '2021-01-01 00:00:00'
"""
return date_trunc(part, date)
def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr:
- """Coerces an arbitrary timestamp to the start of the nearest specified
interval."""
+ """Coerces an arbitrary timestamp to the start of the nearest specified
interval.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> result = ctx.sql(
+ ... "SELECT date_bin(interval '1 day',"
Review Comment:
For consistency with the other examples, it would be better to call
`dfn.functions.date_bin`
directly here instead of going through `ctx.sql`.
##########
python/datafusion/functions.py:
##########
@@ -1393,7 +1584,20 @@ def named_struct(name_pairs: list[tuple[str, Expr]]) ->
Expr:
def from_unixtime(arg: Expr) -> Expr:
- """Converts an integer to RFC3339 timestamp format string."""
+ """Converts an integer to RFC3339 timestamp format string.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [0]})
+ >>> result = df.select(
+ ... dfn.functions.from_unixtime(
+ ... dfn.col("a")
+ ... ).alias("ts")
+ ... )
+ >>> str(result.collect_column("ts")[0].as_py())
+ '1970-01-01 00:00:00'
+ """
return Expr(f.from_unixtime(arg.expr))
Review Comment:
The PR title is `Add docstring examples for scalar temporal functions`, but
`to_date`, `to_time`, and `to_local_time` still have no examples. Since those
are public temporal wrappers in the same section, this feels incomplete.
##########
python/datafusion/functions.py:
##########
@@ -1254,69 +1316,198 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr)
-> Expr:
"""Converts a string and optional formats to a ``Timestamp`` in seconds.
See :py:func:`to_timestamp` for a description on how to use formatters.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
+ >>> result = df.select(
+ ... dfn.functions.to_timestamp_seconds(
+ ... dfn.col("a")
+ ... ).alias("ts")
+ ... )
+ >>> str(result.collect_column("ts")[0].as_py())
+ '2021-01-01 00:00:00'
"""
return Expr(f.to_timestamp_seconds(arg.expr, *_unwrap_exprs(formatters)))
def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
- """Converts a string and optional formats to a Unixtime."""
+ """Converts a string and optional formats to a Unixtime.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["1970-01-01T00:00:00"]})
+ >>> result = df.select(dfn.functions.to_unixtime(dfn.col("a")).alias("u"))
+ >>> result.collect_column("u")[0].as_py()
+ 0
+ """
return Expr(f.to_unixtime(string.expr, *_unwrap_exprs(format_arguments)))
def current_date() -> Expr:
- """Returns current UTC date as a Date32 value."""
+ """Returns current UTC date as a Date32 value.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.current_date().alias("d")
+ ... )
+ >>> result.collect_column("d")[0].as_py() is not None
+ True
+ """
return Expr(f.current_date())
today = current_date
def current_time() -> Expr:
- """Returns current UTC time as a Time64 value."""
+ """Returns current UTC time as a Time64 value.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": [1]})
+ >>> result = df.select(
+ ... dfn.functions.current_time().alias("t")
+ ... )
+
+ Use .value instead of .as_py() because nanosecond timestamps
+ require pandas to convert to Python datetime objects.
+
+ >>> result.collect_column("t")[0].value > 0
+ True
+ """
return Expr(f.current_time())
def datepart(part: Expr, date: Expr) -> Expr:
"""Return a specified part of a date.
This is an alias for :py:func:`date_part`.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
+ >>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
+ >>> result = df.select(
+ ... dfn.functions.datepart(dfn.lit("month"), dfn.col("a")).alias("m"))
+ >>> result.collect_column("m")[0].as_py()
+ 7
"""
return date_part(part, date)
def date_part(part: Expr, date: Expr) -> Expr:
- """Extracts a subfield from the date."""
+ """Extracts a subfield from the date.
+
+ Examples:
+ ---------
+ >>> ctx = dfn.SessionContext()
+ >>> df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
+ >>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
+ >>> result = df.select(
+ ... dfn.functions.date_part(dfn.lit("year"), dfn.col("a")).alias("y"))
+ >>> result.collect_column("y")[0].as_py()
+ 2021
+ """
return Expr(f.date_part(part.expr, date.expr))
def extract(part: Expr, date: Expr) -> Expr:
"""Extracts a subfield from the date.
This is an alias for :py:func:`date_part`.
Review Comment:
`datepart`, `extract`, and `datetrunc` are aliases, but each now carries its
own full doctest.
I think keeping a single example on the canonical function would reduce the
risk of docs drift between the canonical function and its aliases.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]