This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new bc20e85b0e1 [SPARK-43710][PS][CONNECT] Support `functions.date_part` for Spark Connect
bc20e85b0e1 is described below

commit bc20e85b0e1da510cc091dbd03f210ef9fc56b25
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Thu Jun 22 08:47:27 2023 +0900

    [SPARK-43710][PS][CONNECT] Support `functions.date_part` for Spark Connect

    ### What changes were proposed in this pull request?
    switch to the [newly added `date_part` function](https://github.com/apache/spark/commit/8dc02863b926b9e0780b994f9ee6c5c1058d49a0)

    ### Why are the changes needed?
    to support connect

    ### Does this PR introduce _any_ user-facing change?
    yes

    ### How was this patch tested?
    existing UT

    Closes #41691 from zhengruifeng/ps_date_part.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/pandas/indexes/timedelta.py |  6 +++---
 python/pyspark/pandas/spark/functions.py   | 32 ++----------------------------
 2 files changed, 5 insertions(+), 33 deletions(-)

diff --git a/python/pyspark/pandas/indexes/timedelta.py b/python/pyspark/pandas/indexes/timedelta.py
index 564c484d968..e46d602e985 100644
--- a/python/pyspark/pandas/indexes/timedelta.py
+++ b/python/pyspark/pandas/indexes/timedelta.py
@@ -150,9 +150,9 @@ class TimedeltaIndex(Index):
 
         @no_type_check
         def get_seconds(scol):
-            hour_scol = SF.date_part("HOUR", scol)
-            minute_scol = SF.date_part("MINUTE", scol)
-            second_scol = SF.date_part("SECOND", scol)
+            hour_scol = F.date_part("HOUR", scol)
+            minute_scol = F.date_part("MINUTE", scol)
+            second_scol = F.date_part("SECOND", scol)
             return (
                 F.when(
                     hour_scol < 0,
diff --git a/python/pyspark/pandas/spark/functions.py b/python/pyspark/pandas/spark/functions.py
index a904071aee7..b33705263c7 100644
--- a/python/pyspark/pandas/spark/functions.py
+++ b/python/pyspark/pandas/spark/functions.py
@@ -17,15 +17,11 @@
 """
 Additional Spark functions used in pandas-on-Spark.
 """
-from typing import Union, no_type_check
+from typing import Union
 
 from pyspark import SparkContext
 import pyspark.sql.functions as F
-from pyspark.sql.column import (
-    Column,
-    _to_java_column,
-    _create_column_from_literal,
-)
+from pyspark.sql.column import Column
 
 # For supporting Spark Connect
 from pyspark.sql.utils import is_remote
@@ -145,27 +141,3 @@ def repeat(col: Column, n: Union[int, Column]) -> Column:
     """
     _n = F.lit(n) if isinstance(n, int) else n
     return F.call_udf("repeat", col, _n)
-
-
-def date_part(field: Union[str, Column], source: Column) -> Column:
-    """
-    Extracts a part of the date/timestamp or interval source.
-    """
-    sc = SparkContext._active_spark_context
-    field = (
-        _to_java_column(field) if isinstance(field, Column) else _create_column_from_literal(field)
-    )
-    return _call_udf(sc, "date_part", field, _to_java_column(source))
-
-
-@no_type_check
-def _call_udf(sc, name, *cols):
-    return Column(sc._jvm.functions.callUDF(name, _make_arguments(sc, *cols)))
-
-
-@no_type_check
-def _make_arguments(sc, *cols):
-    java_arr = sc._gateway.new_array(sc._jvm.Column, len(cols))
-    for i, col in enumerate(cols):
-        java_arr[i] = col
-    return java_arr


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org