This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d0f4533f4e79 [SPARK-48287][PS][CONNECT] Apply the builtin `timestamp_diff` method d0f4533f4e79 is described below commit d0f4533f4e797a439eb78b8214e7bbfe06d0839a Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Thu May 16 12:15:18 2024 +0800 [SPARK-48287][PS][CONNECT] Apply the builtin `timestamp_diff` method ### What changes were proposed in this pull request? Apply the builtin `timestamp_diff` method: remove the PS-specific `timestampdiff` helper from `pyspark.pandas.spark.functions`, together with its JVM counterpart `PythonSQLUtils.timestampDiff`, and call `F.timestamp_diff` in `pyspark/pandas/resample.py` instead. ### Why are the changes needed? The `timestamp_diff` method was added as a builtin method, so there is no need to maintain a PS-specific one. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI ### Was this patch authored or co-authored using generative AI tooling? No Closes #46595 from zhengruifeng/ps_ts_diff. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- python/pyspark/pandas/resample.py | 3 +-- python/pyspark/pandas/spark/functions.py | 18 ------------------ .../apache/spark/sql/api/python/PythonSQLUtils.scala | 4 ---- 3 files changed, 1 insertion(+), 24 deletions(-) diff --git a/python/pyspark/pandas/resample.py b/python/pyspark/pandas/resample.py index 9683fc4f4e7f..5557ca2af773 100644 --- a/python/pyspark/pandas/resample.py +++ b/python/pyspark/pandas/resample.py @@ -56,7 +56,6 @@ from pyspark.pandas.utils import ( scol_for, verify_temp_column_name, ) -from pyspark.pandas.spark.functions import timestampdiff class Resampler(Generic[FrameLike], metaclass=ABCMeta): @@ -279,7 +278,7 @@ class Resampler(Generic[FrameLike], metaclass=ABCMeta): truncated_ts_scol = F.date_trunc(unit_str, ts_scol) if isinstance(key_type, TimestampNTZType): truncated_ts_scol = F.to_timestamp_ntz(truncated_ts_scol) - diff = timestampdiff(unit_str, origin_scol, truncated_ts_scol) + diff = F.timestamp_diff(unit_str, origin_scol, truncated_ts_scol) mod = F.lit(0) if n == 1 else (diff % F.lit(n)) if rule_code in ["h", "H"]: diff 
--git a/python/pyspark/pandas/spark/functions.py b/python/pyspark/pandas/spark/functions.py index 91602ae2b2b8..db1cc423078a 100644 --- a/python/pyspark/pandas/spark/functions.py +++ b/python/pyspark/pandas/spark/functions.py @@ -171,21 +171,3 @@ def null_index(col: Column) -> Column: sc = SparkContext._active_spark_context return Column(sc._jvm.PythonSQLUtils.nullIndex(col._jc)) - - -def timestampdiff(unit: str, start: Column, end: Column) -> Column: - if is_remote(): - from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns, lit - - return _invoke_function_over_columns( - "timestampdiff", - lit(unit), - start, - end, - ) - - else: - from pyspark import SparkContext - - sc = SparkContext._active_spark_context - return Column(sc._jvm.PythonSQLUtils.timestampDiff(unit, start._jc, end._jc)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala index 1e42e6a5adaa..eb8c1d65a8b5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala @@ -165,10 +165,6 @@ private[sql] object PythonSQLUtils extends Logging { } } - def timestampDiff(unit: String, start: Column, end: Column): Column = { - Column(TimestampDiff(unit, start.expr, end.expr)) - } - def pandasProduct(e: Column, ignoreNA: Boolean): Column = { Column(PandasProduct(e.expr, ignoreNA).toAggregateExpression(false)) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org