This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new bd14d6412a3 [SPARK-43660][CONNECT][PS][FOLLOWUP] Remove JVM dependency for resample bd14d6412a3 is described below commit bd14d6412a3124eecce1493fcad436280915ba71 Author: itholic <haejoon....@databricks.com> AuthorDate: Thu Aug 10 18:20:24 2023 +0800 [SPARK-43660][CONNECT][PS][FOLLOWUP] Remove JVM dependency for resample ### What changes were proposed in this pull request? This is follow-up for https://github.com/apache/spark/pull/41877 to remove JVM dependency. ### Why are the changes needed? To remove JVM dependency from Pandas API on Spark with Spark Connect. ### Does this PR introduce _any_ user-facing change? No, it's internal handling. ### How was this patch tested? The existing UT. Closes #42410 from itholic/resample_followup. Authored-by: itholic <haejoon....@databricks.com> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- python/pyspark/pandas/resample.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/python/pyspark/pandas/resample.py b/python/pyspark/pandas/resample.py index 30f8c9d3169..2d10802b651 100644 --- a/python/pyspark/pandas/resample.py +++ b/python/pyspark/pandas/resample.py @@ -41,7 +41,6 @@ else: _builtin_table = SelectionMixin._builtin_table # type: ignore[attr-defined] -from pyspark import SparkContext from pyspark.sql import Column, functions as F from pyspark.sql.types import ( NumericType, @@ -67,7 +66,6 @@ from pyspark.pandas.utils import ( scol_for, verify_temp_column_name, ) -from pyspark.sql.utils import is_remote from pyspark.pandas.spark.functions import timestampdiff @@ -145,22 +143,15 @@ class Resampler(Generic[FrameLike], metaclass=ABCMeta): def get_make_interval( # type: ignore[return] self, unit: str, col: Union[Column, int, float] ) -> Column: - if is_remote(): - from pyspark.sql.connect.functions import lit, make_interval - - col = col if not isinstance(col, (int, float)) else lit(col) # type: ignore[assignment] - if unit == "MONTH": - return make_interval(months=col) # type: ignore - if unit == "HOUR": - return make_interval(hours=col) # type: ignore - if unit == "MINUTE": - return make_interval(mins=col) # type: ignore - if unit == "SECOND": - return make_interval(secs=col) # type: ignore - else: - sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils - col = col._jc if isinstance(col, Column) else F.lit(col)._jc - return sql_utils.makeInterval(unit, col) + col = col if not isinstance(col, (int, float)) else F.lit(col) + if unit == "MONTH": + return F.make_interval(months=col) + if unit == "HOUR": + return F.make_interval(hours=col) + if unit == "MINUTE": + return F.make_interval(mins=col) + if unit == "SECOND": + return F.make_interval(secs=col) def _bin_timestamp(self, origin: pd.Timestamp, ts_scol: Column) -> Column: key_type = self._resamplekey_type --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org