This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 283aa9941bb [SPARK-39300][PS] Move pandasSkewness and pandasKurtosis into pandas.spark.functions
283aa9941bb is described below

commit 283aa9941bb159b05542d81138d92d5dc79fbde8
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Thu May 26 19:46:14 2022 +0800

    [SPARK-39300][PS] Move pandasSkewness and pandasKurtosis into pandas.spark.functions

    init

    ### What changes were proposed in this pull request?
    `pandasSkewness` and `pandasKurtosis` are used in `generic`,`groupby`,`window`.

    move them into `SF` for reuse

    ### Why are the changes needed?
    code clean up

    ### Does this PR introduce _any_ user-facing change?
    No

    ### How was this patch tested?
    existing UT

    Closes #36682 from zhengruifeng/ps_mv_pandas_skew_kurt_to_SF.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 python/pyspark/pandas/generic.py         |  7 ++-----
 python/pyspark/pandas/groupby.py         |  8 +-------
 python/pyspark/pandas/spark/functions.py | 10 ++++++++++
 python/pyspark/pandas/window.py          |  6 ++----
 4 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py
index ce13ae5ad1b..dbfaedc9321 100644
--- a/python/pyspark/pandas/generic.py
+++ b/python/pyspark/pandas/generic.py
@@ -41,7 +41,6 @@ import numpy as np
 import pandas as pd
 from pandas.api.types import is_list_like  # type: ignore[attr-defined]

-from pyspark import SparkContext
 from pyspark.sql import Column, functions as F
 from pyspark.sql.types import (
     BooleanType,
@@ -1518,8 +1517,7 @@ class Frame(object, metaclass=ABCMeta):
                     )
                 )

-            sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils
-            return Column(sql_utils.pandasSkewness(spark_column._jc))
+            return SF.skew(spark_column)

         return self._reduce_for_stat_function(
             skew,
@@ -1588,8 +1586,7 @@ class Frame(object, metaclass=ABCMeta):
                     )
                 )

-            sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils
-            return Column(sql_utils.pandasKurtosis(spark_column._jc))
+            return SF.kurt(spark_column)

         return self._reduce_for_stat_function(
             kurtosis,
diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py
index 03e6a038232..3201d70d417 100644
--- a/python/pyspark/pandas/groupby.py
+++ b/python/pyspark/pandas/groupby.py
@@ -54,7 +54,6 @@ else:

     _builtin_table = SelectionMixin._builtin_table  # type: ignore[attr-defined]

-from pyspark import SparkContext
 from pyspark.sql import Column, DataFrame as SparkDataFrame, Window, functions as F
 from pyspark.sql.types import (
     BooleanType,
@@ -748,13 +747,8 @@ class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
         pyspark.pandas.Series.groupby
         pyspark.pandas.DataFrame.groupby
         """
-
-        def skew(scol: Column) -> Column:
-            sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils
-            return Column(sql_utils.pandasSkewness(scol._jc))
-
         return self._reduce_for_stat_function(
-            skew,
+            SF.skew,
             accepted_spark_types=(NumericType,),
             bool_to_numeric=True,
         )
diff --git a/python/pyspark/pandas/spark/functions.py b/python/pyspark/pandas/spark/functions.py
index b7d57b4c3f8..11f9dbbb8c0 100644
--- a/python/pyspark/pandas/spark/functions.py
+++ b/python/pyspark/pandas/spark/functions.py
@@ -36,6 +36,16 @@ from pyspark.sql.types import (
 )


+def skew(col: Column) -> Column:
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.PythonSQLUtils.pandasSkewness(col._jc))
+
+
+def kurt(col: Column) -> Column:
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.PythonSQLUtils.pandasKurtosis(col._jc))
+
+
 def repeat(col: Column, n: Union[int, Column]) -> Column:
     """
     Repeats a string column n times, and returns it as a new string column.
diff --git a/python/pyspark/pandas/window.py b/python/pyspark/pandas/window.py
index 5c21e31f377..2808f72fd3c 100644
--- a/python/pyspark/pandas/window.py
+++ b/python/pyspark/pandas/window.py
@@ -121,20 +121,18 @@ class RollingAndExpanding(Generic[FrameLike], metaclass=ABCMeta):

     def skew(self) -> FrameLike:
         def skew(scol: Column) -> Column:
-            sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils
             return F.when(
                 F.row_number().over(self._unbounded_window) >= self._min_periods,
-                Column(sql_utils.pandasSkewness(scol._jc)).over(self._window),
+                SF.skew(scol).over(self._window),
             ).otherwise(SF.lit(None))

         return self._apply_as_series_or_frame(skew)

     def kurt(self) -> FrameLike:
         def kurt(scol: Column) -> Column:
-            sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils
             return F.when(
                 F.row_number().over(self._unbounded_window) >= self._min_periods,
-                Column(sql_utils.pandasKurtosis(scol._jc)).over(self._window),
+                SF.kurt(scol).over(self._window),
             ).otherwise(SF.lit(None))

         return self._apply_as_series_or_frame(kurt)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org