[spark] branch master updated: [SPARK-39300][PS] Move pandasSkewness and pandasKurtosis into pandas.spark.functions

ruifengz Thu, 26 May 2022 04:46:44 -0700

This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 283aa9941bb [SPARK-39300][PS] Move pandasSkewness and pandasKurtosis into pandas.spark.functions
283aa9941bb is described below

commit 283aa9941bb159b05542d81138d92d5dc79fbde8
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Thu May 26 19:46:14 2022 +0800

    [SPARK-39300][PS] Move pandasSkewness and pandasKurtosis into pandas.spark.functions
    
    init
    
    ### What changes were proposed in this pull request?
    `pandasSkewness` and `pandasKurtosis` are used in `generic`, `groupby`, and `window`.
    
    Move them into `SF` (`pyspark.pandas.spark.functions`) as `SF.skew` and `SF.kurt` for reuse.
    
    ### Why are the changes needed?
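    Code cleanup: the same `SparkContext._active_spark_context._jvm.PythonSQLUtils` lookup was duplicated at every call site.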
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Existing unit tests.
    
    Closes #36682 from zhengruifeng/ps_mv_pandas_skew_kurt_to_SF.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 python/pyspark/pandas/generic.py         |  7 ++-----
 python/pyspark/pandas/groupby.py         |  8 +-------
 python/pyspark/pandas/spark/functions.py | 10 ++++++++++
 python/pyspark/pandas/window.py          |  6 ++----
 4 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py
index ce13ae5ad1b..dbfaedc9321 100644
--- a/python/pyspark/pandas/generic.py
+++ b/python/pyspark/pandas/generic.py
@@ -41,7 +41,6 @@ import numpy as np
 import pandas as pd
 from pandas.api.types import is_list_like  # type: ignore[attr-defined]
 
-from pyspark import SparkContext
 from pyspark.sql import Column, functions as F
 from pyspark.sql.types import (
     BooleanType,
@@ -1518,8 +1517,7 @@ class Frame(object, metaclass=ABCMeta):
                     )
                 )
 
-            sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils
-            return Column(sql_utils.pandasSkewness(spark_column._jc))
+            return SF.skew(spark_column)
 
         return self._reduce_for_stat_function(
             skew,
@@ -1588,8 +1586,7 @@ class Frame(object, metaclass=ABCMeta):
                     )
                 )
 
-            sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils
-            return Column(sql_utils.pandasKurtosis(spark_column._jc))
+            return SF.kurt(spark_column)
 
         return self._reduce_for_stat_function(
             kurtosis,
diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py
index 03e6a038232..3201d70d417 100644
--- a/python/pyspark/pandas/groupby.py
+++ b/python/pyspark/pandas/groupby.py
@@ -54,7 +54,6 @@ else:
 
     _builtin_table = SelectionMixin._builtin_table  # type: ignore[attr-defined]
 
-from pyspark import SparkContext
 from pyspark.sql import Column, DataFrame as SparkDataFrame, Window, functions as F
 from pyspark.sql.types import (
     BooleanType,
@@ -748,13 +747,8 @@ class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
         pyspark.pandas.Series.groupby
         pyspark.pandas.DataFrame.groupby
         """
-
-        def skew(scol: Column) -> Column:
-            sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils
-            return Column(sql_utils.pandasSkewness(scol._jc))
-
         return self._reduce_for_stat_function(
-            skew,
+            SF.skew,
             accepted_spark_types=(NumericType,),
             bool_to_numeric=True,
         )
diff --git a/python/pyspark/pandas/spark/functions.py b/python/pyspark/pandas/spark/functions.py
index b7d57b4c3f8..11f9dbbb8c0 100644
--- a/python/pyspark/pandas/spark/functions.py
+++ b/python/pyspark/pandas/spark/functions.py
@@ -36,6 +36,16 @@ from pyspark.sql.types import (
 )
 
 
+def skew(col: Column) -> Column:
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.PythonSQLUtils.pandasSkewness(col._jc))
+
+
+def kurt(col: Column) -> Column:
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.PythonSQLUtils.pandasKurtosis(col._jc))
+
+
 def repeat(col: Column, n: Union[int, Column]) -> Column:
     """
     Repeats a string column n times, and returns it as a new string column.
diff --git a/python/pyspark/pandas/window.py b/python/pyspark/pandas/window.py
index 5c21e31f377..2808f72fd3c 100644
--- a/python/pyspark/pandas/window.py
+++ b/python/pyspark/pandas/window.py
@@ -121,20 +121,18 @@ class RollingAndExpanding(Generic[FrameLike], metaclass=ABCMeta):
 
     def skew(self) -> FrameLike:
         def skew(scol: Column) -> Column:
-            sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils
             return F.when(
                 F.row_number().over(self._unbounded_window) >= self._min_periods,
-                Column(sql_utils.pandasSkewness(scol._jc)).over(self._window),
+                SF.skew(scol).over(self._window),
             ).otherwise(SF.lit(None))
 
         return self._apply_as_series_or_frame(skew)
 
     def kurt(self) -> FrameLike:
         def kurt(scol: Column) -> Column:
-            sql_utils = SparkContext._active_spark_context._jvm.PythonSQLUtils
             return F.when(
                 F.row_number().over(self._unbounded_window) >= self._min_periods,
-                Column(sql_utils.pandasKurtosis(scol._jc)).over(self._window),
+                SF.kurt(scol).over(self._window),
             ).otherwise(SF.lit(None))
 
         return self._apply_as_series_or_frame(kurt)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch master updated: [SPARK-39300][PS] Move pandasSkewness and pandasKurtosis into pandas.spark.functions

Reply via email to