This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5d763eb63b6 [SPARK-38857][PYTHON] series name should be preserved in series.mode() 5d763eb63b6 is described below commit 5d763eb63b67d4fee5972559ddfe0ff3e0e8e210 Author: Yikun Jiang <yikunk...@gmail.com> AuthorDate: Thu Apr 14 10:27:19 2022 +0900 [SPARK-38857][PYTHON] series name should be preserved in series.mode() ### What changes were proposed in this pull request? The Series name is now preserved in the result of `Series.mode()`. ### Why are the changes needed? The Series name should be preserved in `Series.mode()` to follow the behavior of pandas 1.4.x. ### Does this PR introduce _any_ user-facing change? Yes. If the Series has a name set, that name is now preserved in the result of `Series.mode()`. ### How was this patch tested? Unit tests covering pandas versions both before and after 1.4.x. Closes #36159 from Yikun/SPARK-38857. Authored-by: Yikun Jiang <yikunk...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/pandas/series.py | 8 ++++++-- python/pyspark/pandas/tests/test_series.py | 7 ++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py index da1d41c2abe..f4638fe22de 100644 --- a/python/pyspark/pandas/series.py +++ b/python/pyspark/pandas/series.py @@ -4523,6 +4523,9 @@ class Series(Frame, IndexOpsMixin, Generic[T]): Always returns Series even if only one value is returned. + .. versionchanged:: 3.4.0 + Series name is preserved to follow pandas 1.4+ behavior. 
+ Parameters ---------- dropna : bool, default True @@ -4597,8 +4600,9 @@ class Series(Frame, IndexOpsMixin, Generic[T]): F.col(SPARK_DEFAULT_INDEX_NAME).alias(SPARK_DEFAULT_SERIES_NAME) ) internal = InternalFrame(spark_frame=sdf, index_spark_columns=None, column_labels=[None]) - - return first_series(DataFrame(internal)) + ser_mode = first_series(DataFrame(internal)) + ser_mode.name = self.name + return ser_mode def keys(self) -> "ps.Index": """ diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py index 76d35c51196..68fed26324d 100644 --- a/python/pyspark/pandas/tests/test_series.py +++ b/python/pyspark/pandas/tests/test_series.py @@ -2121,7 +2121,12 @@ class SeriesTest(PandasOnSparkTestCase, SQLTestUtils): pser.name = "x" psser = ps.from_pandas(pser) - self.assert_eq(psser.mode(), pser.mode()) + if LooseVersion(pd.__version__) < LooseVersion("1.4"): + # Due to pandas bug: https://github.com/pandas-dev/pandas/issues/46737 + psser.name = None + self.assert_eq(psser.mode(), pser.mode()) + else: + self.assert_eq(psser.mode(), pser.mode()) self.assert_eq( psser.mode(dropna=False).sort_values().reset_index(drop=True), pser.mode(dropna=False).sort_values().reset_index(drop=True), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org