This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new dcbe275543e [SPARK-45634][PS] Remove `DataFrame.get_dtype_counts` from Pandas API on Spark dcbe275543e is described below commit dcbe275543e05cb4529317ddb933d09253d65d6f Author: Haejoon Lee <haejoon....@databricks.com> AuthorDate: Thu Oct 26 11:16:36 2023 +0900 [SPARK-45634][PS] Remove `DataFrame.get_dtype_counts` from Pandas API on Spark ### What changes were proposed in this pull request? This PR proposes to remove old API `get_dtype_counts` from Pandas API on Spark ### Why are the changes needed? This API was deprecated a long time ago, but has not been removed since it's internally used in our code base. But it's no longer used anywhere currently. ### Does this PR introduce _any_ user-facing change? `DataFrame.get_dtype_counts` is removed. ### How was this patch tested? No new test is required for API removal. The existing CI should pass. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43488 from itholic/SPARK-45634. Authored-by: Haejoon Lee <haejoon....@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../source/migration_guide/pyspark_upgrade.rst | 1 + python/pyspark/pandas/generic.py | 51 ---------------------- 2 files changed, 1 insertion(+), 51 deletions(-) diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst b/python/docs/source/migration_guide/pyspark_upgrade.rst index 933fa936f70..20fab578504 100644 --- a/python/docs/source/migration_guide/pyspark_upgrade.rst +++ b/python/docs/source/migration_guide/pyspark_upgrade.rst @@ -53,6 +53,7 @@ Upgrading from PySpark 3.5 to 4.0 * In Spark 4.0, ``col_space`` parameter from ``DataFrame.to_latex`` and ``Series.to_latex`` has been removed from pandas API on Spark. * In Spark 4.0, ``DataFrame.to_spark_io`` has been removed from pandas API on Spark, use ``DataFrame.spark.to_spark_io`` instead. 
* In Spark 4.0, ``Series.is_monotonic`` and ``Index.is_monotonic`` have been removed from pandas API on Spark, use ``Series.is_monotonic_increasing`` or ``Index.is_monotonic_increasing`` instead respectively. +* In Spark 4.0, ``DataFrame.get_dtype_counts`` has been removed from pandas API on Spark, use ``DataFrame.dtypes.value_counts()`` instead. Upgrading from PySpark 3.3 to 3.4 diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py index c6f1b9ccbb7..16eaeb6142e 100644 --- a/python/pyspark/pandas/generic.py +++ b/python/pyspark/pandas/generic.py @@ -19,13 +19,11 @@ A base class of DataFrame/Column to behave like pandas DataFrame/Series. """ from abc import ABCMeta, abstractmethod -from collections import Counter from functools import reduce from typing import ( Any, Callable, Dict, - Iterable, IO, List, Optional, @@ -400,55 +398,6 @@ class Frame(object, metaclass=ABCMeta): """ return self._apply_series_op(lambda psser: psser._cumprod(skipna), should_resolve=True) - # TODO: Although this has removed pandas >= 1.0.0, but we're keeping this as deprecated - # since we're using this for `DataFrame.info` internally. - # We can drop it once our minimal pandas version becomes 1.0.0. - def get_dtype_counts(self) -> pd.Series: - """ - Return counts of unique dtypes in this object. - - .. deprecated:: 0.14.0 - - Returns - ------- - dtype: pd.Series - Series with the count of columns with each dtype. - - See Also - -------- - dtypes: Return the dtypes in this object. - - Examples - -------- - >>> a = [['a', 1, 1], ['b', 2, 2], ['c', 3, 3]] - >>> df = ps.DataFrame(a, columns=['str', 'int1', 'int2']) - >>> df - str int1 int2 - 0 a 1 1 - 1 b 2 2 - 2 c 3 3 - - >>> df.get_dtype_counts().sort_values() - object 1 - int64 2 - dtype: int64 - - >>> df.str.get_dtype_counts().sort_values() - object 1 - dtype: int64 - """ - warnings.warn( - "`get_dtype_counts` has been deprecated and will be " - "removed in a future version. 
For DataFrames use " - "`.dtypes.value_counts()", - FutureWarning, - ) - if not isinstance(self.dtypes, Iterable): - dtypes = [self.dtypes] - else: - dtypes = list(self.dtypes) - return pd.Series(dict(Counter([d.name for d in dtypes]))) - def pipe(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any: r""" Apply func(self, \*args, \*\*kwargs). --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org