This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 91ccc0f8e9f [SPARK-45165][PS] Remove `inplace` parameter from `CategoricalIndex` APIs 91ccc0f8e9f is described below commit 91ccc0f8e9fd59ae13ac11640c8440317121d8b7 Author: Haejoon Lee <haejoon....@databricks.com> AuthorDate: Thu Sep 14 21:09:58 2023 -0700 [SPARK-45165][PS] Remove `inplace` parameter from `CategoricalIndex` APIs ### What changes were proposed in this pull request? This PR proposes to remove the `inplace` parameter from `CategoricalIndex` APIs. ### Why are the changes needed? Because they're also removed from Pandas. <img width="722" alt="Screenshot 2023-09-14 at 7 25 14 PM" src="https://github.com/apache/spark/assets/44108233/ef997036-77e0-49d4-9031-7dc892ef45d2"> We should match our behavior with the latest Pandas. ### Does this PR introduce _any_ user-facing change? Yes, the `inplace` parameter is no longer available for `CategoricalIndex` APIs. ### How was this patch tested? Updated UTs. The existing CI should pass. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42927 from itholic/SPARK-45165. Authored-by: Haejoon Lee <haejoon....@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../source/migration_guide/pyspark_upgrade.rst | 1 + python/pyspark/pandas/categorical.py | 30 ++--- python/pyspark/pandas/indexes/category.py | 147 +++------------------ .../pyspark/pandas/tests/indexes/test_category.py | 18 --- 4 files changed, 37 insertions(+), 159 deletions(-) diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst b/python/docs/source/migration_guide/pyspark_upgrade.rst index d743384dee6..992101734ff 100644 --- a/python/docs/source/migration_guide/pyspark_upgrade.rst +++ b/python/docs/source/migration_guide/pyspark_upgrade.rst @@ -31,6 +31,7 @@ Upgrading from PySpark 3.5 to 4.0 * In Spark 4.0, ``Series.mad`` has been removed from pandas API on Spark. 
* In Spark 4.0, ``na_sentinel`` parameter from ``Index.factorize`` and ``Series.factorize`` has been removed from pandas API on Spark, use ``use_na_sentinel`` instead. * In Spark 4.0, ``inplace`` parameter from ``Categorical.add_categories``, ``Categorical.remove_categories``, ``Categorical.set_categories``, ``Categorical.rename_categories``, ``Categorical.reorder_categories``, ``Categorical.as_ordered``, ``Categorical.as_unordered`` have been removed from pandas API on Spark. +* In Spark 4.0, ``inplace`` parameter from ``CategoricalIndex.add_categories``, ``CategoricalIndex.remove_categories``, ``CategoricalIndex.remove_unused_categories``, ``CategoricalIndex.set_categories``, ``CategoricalIndex.rename_categories``, ``CategoricalIndex.reorder_categories``, ``CategoricalIndex.as_ordered``, ``CategoricalIndex.as_unordered`` have been removed from pandas API on Spark. * In Spark 4.0, ``closed`` parameter from ``ps.date_range`` has been removed from pandas API on Spark. * In Spark 4.0, ``include_start`` and ``include_end`` parameters from ``DataFrame.between_time`` have been removed from pandas API on Spark, use ``inclusive`` instead. * In Spark 4.0, ``include_start`` and ``include_end`` parameters from ``Series.between_time`` have been removed from pandas API on Spark, use ``inclusive`` instead. diff --git a/python/pyspark/pandas/categorical.py b/python/pyspark/pandas/categorical.py index 7043d1709ee..c7e6ab873f6 100644 --- a/python/pyspark/pandas/categorical.py +++ b/python/pyspark/pandas/categorical.py @@ -185,8 +185,8 @@ class CategoricalAccessor: Returns ------- - Series or None - Categorical with new categories added or None if ``inplace=True``. + Series + Categorical with new categories added Raises ------ @@ -270,8 +270,8 @@ class CategoricalAccessor: Returns ------- - Series or None - Ordered Categorical or None if ``inplace=True``. 
+ Series + Ordered Categorical Examples -------- @@ -304,8 +304,8 @@ class CategoricalAccessor: Returns ------- - Series or None - Unordered Categorical or None if ``inplace=True``. + Series + Unordered Categorical Examples -------- @@ -346,8 +346,8 @@ class CategoricalAccessor: Returns ------- - Series or None - Categorical with removed categories or None if ``inplace=True``. + Series + Categorical with removed categories Raises ------ @@ -421,8 +421,8 @@ class CategoricalAccessor: Returns ------- - cat : Series or None - Categorical with unused categories dropped or None if ``inplace=True``. + cat : Series + Categorical with unused categories dropped See Also -------- @@ -491,8 +491,8 @@ class CategoricalAccessor: Returns ------- - cat : Series or None - Categorical with removed categories or None if ``inplace=True``. + cat : Series + Categorical with removed categories Raises ------ @@ -583,8 +583,8 @@ class CategoricalAccessor: Returns ------- - cat : Series or None - Categorical with removed categories or None if ``inplace=True``. + cat : Series + Categorical with removed categories Raises ------ @@ -679,7 +679,7 @@ class CategoricalAccessor: Returns ------- - Series with reordered categories or None if inplace. + Series with reordered categories Raises ------ diff --git a/python/pyspark/pandas/indexes/category.py b/python/pyspark/pandas/indexes/category.py index 94725f90679..85dbe7654ef 100644 --- a/python/pyspark/pandas/indexes/category.py +++ b/python/pyspark/pandas/indexes/category.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import warnings from typing import Any, Callable, List, Optional, Union, cast, no_type_check import pandas as pd @@ -204,7 +203,7 @@ class CategoricalIndex(Index): return self.dtype.ordered def add_categories( - self, new_categories: Union[pd.Index, Any, List], inplace: bool = False + self, new_categories: Union[pd.Index, Any, List] ) -> Optional["CategoricalIndex"]: """ Add new categories. @@ -216,16 +215,11 @@ class CategoricalIndex(Index): ---------- new_categories : category or list-like of category The new categories to be included. - inplace : bool, default False - Whether or not to add the categories inplace or return a copy of - this categorical with added categories. - - .. deprecated:: 3.2.0 Returns ------- - CategoricalIndex or None - Categorical with new categories added or None if ``inplace=True``. + CategoricalIndex + Categorical with new categories added Raises ------ @@ -252,31 +246,18 @@ class CategoricalIndex(Index): CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'], categories=['a', 'b', 'c', 'x'], ordered=False, dtype='category') """ - if inplace: - warnings.warn( - "Argument `inplace` will be removed in 4.0.0.", - FutureWarning, - ) - raise ValueError("cannot use inplace with CategoricalIndex") - return CategoricalIndex( self.to_series().cat.add_categories(new_categories=new_categories) ).rename(self.name) - def as_ordered(self, inplace: bool = False) -> Optional["CategoricalIndex"]: + def as_ordered(self) -> Optional["CategoricalIndex"]: """ Set the Categorical to be ordered. - Parameters - ---------- - inplace : bool, default False - Whether or not to set the ordered attribute in-place or return - a copy of this categorical with ordered set to True. - Returns ------- - CategoricalIndex or None - Ordered Categorical or None if ``inplace=True``. 
+ CategoricalIndex + Ordered Categorical Examples -------- @@ -289,29 +270,16 @@ class CategoricalIndex(Index): CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'], categories=['a', 'b', 'c'], ordered=True, dtype='category') """ - if inplace: - warnings.warn( - "Argument `inplace` will be removed in 4.0.0.", - FutureWarning, - ) - raise ValueError("cannot use inplace with CategoricalIndex") - return CategoricalIndex(self.to_series().cat.as_ordered()).rename(self.name) - def as_unordered(self, inplace: bool = False) -> Optional["CategoricalIndex"]: + def as_unordered(self) -> Optional["CategoricalIndex"]: """ Set the Categorical to be unordered. - Parameters - ---------- - inplace : bool, default False - Whether or not to set the ordered attribute in-place or return - a copy of this categorical with ordered set to False. - Returns ------- - CategoricalIndex or None - Unordered Categorical or None if ``inplace=True``. + CategoricalIndex + Unordered Categorical Examples -------- @@ -324,17 +292,10 @@ class CategoricalIndex(Index): CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category') """ - if inplace: - warnings.warn( - "Argument `inplace` will be removed in 4.0.0.", - FutureWarning, - ) - raise ValueError("cannot use inplace with CategoricalIndex") - return CategoricalIndex(self.to_series().cat.as_unordered()).rename(self.name) def remove_categories( - self, removals: Union[pd.Index, Any, List], inplace: bool = False + self, removals: Union[pd.Index, Any, List] ) -> Optional["CategoricalIndex"]: """ Remove the specified categories. @@ -346,16 +307,11 @@ class CategoricalIndex(Index): ---------- removals : category or list of categories The categories which should be removed. - inplace : bool, default False - Whether or not to remove the categories inplace or return a copy of - this categorical with removed categories. - - .. 
deprecated:: 3.2.0 Returns ------- - CategoricalIndex or None - Categorical with removed categories or None if ``inplace=True``. + CategoricalIndex + Categorical with removed categories Raises ------ @@ -381,31 +337,16 @@ class CategoricalIndex(Index): CategoricalIndex(['a', nan, nan, 'c', 'c', 'c'], categories=['a', 'c'], ordered=False, dtype='category') """ - if inplace: - warnings.warn( - "Argument `inplace` will be removed in 4.0.0.", - FutureWarning, - ) - raise ValueError("cannot use inplace with CategoricalIndex") - return CategoricalIndex(self.to_series().cat.remove_categories(removals)).rename(self.name) - def remove_unused_categories(self, inplace: bool = False) -> Optional["CategoricalIndex"]: + def remove_unused_categories(self) -> Optional["CategoricalIndex"]: """ Remove categories which are not used. - Parameters - ---------- - inplace : bool, default False - Whether or not to drop unused categories inplace or return a copy of - this categorical with unused categories dropped. - - .. deprecated:: 3.2.0 - Returns ------- - cat : CategoricalIndex or None - Categorical with unused categories dropped or None if ``inplace=True``. + cat : CategoricalIndex + Categorical with unused categories dropped See Also -------- @@ -426,17 +367,10 @@ class CategoricalIndex(Index): CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category') """ - if inplace: - warnings.warn( - "Argument `inplace` will be removed in 4.0.0.", - FutureWarning, - ) - raise ValueError("cannot use inplace with CategoricalIndex") - return CategoricalIndex(self.to_series().cat.remove_unused_categories()).rename(self.name) def rename_categories( - self, new_categories: Union[list, dict, Callable], inplace: bool = False + self, new_categories: Union[list, dict, Callable] ) -> Optional["CategoricalIndex"]: """ Rename categories. 
@@ -458,16 +392,10 @@ class CategoricalIndex(Index): * callable : a callable that is called on all items in the old categories and whose return values comprise the new categories. - inplace : bool, default False - Whether or not to rename the categories inplace or return a copy of - this categorical with renamed categories. - - .. deprecated:: 3.2.0 - Returns ------- - cat : CategoricalIndex or None - Categorical with removed categories or None if ``inplace=True``. + cat : CategoricalIndex + Categorical with removed categories Raises ------ @@ -500,13 +428,6 @@ class CategoricalIndex(Index): >>> idx.rename_categories(lambda x: x.upper()) CategoricalIndex(['A', 'A', 'B'], categories=['A', 'B'], ordered=False, dtype='category') """ - if inplace: - warnings.warn( - "Argument `inplace` will be removed in 4.0.0.", - FutureWarning, - ) - raise ValueError("cannot use inplace with CategoricalIndex") - return CategoricalIndex(self.to_series().cat.rename_categories(new_categories)).rename( self.name ) @@ -515,7 +436,6 @@ class CategoricalIndex(Index): self, new_categories: Union[pd.Index, Any, List], ordered: Optional[bool] = None, - inplace: bool = False, ) -> Optional["CategoricalIndex"]: """ Reorder categories as specified in new_categories. @@ -530,16 +450,11 @@ class CategoricalIndex(Index): ordered : bool, optional Whether or not the categorical is treated as an ordered categorical. If not given, do not change the ordered information. - inplace : bool, default False - Whether or not to reorder the categories inplace or return a copy of - this categorical with reordered categories. - - .. deprecated:: 3.2.0 Returns ------- - cat : CategoricalIndex or None - Categorical with removed categories or None if ``inplace=True``. 
+ cat : CategoricalIndex + Categorical with removed categories Raises ------ @@ -566,13 +481,6 @@ class CategoricalIndex(Index): CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'], categories=['c', 'b', 'a'], ordered=False, dtype='category') """ - if inplace: - warnings.warn( - "Argument `inplace` will be removed in 4.0.0.", - FutureWarning, - ) - raise ValueError("cannot use inplace with CategoricalIndex") - return CategoricalIndex( self.to_series().cat.reorder_categories(new_categories=new_categories, ordered=ordered) ).rename(self.name) @@ -582,7 +490,6 @@ class CategoricalIndex(Index): new_categories: Union[pd.Index, List], ordered: Optional[bool] = None, rename: bool = False, - inplace: bool = False, ) -> Optional["CategoricalIndex"]: """ Set the categories to the specified new_categories. @@ -613,15 +520,10 @@ class CategoricalIndex(Index): rename : bool, default False Whether or not the new_categories should be considered as a rename of the old categories or as reordered categories. - inplace : bool, default False - Whether or not to reorder the categories in-place or return a copy - of this categorical with reordered categories. - - .. deprecated:: 3.2.0 Returns ------- - CategoricalIndex with reordered categories or None if inplace. 
+ CategoricalIndex with reordered categories Raises ------ @@ -653,13 +555,6 @@ class CategoricalIndex(Index): >>> idx.set_categories([1, 2, 3], rename=True, ordered=True) CategoricalIndex([1, 2, 2, 3, 3, 3], categories=[1, 2, 3], ordered=True, dtype='category') """ - if inplace: - warnings.warn( - "Argument `inplace` will be removed in 4.0.0.", - FutureWarning, - ) - raise ValueError("cannot use inplace with CategoricalIndex") - return CategoricalIndex( self.to_series().cat.set_categories(new_categories, ordered=ordered, rename=rename) ).rename(self.name) diff --git a/python/pyspark/pandas/tests/indexes/test_category.py b/python/pyspark/pandas/tests/indexes/test_category.py index d2405f6adb3..6bf985b0ea5 100644 --- a/python/pyspark/pandas/tests/indexes/test_category.py +++ b/python/pyspark/pandas/tests/indexes/test_category.py @@ -104,7 +104,6 @@ class CategoricalIndexTestsMixin: self.assert_eq(pidx.add_categories([4, 5]), psidx.add_categories([4, 5])) self.assert_eq(pidx.add_categories([]), psidx.add_categories([])) - self.assertRaises(ValueError, lambda: psidx.add_categories(4, inplace=True)) self.assertRaises(ValueError, lambda: psidx.add_categories(3)) self.assertRaises(ValueError, lambda: psidx.add_categories([4, 4])) @@ -120,7 +119,6 @@ class CategoricalIndexTestsMixin: self.assert_eq(pidx.remove_categories(None), psidx.remove_categories(None)) self.assert_eq(pidx.remove_categories([None]), psidx.remove_categories([None])) - self.assertRaises(ValueError, lambda: psidx.remove_categories(4, inplace=True)) self.assertRaises(ValueError, lambda: psidx.remove_categories(4)) self.assertRaises(ValueError, lambda: psidx.remove_categories([4, None])) @@ -130,8 +128,6 @@ class CategoricalIndexTestsMixin: self.assert_eq(pidx.remove_unused_categories(), psidx.remove_unused_categories()) - self.assertRaises(ValueError, lambda: psidx.remove_unused_categories(inplace=True)) - def test_reorder_categories(self): pidx = pd.CategoricalIndex([1, 2, 3]) psidx = 
ps.from_pandas(pidx) @@ -147,7 +143,6 @@ class CategoricalIndexTestsMixin: psidx.reorder_categories([3, 2, 1], ordered=True), ) - self.assertRaises(ValueError, lambda: psidx.reorder_categories([1, 2, 3], inplace=True)) self.assertRaises(ValueError, lambda: psidx.reorder_categories([1, 2])) self.assertRaises(ValueError, lambda: psidx.reorder_categories([1, 2, 4])) self.assertRaises(ValueError, lambda: psidx.reorder_categories([1, 2, 2])) @@ -160,9 +155,6 @@ class CategoricalIndexTestsMixin: self.assert_eq(pidx.as_ordered(), psidx.as_ordered()) self.assert_eq(pidx.as_unordered(), psidx.as_unordered()) - self.assertRaises(ValueError, lambda: psidx.as_ordered(inplace=True)) - self.assertRaises(ValueError, lambda: psidx.as_unordered(inplace=True)) - def test_astype(self): pidx = pd.Index(["a", "b", "c"]) psidx = ps.from_pandas(pidx) @@ -322,10 +314,6 @@ class CategoricalIndexTestsMixin: TypeError, lambda: psidx.rename_categories("x"), ) - self.assertRaises( - ValueError, - lambda: psidx.rename_categories({"b": "B", "c": "C"}, inplace=True), - ) def test_set_categories(self): pidx = pd.CategoricalIndex(["a", "b", "c", "d"]) @@ -370,12 +358,6 @@ class CategoricalIndexTestsMixin: psidx.set_categories(["a", "c", "b", "d", "e"], ordered=True), ) - self.assertRaisesRegex( - ValueError, - "cannot use inplace with CategoricalIndex", - lambda: psidx.set_categories(["a", "c", "b", "o"], inplace=True), - ) - def test_map(self): pidxs = [pd.CategoricalIndex([1, 2, 3]), pd.CategoricalIndex([1, 2, 3], ordered=True)] psidxs = [ps.from_pandas(pidx) for pidx in pidxs] --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org