This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 91ccc0f8e9f [SPARK-45165][PS] Remove `inplace` parameter from 
`CategoricalIndex` APIs
91ccc0f8e9f is described below

commit 91ccc0f8e9fd59ae13ac11640c8440317121d8b7
Author: Haejoon Lee <haejoon....@databricks.com>
AuthorDate: Thu Sep 14 21:09:58 2023 -0700

    [SPARK-45165][PS] Remove `inplace` parameter from `CategoricalIndex` APIs
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to remove the `inplace` parameter from `CategoricalIndex` 
APIs.
    
    ### Why are the changes needed?
    
    Because they're also removed from Pandas.
    <img width="722" alt="Screenshot 2023-09-14 at 7 25 14 PM" 
src="https://github.com/apache/spark/assets/44108233/ef997036-77e0-49d4-9031-7dc892ef45d2">
    
    We should match our behavior with the latest Pandas.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, the `inplace` parameter is no longer available for `CategoricalIndex` 
APIs.
    
    ### How was this patch tested?
    
    Updated UTs. The existing CI should pass.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #42927 from itholic/SPARK-45165.
    
    Authored-by: Haejoon Lee <haejoon....@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../source/migration_guide/pyspark_upgrade.rst     |   1 +
 python/pyspark/pandas/categorical.py               |  30 ++---
 python/pyspark/pandas/indexes/category.py          | 147 +++------------------
 .../pyspark/pandas/tests/indexes/test_category.py  |  18 ---
 4 files changed, 37 insertions(+), 159 deletions(-)

diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst 
b/python/docs/source/migration_guide/pyspark_upgrade.rst
index d743384dee6..992101734ff 100644
--- a/python/docs/source/migration_guide/pyspark_upgrade.rst
+++ b/python/docs/source/migration_guide/pyspark_upgrade.rst
@@ -31,6 +31,7 @@ Upgrading from PySpark 3.5 to 4.0
 * In Spark 4.0, ``Series.mad`` has been removed from pandas API on Spark.
 * In Spark 4.0, ``na_sentinel`` parameter from ``Index.factorize`` and 
``Series.factorize`` has been removed from pandas API on Spark, use 
``use_na_sentinel`` instead.
 * In Spark 4.0, ``inplace`` parameter from ``Categorical.add_categories``, 
``Categorical.remove_categories``, ``Categorical.set_categories``, 
``Categorical.rename_categories``, ``Categorical.reorder_categories``, 
``Categorical.as_ordered``, ``Categorical.as_unordered`` have been removed from 
pandas API on Spark.
+* In Spark 4.0, ``inplace`` parameter from 
``CategoricalIndex.add_categories``, ``CategoricalIndex.remove_categories``, 
``CategoricalIndex.remove_unused_categories``, 
``CategoricalIndex.set_categories``, ``CategoricalIndex.rename_categories``, 
``CategoricalIndex.reorder_categories``, ``CategoricalIndex.as_ordered``, 
``CategoricalIndex.as_unordered`` have been removed from pandas API on Spark.
 * In Spark 4.0, ``closed`` parameter from ``ps.date_range`` has been removed 
from pandas API on Spark.
 * In Spark 4.0, ``include_start`` and ``include_end`` parameters from 
``DataFrame.between_time`` have been removed from pandas API on Spark, use 
``inclusive`` instead.
 * In Spark 4.0, ``include_start`` and ``include_end`` parameters from 
``Series.between_time`` have been removed from pandas API on Spark, use 
``inclusive`` instead.
diff --git a/python/pyspark/pandas/categorical.py 
b/python/pyspark/pandas/categorical.py
index 7043d1709ee..c7e6ab873f6 100644
--- a/python/pyspark/pandas/categorical.py
+++ b/python/pyspark/pandas/categorical.py
@@ -185,8 +185,8 @@ class CategoricalAccessor:
 
         Returns
         -------
-        Series or None
-            Categorical with new categories added or None if ``inplace=True``.
+        Series
+            Categorical with new categories added
 
         Raises
         ------
@@ -270,8 +270,8 @@ class CategoricalAccessor:
 
         Returns
         -------
-        Series or None
-            Ordered Categorical or None if ``inplace=True``.
+        Series
+            Ordered Categorical
 
         Examples
         --------
@@ -304,8 +304,8 @@ class CategoricalAccessor:
 
         Returns
         -------
-        Series or None
-            Unordered Categorical or None if ``inplace=True``.
+        Series
+            Unordered Categorical
 
         Examples
         --------
@@ -346,8 +346,8 @@ class CategoricalAccessor:
 
         Returns
         -------
-        Series or None
-            Categorical with removed categories or None if ``inplace=True``.
+        Series
+            Categorical with removed categories
 
         Raises
         ------
@@ -421,8 +421,8 @@ class CategoricalAccessor:
 
         Returns
         -------
-        cat : Series or None
-            Categorical with unused categories dropped or None if 
``inplace=True``.
+        cat : Series
+            Categorical with unused categories dropped
 
         See Also
         --------
@@ -491,8 +491,8 @@ class CategoricalAccessor:
 
         Returns
         -------
-        cat : Series or None
-            Categorical with removed categories or None if ``inplace=True``.
+        cat : Series
+            Categorical with removed categories
 
         Raises
         ------
@@ -583,8 +583,8 @@ class CategoricalAccessor:
 
         Returns
         -------
-        cat : Series or None
-            Categorical with removed categories or None if ``inplace=True``.
+        cat : Series
+            Categorical with removed categories
 
         Raises
         ------
@@ -679,7 +679,7 @@ class CategoricalAccessor:
 
         Returns
         -------
-        Series with reordered categories or None if inplace.
+        Series with reordered categories
 
         Raises
         ------
diff --git a/python/pyspark/pandas/indexes/category.py 
b/python/pyspark/pandas/indexes/category.py
index 94725f90679..85dbe7654ef 100644
--- a/python/pyspark/pandas/indexes/category.py
+++ b/python/pyspark/pandas/indexes/category.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import warnings
 from typing import Any, Callable, List, Optional, Union, cast, no_type_check
 
 import pandas as pd
@@ -204,7 +203,7 @@ class CategoricalIndex(Index):
         return self.dtype.ordered
 
     def add_categories(
-        self, new_categories: Union[pd.Index, Any, List], inplace: bool = False
+        self, new_categories: Union[pd.Index, Any, List]
     ) -> Optional["CategoricalIndex"]:
         """
         Add new categories.
@@ -216,16 +215,11 @@ class CategoricalIndex(Index):
         ----------
         new_categories : category or list-like of category
            The new categories to be included.
-        inplace : bool, default False
-           Whether or not to add the categories inplace or return a copy of
-           this categorical with added categories.
-
-           .. deprecated:: 3.2.0
 
         Returns
         -------
-        CategoricalIndex or None
-            Categorical with new categories added or None if ``inplace=True``.
+        CategoricalIndex
+            Categorical with new categories added
 
         Raises
         ------
@@ -252,31 +246,18 @@ class CategoricalIndex(Index):
         CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'],
                          categories=['a', 'b', 'c', 'x'], ordered=False, 
dtype='category')
         """
-        if inplace:
-            warnings.warn(
-                "Argument `inplace` will be removed in 4.0.0.",
-                FutureWarning,
-            )
-            raise ValueError("cannot use inplace with CategoricalIndex")
-
         return CategoricalIndex(
             self.to_series().cat.add_categories(new_categories=new_categories)
         ).rename(self.name)
 
-    def as_ordered(self, inplace: bool = False) -> 
Optional["CategoricalIndex"]:
+    def as_ordered(self) -> Optional["CategoricalIndex"]:
         """
         Set the Categorical to be ordered.
 
-        Parameters
-        ----------
-        inplace : bool, default False
-           Whether or not to set the ordered attribute in-place or return
-           a copy of this categorical with ordered set to True.
-
         Returns
         -------
-        CategoricalIndex or None
-            Ordered Categorical or None if ``inplace=True``.
+        CategoricalIndex
+            Ordered Categorical
 
         Examples
         --------
@@ -289,29 +270,16 @@ class CategoricalIndex(Index):
         CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'],
                          categories=['a', 'b', 'c'], ordered=True, 
dtype='category')
         """
-        if inplace:
-            warnings.warn(
-                "Argument `inplace` will be removed in 4.0.0.",
-                FutureWarning,
-            )
-            raise ValueError("cannot use inplace with CategoricalIndex")
-
         return 
CategoricalIndex(self.to_series().cat.as_ordered()).rename(self.name)
 
-    def as_unordered(self, inplace: bool = False) -> 
Optional["CategoricalIndex"]:
+    def as_unordered(self) -> Optional["CategoricalIndex"]:
         """
         Set the Categorical to be unordered.
 
-        Parameters
-        ----------
-        inplace : bool, default False
-           Whether or not to set the ordered attribute in-place or return
-           a copy of this categorical with ordered set to False.
-
         Returns
         -------
-        CategoricalIndex or None
-            Unordered Categorical or None if ``inplace=True``.
+        CategoricalIndex
+            Unordered Categorical
 
         Examples
         --------
@@ -324,17 +292,10 @@ class CategoricalIndex(Index):
         CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'],
                          categories=['a', 'b', 'c'], ordered=False, 
dtype='category')
         """
-        if inplace:
-            warnings.warn(
-                "Argument `inplace` will be removed in 4.0.0.",
-                FutureWarning,
-            )
-            raise ValueError("cannot use inplace with CategoricalIndex")
-
         return 
CategoricalIndex(self.to_series().cat.as_unordered()).rename(self.name)
 
     def remove_categories(
-        self, removals: Union[pd.Index, Any, List], inplace: bool = False
+        self, removals: Union[pd.Index, Any, List]
     ) -> Optional["CategoricalIndex"]:
         """
         Remove the specified categories.
@@ -346,16 +307,11 @@ class CategoricalIndex(Index):
         ----------
         removals : category or list of categories
            The categories which should be removed.
-        inplace : bool, default False
-           Whether or not to remove the categories inplace or return a copy of
-           this categorical with removed categories.
-
-           .. deprecated:: 3.2.0
 
         Returns
         -------
-        CategoricalIndex or None
-            Categorical with removed categories or None if ``inplace=True``.
+        CategoricalIndex
+            Categorical with removed categories
 
         Raises
         ------
@@ -381,31 +337,16 @@ class CategoricalIndex(Index):
         CategoricalIndex(['a', nan, nan, 'c', 'c', 'c'],
                          categories=['a', 'c'], ordered=False, 
dtype='category')
         """
-        if inplace:
-            warnings.warn(
-                "Argument `inplace` will be removed in 4.0.0.",
-                FutureWarning,
-            )
-            raise ValueError("cannot use inplace with CategoricalIndex")
-
         return 
CategoricalIndex(self.to_series().cat.remove_categories(removals)).rename(self.name)
 
-    def remove_unused_categories(self, inplace: bool = False) -> 
Optional["CategoricalIndex"]:
+    def remove_unused_categories(self) -> Optional["CategoricalIndex"]:
         """
         Remove categories which are not used.
 
-        Parameters
-        ----------
-        inplace : bool, default False
-           Whether or not to drop unused categories inplace or return a copy of
-           this categorical with unused categories dropped.
-
-           .. deprecated:: 3.2.0
-
         Returns
         -------
-        cat : CategoricalIndex or None
-            Categorical with unused categories dropped or None if 
``inplace=True``.
+        cat : CategoricalIndex
+            Categorical with unused categories dropped
 
         See Also
         --------
@@ -426,17 +367,10 @@ class CategoricalIndex(Index):
         CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'],
                          categories=['a', 'b', 'c'], ordered=False, 
dtype='category')
         """
-        if inplace:
-            warnings.warn(
-                "Argument `inplace` will be removed in 4.0.0.",
-                FutureWarning,
-            )
-            raise ValueError("cannot use inplace with CategoricalIndex")
-
         return 
CategoricalIndex(self.to_series().cat.remove_unused_categories()).rename(self.name)
 
     def rename_categories(
-        self, new_categories: Union[list, dict, Callable], inplace: bool = 
False
+        self, new_categories: Union[list, dict, Callable]
     ) -> Optional["CategoricalIndex"]:
         """
         Rename categories.
@@ -458,16 +392,10 @@ class CategoricalIndex(Index):
             * callable : a callable that is called on all items in the old
               categories and whose return values comprise the new categories.
 
-        inplace : bool, default False
-            Whether or not to rename the categories inplace or return a copy of
-            this categorical with renamed categories.
-
-            .. deprecated:: 3.2.0
-
         Returns
         -------
-        cat : CategoricalIndex or None
-            Categorical with removed categories or None if ``inplace=True``.
+        cat : CategoricalIndex
+            Categorical with renamed categories
 
         Raises
         ------
@@ -500,13 +428,6 @@ class CategoricalIndex(Index):
         >>> idx.rename_categories(lambda x: x.upper())
         CategoricalIndex(['A', 'A', 'B'], categories=['A', 'B'], 
ordered=False, dtype='category')
         """
-        if inplace:
-            warnings.warn(
-                "Argument `inplace` will be removed in 4.0.0.",
-                FutureWarning,
-            )
-            raise ValueError("cannot use inplace with CategoricalIndex")
-
         return 
CategoricalIndex(self.to_series().cat.rename_categories(new_categories)).rename(
             self.name
         )
@@ -515,7 +436,6 @@ class CategoricalIndex(Index):
         self,
         new_categories: Union[pd.Index, Any, List],
         ordered: Optional[bool] = None,
-        inplace: bool = False,
     ) -> Optional["CategoricalIndex"]:
         """
         Reorder categories as specified in new_categories.
@@ -530,16 +450,11 @@ class CategoricalIndex(Index):
         ordered : bool, optional
            Whether or not the categorical is treated as an ordered categorical.
            If not given, do not change the ordered information.
-        inplace : bool, default False
-           Whether or not to reorder the categories inplace or return a copy of
-           this categorical with reordered categories.
-
-           .. deprecated:: 3.2.0
 
         Returns
         -------
-        cat : CategoricalIndex or None
-            Categorical with removed categories or None if ``inplace=True``.
+        cat : CategoricalIndex
+            Categorical with removed categories
 
         Raises
         ------
@@ -566,13 +481,6 @@ class CategoricalIndex(Index):
         CategoricalIndex(['a', 'b', 'b', 'c', 'c', 'c'],
                          categories=['c', 'b', 'a'], ordered=False, 
dtype='category')
         """
-        if inplace:
-            warnings.warn(
-                "Argument `inplace` will be removed in 4.0.0.",
-                FutureWarning,
-            )
-            raise ValueError("cannot use inplace with CategoricalIndex")
-
         return CategoricalIndex(
             
self.to_series().cat.reorder_categories(new_categories=new_categories, 
ordered=ordered)
         ).rename(self.name)
@@ -582,7 +490,6 @@ class CategoricalIndex(Index):
         new_categories: Union[pd.Index, List],
         ordered: Optional[bool] = None,
         rename: bool = False,
-        inplace: bool = False,
     ) -> Optional["CategoricalIndex"]:
         """
         Set the categories to the specified new_categories.
@@ -613,15 +520,10 @@ class CategoricalIndex(Index):
         rename : bool, default False
            Whether or not the new_categories should be considered as a rename
            of the old categories or as reordered categories.
-        inplace : bool, default False
-           Whether or not to reorder the categories in-place or return a copy
-           of this categorical with reordered categories.
-
-           .. deprecated:: 3.2.0
 
         Returns
         -------
-        CategoricalIndex with reordered categories or None if inplace.
+        CategoricalIndex with reordered categories
 
         Raises
         ------
@@ -653,13 +555,6 @@ class CategoricalIndex(Index):
         >>> idx.set_categories([1, 2, 3], rename=True, ordered=True)
         CategoricalIndex([1, 2, 2, 3, 3, 3], categories=[1, 2, 3], 
ordered=True, dtype='category')
         """
-        if inplace:
-            warnings.warn(
-                "Argument `inplace` will be removed in 4.0.0.",
-                FutureWarning,
-            )
-            raise ValueError("cannot use inplace with CategoricalIndex")
-
         return CategoricalIndex(
             self.to_series().cat.set_categories(new_categories, 
ordered=ordered, rename=rename)
         ).rename(self.name)
diff --git a/python/pyspark/pandas/tests/indexes/test_category.py 
b/python/pyspark/pandas/tests/indexes/test_category.py
index d2405f6adb3..6bf985b0ea5 100644
--- a/python/pyspark/pandas/tests/indexes/test_category.py
+++ b/python/pyspark/pandas/tests/indexes/test_category.py
@@ -104,7 +104,6 @@ class CategoricalIndexTestsMixin:
         self.assert_eq(pidx.add_categories([4, 5]), psidx.add_categories([4, 
5]))
         self.assert_eq(pidx.add_categories([]), psidx.add_categories([]))
 
-        self.assertRaises(ValueError, lambda: psidx.add_categories(4, 
inplace=True))
         self.assertRaises(ValueError, lambda: psidx.add_categories(3))
         self.assertRaises(ValueError, lambda: psidx.add_categories([4, 4]))
 
@@ -120,7 +119,6 @@ class CategoricalIndexTestsMixin:
         self.assert_eq(pidx.remove_categories(None), 
psidx.remove_categories(None))
         self.assert_eq(pidx.remove_categories([None]), 
psidx.remove_categories([None]))
 
-        self.assertRaises(ValueError, lambda: psidx.remove_categories(4, 
inplace=True))
         self.assertRaises(ValueError, lambda: psidx.remove_categories(4))
         self.assertRaises(ValueError, lambda: psidx.remove_categories([4, 
None]))
 
@@ -130,8 +128,6 @@ class CategoricalIndexTestsMixin:
 
         self.assert_eq(pidx.remove_unused_categories(), 
psidx.remove_unused_categories())
 
-        self.assertRaises(ValueError, lambda: 
psidx.remove_unused_categories(inplace=True))
-
     def test_reorder_categories(self):
         pidx = pd.CategoricalIndex([1, 2, 3])
         psidx = ps.from_pandas(pidx)
@@ -147,7 +143,6 @@ class CategoricalIndexTestsMixin:
             psidx.reorder_categories([3, 2, 1], ordered=True),
         )
 
-        self.assertRaises(ValueError, lambda: psidx.reorder_categories([1, 2, 
3], inplace=True))
         self.assertRaises(ValueError, lambda: psidx.reorder_categories([1, 2]))
         self.assertRaises(ValueError, lambda: psidx.reorder_categories([1, 2, 
4]))
         self.assertRaises(ValueError, lambda: psidx.reorder_categories([1, 2, 
2]))
@@ -160,9 +155,6 @@ class CategoricalIndexTestsMixin:
         self.assert_eq(pidx.as_ordered(), psidx.as_ordered())
         self.assert_eq(pidx.as_unordered(), psidx.as_unordered())
 
-        self.assertRaises(ValueError, lambda: psidx.as_ordered(inplace=True))
-        self.assertRaises(ValueError, lambda: psidx.as_unordered(inplace=True))
-
     def test_astype(self):
         pidx = pd.Index(["a", "b", "c"])
         psidx = ps.from_pandas(pidx)
@@ -322,10 +314,6 @@ class CategoricalIndexTestsMixin:
             TypeError,
             lambda: psidx.rename_categories("x"),
         )
-        self.assertRaises(
-            ValueError,
-            lambda: psidx.rename_categories({"b": "B", "c": "C"}, 
inplace=True),
-        )
 
     def test_set_categories(self):
         pidx = pd.CategoricalIndex(["a", "b", "c", "d"])
@@ -370,12 +358,6 @@ class CategoricalIndexTestsMixin:
             psidx.set_categories(["a", "c", "b", "d", "e"], ordered=True),
         )
 
-        self.assertRaisesRegex(
-            ValueError,
-            "cannot use inplace with CategoricalIndex",
-            lambda: psidx.set_categories(["a", "c", "b", "o"], inplace=True),
-        )
-
     def test_map(self):
         pidxs = [pd.CategoricalIndex([1, 2, 3]), pd.CategoricalIndex([1, 2, 
3], ordered=True)]
         psidxs = [ps.from_pandas(pidx) for pidx in pidxs]


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to