(spark) branch master updated: [SPARK-46517][PS][TESTS] Reorganize `IndexingTest`: factor out `test_loc*` tests

ruifengz Tue, 26 Dec 2023 23:02:46 -0800

This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new b106f80521d6 [SPARK-46517][PS][TESTS] Reorganize `IndexingTest`: factor out `test_loc*` tests
b106f80521d6 is described below

commit b106f80521d609b280950f63ff670c1f39ec3cee
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Wed Dec 27 15:02:22 2023 +0800

    [SPARK-46517][PS][TESTS] Reorganize `IndexingTest`: factor out `test_loc*` tests
    
    ### What changes were proposed in this pull request?
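    1. Factor out the `test_loc*` tests from `IndexingTest` into dedicated test modules
       (`test_indexing_loc` and `test_indexing_loc_multi_idx`).
    2. Add the missing Spark Connect parity tests for these modules.
    (The remaining parts will be fixed in follow-ups.)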
    
    ### Why are the changes needed?
    1. For test parity.
    2. For parallelism.
    
    ### Does this PR introduce _any_ user-facing change?
    no, test-only
    
    ### How was this patch tested?
    CI
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #44502 from zhengruifeng/ps_test_indexing_loc.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 dev/sparktestsupport/modules.py                    |   4 +
 .../connect/indexes/test_parity_indexing_loc.py    |  41 ++
 .../indexes/test_parity_indexing_loc_multi_idx.py  |  41 ++
 .../pandas/tests/indexes/test_indexing_loc.py      | 394 +++++++++++++++++++
 .../tests/indexes/test_indexing_loc_multi_idx.py   | 150 ++++++++
 python/pyspark/pandas/tests/test_indexing.py       | 419 ---------------------
 6 files changed, 630 insertions(+), 419 deletions(-)

diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 042e72863efd..7b34bbf25bb6 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -817,6 +817,8 @@ pyspark_pandas_slow = Module(
         "pyspark.pandas.tests.indexes.test_align",
         "pyspark.pandas.tests.indexes.test_indexing",
         "pyspark.pandas.tests.indexes.test_indexing_basic",
+        "pyspark.pandas.tests.indexes.test_indexing_loc",
+        "pyspark.pandas.tests.indexes.test_indexing_loc_multi_idx",
         "pyspark.pandas.tests.indexes.test_reindex",
         "pyspark.pandas.tests.indexes.test_rename",
         "pyspark.pandas.tests.indexes.test_reset_index",
@@ -1090,6 +1092,8 @@ pyspark_pandas_connect_part0 = Module(
         "pyspark.pandas.tests.connect.indexes.test_parity_align",
         "pyspark.pandas.tests.connect.indexes.test_parity_indexing",
         "pyspark.pandas.tests.connect.indexes.test_parity_indexing_basic",
+        "pyspark.pandas.tests.connect.indexes.test_parity_indexing_loc",
+        
"pyspark.pandas.tests.connect.indexes.test_parity_indexing_loc_multi_idx",
         "pyspark.pandas.tests.connect.indexes.test_parity_reindex",
         "pyspark.pandas.tests.connect.indexes.test_parity_rename",
         "pyspark.pandas.tests.connect.indexes.test_parity_reset_index",
diff --git 
a/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_loc.py 
b/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_loc.py
new file mode 100644
index 000000000000..161215b05a51
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_loc.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.indexes.test_indexing_loc import IndexingLocMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class IndexingLocParityTests(
+    IndexingLocMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.indexes.test_parity_indexing_loc import 
*  # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git 
a/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_loc_multi_idx.py
 
b/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_loc_multi_idx.py
new file mode 100644
index 000000000000..84a92c43455c
--- /dev/null
+++ 
b/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_loc_multi_idx.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.indexes.test_indexing_loc_multi_idx import 
IndexingLocMultiIdxMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class IndexingLocMultiIdxParityTests(
+    IndexingLocMultiIdxMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from 
pyspark.pandas.tests.connect.indexes.test_parity_indexing_loc_multi_idx import 
*  # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_loc.py 
b/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
new file mode 100644
index 000000000000..3b38aa4b1469
--- /dev/null
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
@@ -0,0 +1,394 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+import numpy as np
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class IndexingLocMixin:
+    @property
+    def pdf(self):
+        return pd.DataFrame(
+            {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 
0]},
+            index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
+        )
+
+    @property
+    def pdf2(self):
+        return pd.DataFrame(
+            {0: [1, 2, 3, 4, 5, 6, 7, 8, 9], 1: [4, 5, 6, 3, 2, 1, 0, 0, 0]},
+            index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
+        )
+
+    @property
+    def psdf(self):
+        return ps.from_pandas(self.pdf)
+
+    @property
+    def psdf2(self):
+        return ps.from_pandas(self.pdf2)
+
+    def test_loc(self):
+        psdf = self.psdf
+        pdf = self.pdf
+
+        self.assert_eq(psdf.loc[5:5], pdf.loc[5:5])
+        self.assert_eq(psdf.loc[3:8], pdf.loc[3:8])
+        self.assert_eq(psdf.loc[:8], pdf.loc[:8])
+        self.assert_eq(psdf.loc[3:], pdf.loc[3:])
+        self.assert_eq(psdf.loc[[5]], pdf.loc[[5]])
+        self.assert_eq(psdf.loc[:], pdf.loc[:])
+
+        # TODO?: self.assert_eq(psdf.loc[[3, 4, 1, 8]], pdf.loc[[3, 4, 1, 8]])
+        # TODO?: self.assert_eq(psdf.loc[[3, 4, 1, 9]], pdf.loc[[3, 4, 1, 9]])
+        # TODO?: self.assert_eq(psdf.loc[np.array([3, 4, 1, 9])], 
pdf.loc[np.array([3, 4, 1, 9])])
+
+        self.assert_eq(psdf.a.loc[5:5], pdf.a.loc[5:5])
+        self.assert_eq(psdf.a.loc[3:8], pdf.a.loc[3:8])
+        self.assert_eq(psdf.a.loc[:8], pdf.a.loc[:8])
+        self.assert_eq(psdf.a.loc[3:], pdf.a.loc[3:])
+        self.assert_eq(psdf.a.loc[[5]], pdf.a.loc[[5]])
+
+        # TODO?: self.assert_eq(psdf.a.loc[[3, 4, 1, 8]], pdf.a.loc[[3, 4, 1, 
8]])
+        # TODO?: self.assert_eq(psdf.a.loc[[3, 4, 1, 9]], pdf.a.loc[[3, 4, 1, 
9]])
+        # TODO?: self.assert_eq(psdf.a.loc[np.array([3, 4, 1, 9])],
+        #                       pdf.a.loc[np.array([3, 4, 1, 9])])
+
+        self.assert_eq(psdf.a.loc[[]], pdf.a.loc[[]])
+        self.assert_eq(psdf.a.loc[np.array([])], pdf.a.loc[np.array([])])
+
+        self.assert_eq(psdf.loc[1000:], pdf.loc[1000:])
+        self.assert_eq(psdf.loc[-2000:-1000], pdf.loc[-2000:-1000])
+
+        self.assert_eq(psdf.loc[5], pdf.loc[5])
+        self.assert_eq(psdf.loc[9], pdf.loc[9])
+        self.assert_eq(psdf.a.loc[5], pdf.a.loc[5])
+        self.assert_eq(psdf.a.loc[9], pdf.a.loc[9])
+
+        self.assertRaises(KeyError, lambda: psdf.loc[10])
+        self.assertRaises(KeyError, lambda: psdf.a.loc[10])
+
+        # monotonically increasing index test
+        pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]}, index=[0, 1, 1, 
2, 2, 2, 4, 5, 6])
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(psdf.loc[:2], pdf.loc[:2])
+        self.assert_eq(psdf.loc[:3], pdf.loc[:3])
+        self.assert_eq(psdf.loc[3:], pdf.loc[3:])
+        self.assert_eq(psdf.loc[4:], pdf.loc[4:])
+        self.assert_eq(psdf.loc[3:2], pdf.loc[3:2])
+        self.assert_eq(psdf.loc[-1:2], pdf.loc[-1:2])
+        self.assert_eq(psdf.loc[3:10], pdf.loc[3:10])
+
+        # monotonically decreasing index test
+        pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]}, index=[6, 5, 5, 
4, 4, 4, 2, 1, 0])
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(psdf.loc[:4], pdf.loc[:4])
+        self.assert_eq(psdf.loc[:3], pdf.loc[:3])
+        self.assert_eq(psdf.loc[3:], pdf.loc[3:])
+        self.assert_eq(psdf.loc[2:], pdf.loc[2:])
+        self.assert_eq(psdf.loc[2:3], pdf.loc[2:3])
+        self.assert_eq(psdf.loc[2:-1], pdf.loc[2:-1])
+        self.assert_eq(psdf.loc[10:3], pdf.loc[10:3])
+
+        # test when type of key is string and given value is not included in 
key
+        pdf = pd.DataFrame({"a": [1, 2, 3]}, index=["a", "b", "d"])
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(psdf.loc["a":"z"], pdf.loc["a":"z"])
+
+        # KeyError when index is not monotonic increasing or decreasing
+        # and specified values don't exist in index
+        psdf = ps.DataFrame([[1, 2], [4, 5], [7, 8]], index=["cobra", "viper", 
"sidewinder"])
+
+        self.assertRaises(KeyError, lambda: psdf.loc["cobra":"koalas"])
+        self.assertRaises(KeyError, lambda: psdf.loc["koalas":"viper"])
+
+        psdf = ps.DataFrame([[1, 2], [4, 5], [7, 8]], index=[10, 30, 20])
+
+        self.assertRaises(KeyError, lambda: psdf.loc[0:30])
+        self.assertRaises(KeyError, lambda: psdf.loc[10:100])
+
+    def test_loc_getitem_boolean_series(self):
+        pdf = pd.DataFrame(
+            {"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]}, index=[20, 
10, 30, 0, 50]
+        )
+        psdf = ps.from_pandas(pdf)
+        self.assert_eq(pdf.A.loc[pdf.B > 200], psdf.A.loc[psdf.B > 200])
+        self.assert_eq(pdf.B.loc[pdf.B > 200], psdf.B.loc[psdf.B > 200])
+        self.assert_eq(pdf.loc[pdf.B > 200], psdf.loc[psdf.B > 200])
+
+    def test_loc_non_informative_index(self):
+        pdf = pd.DataFrame({"x": [1, 2, 3, 4]}, index=[10, 20, 30, 40])
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(psdf.loc[20:30], pdf.loc[20:30])
+
+        pdf = pd.DataFrame({"x": [1, 2, 3, 4]}, index=[10, 20, 20, 40])
+        psdf = ps.from_pandas(pdf)
+        self.assert_eq(psdf.loc[20:20], pdf.loc[20:20])
+
+    def test_loc_with_series(self):
+        psdf = self.psdf
+        pdf = self.pdf
+
+        self.assert_eq(psdf.loc[psdf.a % 2 == 0], pdf.loc[pdf.a % 2 == 0])
+        self.assert_eq(psdf.loc[psdf.a % 2 == 0, "a"], pdf.loc[pdf.a % 2 == 0, 
"a"])
+        self.assert_eq(psdf.loc[psdf.a % 2 == 0, ["a"]], pdf.loc[pdf.a % 2 == 
0, ["a"]])
+        self.assert_eq(psdf.a.loc[psdf.a % 2 == 0], pdf.a.loc[pdf.a % 2 == 0])
+
+        self.assert_eq(psdf.loc[psdf.copy().a % 2 == 0], pdf.loc[pdf.copy().a 
% 2 == 0])
+        self.assert_eq(psdf.loc[psdf.copy().a % 2 == 0, "a"], 
pdf.loc[pdf.copy().a % 2 == 0, "a"])
+        self.assert_eq(
+            psdf.loc[psdf.copy().a % 2 == 0, ["a"]], pdf.loc[pdf.copy().a % 2 
== 0, ["a"]]
+        )
+        self.assert_eq(psdf.a.loc[psdf.copy().a % 2 == 0], 
pdf.a.loc[pdf.copy().a % 2 == 0])
+
+    def test_loc_noindex(self):
+        psdf = self.psdf
+        psdf = psdf.reset_index()
+        pdf = self.pdf
+        pdf = pdf.reset_index()
+
+        self.assert_eq(psdf[["a"]], pdf[["a"]])
+
+        self.assert_eq(psdf.loc[:], pdf.loc[:])
+        self.assert_eq(psdf.loc[5:5], pdf.loc[5:5])
+
+    def test_loc_on_numpy_datetimes(self):
+        pdf = pd.DataFrame(
+            {"x": [1, 2, 3]}, index=list(map(np.datetime64, ["2014", "2015", 
"2016"]))
+        )
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(psdf.loc["2014":"2015"], pdf.loc["2014":"2015"])
+
+    def test_loc_on_pandas_datetimes(self):
+        pdf = pd.DataFrame(
+            {"x": [1, 2, 3]}, index=list(map(pd.Timestamp, ["2014", "2015", 
"2016"]))
+        )
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(psdf.loc["2014":"2015"], pdf.loc["2014":"2015"])
+
+    @unittest.skip("TODO?: the behavior of slice for datetime")
+    def test_loc_datetime_no_freq(self):
+        datetime_index = pd.date_range("2016-01-01", "2016-01-31", freq="12h")
+        datetime_index.freq = None  # FORGET FREQUENCY
+        pdf = pd.DataFrame({"num": range(len(datetime_index))}, 
index=datetime_index)
+        psdf = ps.from_pandas(pdf)
+
+        slice_ = slice("2016-01-03", "2016-01-05")
+        result = psdf.loc[slice_, :]
+        expected = pdf.loc[slice_, :]
+        self.assert_eq(result, expected)
+
+    @unittest.skip("TODO?: the behavior of slice for datetime")
+    def test_loc_timestamp_str(self):
+        pdf = pd.DataFrame(
+            {"A": np.random.randn(100), "B": np.random.randn(100)},
+            index=pd.date_range("2011-01-01", freq="H", periods=100),
+        )
+        psdf = ps.from_pandas(pdf)
+
+        # partial string slice
+        # TODO?: self.assert_eq(pdf.loc['2011-01-02'],
+        # TODO?:                psdf.loc['2011-01-02'])
+        self.assert_eq(pdf.loc["2011-01-02":"2011-01-05"], 
psdf.loc["2011-01-02":"2011-01-05"])
+
+        # series
+        # TODO?: self.assert_eq(pdf.A.loc['2011-01-02'],
+        # TODO?:                psdf.A.loc['2011-01-02'])
+        self.assert_eq(pdf.A.loc["2011-01-02":"2011-01-05"], 
psdf.A.loc["2011-01-02":"2011-01-05"])
+
+        pdf = pd.DataFrame(
+            {"A": np.random.randn(100), "B": np.random.randn(100)},
+            index=pd.date_range("2011-01-01", freq="M", periods=100),
+        )
+        psdf = ps.from_pandas(pdf)
+        # TODO?: self.assert_eq(pdf.loc['2011-01'], psdf.loc['2011-01'])
+        # TODO?: self.assert_eq(pdf.loc['2011'], psdf.loc['2011'])
+
+        self.assert_eq(pdf.loc["2011-01":"2012-05"], 
psdf.loc["2011-01":"2012-05"])
+        self.assert_eq(pdf.loc["2011":"2015"], psdf.loc["2011":"2015"])
+
+        # series
+        # TODO?: self.assert_eq(pdf.B.loc['2011-01'], psdf.B.loc['2011-01'])
+        # TODO?: self.assert_eq(pdf.B.loc['2011'], psdf.B.loc['2011'])
+
+        self.assert_eq(pdf.B.loc["2011-01":"2012-05"], 
psdf.B.loc["2011-01":"2012-05"])
+        self.assert_eq(pdf.B.loc["2011":"2015"], psdf.B.loc["2011":"2015"])
+
+    def test_frame_loc_setitem(self):
+        pdf = pd.DataFrame(
+            [[1, 2], [4, 5], [7, 8]],
+            index=["cobra", "viper", "sidewinder"],
+            columns=["max_speed", "shield"],
+        )
+        psdf = ps.from_pandas(pdf)
+
+        pser1 = pdf.max_speed
+        pser2 = pdf.shield
+        psser1 = psdf.max_speed
+        psser2 = psdf.shield
+
+        pdf.loc[["viper", "sidewinder"], ["shield", "max_speed"]] = 10
+        psdf.loc[["viper", "sidewinder"], ["shield", "max_speed"]] = 10
+        self.assert_eq(psdf, pdf)
+        self.assert_eq(psser1, pser1)
+        self.assert_eq(psser2, pser2)
+
+        pdf.loc[["viper", "sidewinder"], "shield"] = 50
+        psdf.loc[["viper", "sidewinder"], "shield"] = 50
+        self.assert_eq(psdf, pdf)
+        self.assert_eq(psser1, pser1)
+        self.assert_eq(psser2, pser2)
+
+        pdf.loc["cobra", "max_speed"] = 30
+        psdf.loc["cobra", "max_speed"] = 30
+        self.assert_eq(psdf, pdf)
+        self.assert_eq(psser1, pser1)
+        self.assert_eq(psser2, pser2)
+
+        pdf.loc[pdf.max_speed < 5, "max_speed"] = -pdf.max_speed
+        psdf.loc[psdf.max_speed < 5, "max_speed"] = -psdf.max_speed
+        self.assert_eq(psdf, pdf)
+        self.assert_eq(psser1, pser1)
+        self.assert_eq(psser2, pser2)
+
+        pdf.loc[pdf.max_speed < 2, "max_speed"] = -pdf.max_speed
+        psdf.loc[psdf.max_speed < 2, "max_speed"] = -psdf.max_speed
+        self.assert_eq(psdf, pdf)
+        self.assert_eq(psser1, pser1)
+        self.assert_eq(psser2, pser2)
+
+        pdf.loc[:, "min_speed"] = 0
+        psdf.loc[:, "min_speed"] = 0
+        self.assert_eq(psdf, pdf, almost=True)
+        self.assert_eq(psser1, pser1)
+        self.assert_eq(psser2, pser2)
+
+        with self.assertRaisesRegex(ValueError, "Incompatible indexer with 
Series"):
+            psdf.loc["cobra", "max_speed"] = -psdf.max_speed
+        with self.assertRaisesRegex(ValueError, "shape mismatch"):
+            psdf.loc[:, ["shield", "max_speed"]] = -psdf.max_speed
+        with self.assertRaisesRegex(ValueError, "Only a dataframe with one 
column can be assigned"):
+            psdf.loc[:, "max_speed"] = psdf
+
+        # multi-index columns
+        columns = pd.MultiIndex.from_tuples(
+            [("x", "max_speed"), ("x", "shield"), ("y", "min_speed")]
+        )
+        pdf.columns = columns
+        psdf.columns = columns
+
+        pdf.loc[:, ("y", "shield")] = -pdf[("x", "shield")]
+        psdf.loc[:, ("y", "shield")] = -psdf[("x", "shield")]
+        self.assert_eq(psdf, pdf, almost=True)
+        self.assert_eq(psser1, pser1)
+        self.assert_eq(psser2, pser2)
+
+        pdf.loc[:, "z"] = 100
+        psdf.loc[:, "z"] = 100
+        self.assert_eq(psdf, pdf, almost=True)
+        self.assert_eq(psser1, pser1)
+        self.assert_eq(psser2, pser2)
+
+        with self.assertRaisesRegex(KeyError, "Key length \\(3\\) exceeds 
index depth \\(2\\)"):
+            psdf.loc[:, [("x", "max_speed", "foo")]] = -psdf[("x", "shield")]
+
+        pdf = pd.DataFrame(
+            [[1], [4], [7]], index=["cobra", "viper", "sidewinder"], 
columns=["max_speed"]
+        )
+        psdf = ps.from_pandas(pdf)
+
+        pdf.loc[:, "max_speed"] = pdf
+        psdf.loc[:, "max_speed"] = psdf
+        self.assert_eq(psdf, pdf)
+
+    def test_series_loc_setitem(self):
+        pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", 
"viper", "sidewinder"])
+        psdf = ps.from_pandas(pdf)
+
+        pser = pdf.x
+        psery = pdf.y
+        psser = psdf.x
+        pssery = psdf.y
+
+        pser.loc[pser % 2 == 1] = -pser
+        psser.loc[psser % 2 == 1] = -psser
+        self.assert_eq(psser, pser)
+        self.assert_eq(psdf, pdf)
+        self.assert_eq(pssery, psery)
+
+        for key, value in [
+            (["viper", "sidewinder"], 10),
+            ("viper", 50),
+            (slice(None), 10),
+            (slice(None, "viper"), 20),
+            (slice("viper", None), 30),
+        ]:
+            with self.subTest(key=key, value=value):
+                pser.loc[key] = value
+                psser.loc[key] = value
+                self.assert_eq(psser, pser)
+                self.assert_eq(psdf, pdf)
+                self.assert_eq(pssery, psery)
+
+        with self.assertRaises(ValueError):
+            psser.loc["viper"] = -psser
+
+        # multiindex
+        pser = pd.Series(
+            [1, 2, 3],
+            index=pd.MultiIndex.from_tuples([("x", "cobra"), ("x", "viper"), 
("y", "sidewinder")]),
+        )
+        psser = ps.from_pandas(pser)
+
+        pser.loc["x"] = pser * 10
+        psser.loc["x"] = psser * 10
+        self.assert_eq(psser, pser)
+
+        pser.loc["y"] = pser * 10
+        psser.loc["y"] = psser * 10
+        self.assert_eq(psser, pser)
+
+
+class IndexingLocTests(
+    IndexingLocMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.indexes.test_indexing_loc import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_loc_multi_idx.py 
b/python/pyspark/pandas/tests/indexes/test_indexing_loc_multi_idx.py
new file mode 100644
index 000000000000..024c22d281c9
--- /dev/null
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_loc_multi_idx.py
@@ -0,0 +1,150 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+import numpy as np
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class IndexingLocMultiIdxMixin:
+    @property
+    def pdf(self):
+        return pd.DataFrame(
+            {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 
0]},
+            index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
+        )
+
+    @property
+    def psdf(self):
+        return ps.from_pandas(self.pdf)
+
+    def test_loc_multiindex(self):
+        psdf = self.psdf
+        psdf = psdf.set_index("b", append=True)
+        pdf = self.pdf
+        pdf = pdf.set_index("b", append=True)
+
+        self.assert_eq(psdf.loc[:], pdf.loc[:])
+        self.assert_eq(psdf.loc[5:5], pdf.loc[5:5])
+        self.assert_eq(psdf.loc[5:9], pdf.loc[5:9])
+
+        self.assert_eq(psdf.loc[5], pdf.loc[5])
+        self.assert_eq(psdf.loc[9], pdf.loc[9])
+        # TODO: self.assert_eq(psdf.loc[(5, 3)], pdf.loc[(5, 3)])
+        # TODO: self.assert_eq(psdf.loc[(9, 0)], pdf.loc[(9, 0)])
+        self.assert_eq(psdf.a.loc[5], pdf.a.loc[5])
+        self.assert_eq(psdf.a.loc[9], pdf.a.loc[9])
+        self.assertTrue((psdf.a.loc[(5, 3)] == pdf.a.loc[(5, 3)]).all())
+        self.assert_eq(psdf.a.loc[(9, 0)], pdf.a.loc[(9, 0)])
+
+        # monotonically increasing index test
+        pdf = pd.DataFrame(
+            {"a": [1, 2, 3, 4, 5]},
+            index=pd.MultiIndex.from_tuples(
+                [("x", "a"), ("x", "b"), ("y", "c"), ("y", "d"), ("z", "e")]
+            ),
+        )
+        psdf = ps.from_pandas(pdf)
+
+        for rows_sel in [
+            slice(None),
+            slice("y", None),
+            slice(None, "y"),
+            slice(("x", "b"), None),
+            slice(None, ("y", "c")),
+            slice(("x", "b"), ("y", "c")),
+            slice("x", ("y", "c")),
+            slice(("x", "b"), "y"),
+        ]:
+            with self.subTest("monotonically increasing", rows_sel=rows_sel):
+                self.assert_eq(psdf.loc[rows_sel], pdf.loc[rows_sel])
+                self.assert_eq(psdf.a.loc[rows_sel], pdf.a.loc[rows_sel])
+
+        # monotonically increasing first index test
+        pdf = pd.DataFrame(
+            {"a": [1, 2, 3, 4, 5]},
+            index=pd.MultiIndex.from_tuples(
+                [("x", "a"), ("x", "b"), ("y", "c"), ("y", "a"), ("z", "e")]
+            ),
+        )
+        psdf = ps.from_pandas(pdf)
+
+        for rows_sel in [
+            slice(None),
+            slice("y", None),
+            slice(None, "y"),
+        ]:
+            with self.subTest("monotonically increasing first index", 
rows_sel=rows_sel):
+                self.assert_eq(psdf.loc[rows_sel], pdf.loc[rows_sel])
+                self.assert_eq(psdf.a.loc[rows_sel], pdf.a.loc[rows_sel])
+
+        for rows_sel in [
+            slice(("x", "b"), None),
+            slice(None, ("y", "c")),
+            slice(("x", "b"), ("y", "c")),
+            slice("x", ("y", "c")),
+            slice(("x", "b"), "y"),
+        ]:
+            with self.subTest("monotonically increasing first index", 
rows_sel=rows_sel):
+                self.assertRaises(KeyError, lambda: psdf.loc[rows_sel])
+                self.assertRaises(KeyError, lambda: psdf.a.loc[rows_sel])
+
+        # not monotonically increasing index test
+        pdf = pd.DataFrame(
+            {"a": [1, 2, 3, 4, 5]},
+            index=pd.MultiIndex.from_tuples(
+                [("z", "e"), ("y", "d"), ("y", "c"), ("x", "b"), ("x", "a")]
+            ),
+        )
+        psdf = ps.from_pandas(pdf)
+
+        for rows_sel in [
+            slice("y", None),
+            slice(None, "y"),
+            slice(("x", "b"), None),
+            slice(None, ("y", "c")),
+            slice(("x", "b"), ("y", "c")),
+            slice("x", ("y", "c")),
+            slice(("x", "b"), "y"),
+        ]:
+            with self.subTest("monotonically decreasing", rows_sel=rows_sel):
+                self.assertRaises(KeyError, lambda: psdf.loc[rows_sel])
+                self.assertRaises(KeyError, lambda: psdf.a.loc[rows_sel])
+
+
+class IndexingLocMultiIdxTests(
+    IndexingLocMultiIdxMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.indexes.test_indexing_loc_multi_idx import *  # 
noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/test_indexing.py 
b/python/pyspark/pandas/tests/test_indexing.py
index 0240f2c75996..e90db26223bc 100644
--- a/python/pyspark/pandas/tests/test_indexing.py
+++ b/python/pyspark/pandas/tests/test_indexing.py
@@ -196,227 +196,6 @@ class IndexingTest(ComparisonTestBase):
         with self.assertRaises(KeyError):
             psdf.iat[99, 0]
 
-    def test_loc(self):
-        psdf = self.psdf
-        pdf = self.pdf
-
-        self.assert_eq(psdf.loc[5:5], pdf.loc[5:5])
-        self.assert_eq(psdf.loc[3:8], pdf.loc[3:8])
-        self.assert_eq(psdf.loc[:8], pdf.loc[:8])
-        self.assert_eq(psdf.loc[3:], pdf.loc[3:])
-        self.assert_eq(psdf.loc[[5]], pdf.loc[[5]])
-        self.assert_eq(psdf.loc[:], pdf.loc[:])
-
-        # TODO?: self.assert_eq(psdf.loc[[3, 4, 1, 8]], pdf.loc[[3, 4, 1, 8]])
-        # TODO?: self.assert_eq(psdf.loc[[3, 4, 1, 9]], pdf.loc[[3, 4, 1, 9]])
-        # TODO?: self.assert_eq(psdf.loc[np.array([3, 4, 1, 9])], pdf.loc[np.array([3, 4, 1, 9])])
-
-        self.assert_eq(psdf.a.loc[5:5], pdf.a.loc[5:5])
-        self.assert_eq(psdf.a.loc[3:8], pdf.a.loc[3:8])
-        self.assert_eq(psdf.a.loc[:8], pdf.a.loc[:8])
-        self.assert_eq(psdf.a.loc[3:], pdf.a.loc[3:])
-        self.assert_eq(psdf.a.loc[[5]], pdf.a.loc[[5]])
-
-        # TODO?: self.assert_eq(psdf.a.loc[[3, 4, 1, 8]], pdf.a.loc[[3, 4, 1, 8]])
-        # TODO?: self.assert_eq(psdf.a.loc[[3, 4, 1, 9]], pdf.a.loc[[3, 4, 1, 9]])
-        # TODO?: self.assert_eq(psdf.a.loc[np.array([3, 4, 1, 9])],
-        #                       pdf.a.loc[np.array([3, 4, 1, 9])])
-
-        self.assert_eq(psdf.a.loc[[]], pdf.a.loc[[]])
-        self.assert_eq(psdf.a.loc[np.array([])], pdf.a.loc[np.array([])])
-
-        self.assert_eq(psdf.loc[1000:], pdf.loc[1000:])
-        self.assert_eq(psdf.loc[-2000:-1000], pdf.loc[-2000:-1000])
-
-        self.assert_eq(psdf.loc[5], pdf.loc[5])
-        self.assert_eq(psdf.loc[9], pdf.loc[9])
-        self.assert_eq(psdf.a.loc[5], pdf.a.loc[5])
-        self.assert_eq(psdf.a.loc[9], pdf.a.loc[9])
-
-        self.assertRaises(KeyError, lambda: psdf.loc[10])
-        self.assertRaises(KeyError, lambda: psdf.a.loc[10])
-
-        # monotonically increasing index test
-        pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]}, index=[0, 1, 1, 2, 2, 2, 4, 5, 6])
-        psdf = ps.from_pandas(pdf)
-
-        self.assert_eq(psdf.loc[:2], pdf.loc[:2])
-        self.assert_eq(psdf.loc[:3], pdf.loc[:3])
-        self.assert_eq(psdf.loc[3:], pdf.loc[3:])
-        self.assert_eq(psdf.loc[4:], pdf.loc[4:])
-        self.assert_eq(psdf.loc[3:2], pdf.loc[3:2])
-        self.assert_eq(psdf.loc[-1:2], pdf.loc[-1:2])
-        self.assert_eq(psdf.loc[3:10], pdf.loc[3:10])
-
-        # monotonically decreasing index test
-        pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]}, index=[6, 5, 5, 4, 4, 4, 2, 1, 0])
-        psdf = ps.from_pandas(pdf)
-
-        self.assert_eq(psdf.loc[:4], pdf.loc[:4])
-        self.assert_eq(psdf.loc[:3], pdf.loc[:3])
-        self.assert_eq(psdf.loc[3:], pdf.loc[3:])
-        self.assert_eq(psdf.loc[2:], pdf.loc[2:])
-        self.assert_eq(psdf.loc[2:3], pdf.loc[2:3])
-        self.assert_eq(psdf.loc[2:-1], pdf.loc[2:-1])
-        self.assert_eq(psdf.loc[10:3], pdf.loc[10:3])
-
-        # test when type of key is string and given value is not included in key
-        pdf = pd.DataFrame({"a": [1, 2, 3]}, index=["a", "b", "d"])
-        psdf = ps.from_pandas(pdf)
-
-        self.assert_eq(psdf.loc["a":"z"], pdf.loc["a":"z"])
-
-        # KeyError when index is not monotonic increasing or decreasing
-        # and specified values don't exist in index
-        psdf = ps.DataFrame([[1, 2], [4, 5], [7, 8]], index=["cobra", "viper", "sidewinder"])
-
-        self.assertRaises(KeyError, lambda: psdf.loc["cobra":"koalas"])
-        self.assertRaises(KeyError, lambda: psdf.loc["koalas":"viper"])
-
-        psdf = ps.DataFrame([[1, 2], [4, 5], [7, 8]], index=[10, 30, 20])
-
-        self.assertRaises(KeyError, lambda: psdf.loc[0:30])
-        self.assertRaises(KeyError, lambda: psdf.loc[10:100])
-
-    def test_loc_getitem_boolean_series(self):
-        pdf = pd.DataFrame(
-            {"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]}, index=[20, 10, 30, 0, 50]
-        )
-        psdf = ps.from_pandas(pdf)
-        self.assert_eq(pdf.A.loc[pdf.B > 200], psdf.A.loc[psdf.B > 200])
-        self.assert_eq(pdf.B.loc[pdf.B > 200], psdf.B.loc[psdf.B > 200])
-        self.assert_eq(pdf.loc[pdf.B > 200], psdf.loc[psdf.B > 200])
-
-    def test_loc_non_informative_index(self):
-        pdf = pd.DataFrame({"x": [1, 2, 3, 4]}, index=[10, 20, 30, 40])
-        psdf = ps.from_pandas(pdf)
-
-        self.assert_eq(psdf.loc[20:30], pdf.loc[20:30])
-
-        pdf = pd.DataFrame({"x": [1, 2, 3, 4]}, index=[10, 20, 20, 40])
-        psdf = ps.from_pandas(pdf)
-        self.assert_eq(psdf.loc[20:20], pdf.loc[20:20])
-
-    def test_loc_with_series(self):
-        psdf = self.psdf
-        pdf = self.pdf
-
-        self.assert_eq(psdf.loc[psdf.a % 2 == 0], pdf.loc[pdf.a % 2 == 0])
-        self.assert_eq(psdf.loc[psdf.a % 2 == 0, "a"], pdf.loc[pdf.a % 2 == 0, "a"])
-        self.assert_eq(psdf.loc[psdf.a % 2 == 0, ["a"]], pdf.loc[pdf.a % 2 == 0, ["a"]])
-        self.assert_eq(psdf.a.loc[psdf.a % 2 == 0], pdf.a.loc[pdf.a % 2 == 0])
-
-        self.assert_eq(psdf.loc[psdf.copy().a % 2 == 0], pdf.loc[pdf.copy().a % 2 == 0])
-        self.assert_eq(psdf.loc[psdf.copy().a % 2 == 0, "a"], pdf.loc[pdf.copy().a % 2 == 0, "a"])
-        self.assert_eq(
-            psdf.loc[psdf.copy().a % 2 == 0, ["a"]], pdf.loc[pdf.copy().a % 2 == 0, ["a"]]
-        )
-        self.assert_eq(psdf.a.loc[psdf.copy().a % 2 == 0], pdf.a.loc[pdf.copy().a % 2 == 0])
-
-    def test_loc_noindex(self):
-        psdf = self.psdf
-        psdf = psdf.reset_index()
-        pdf = self.pdf
-        pdf = pdf.reset_index()
-
-        self.assert_eq(psdf[["a"]], pdf[["a"]])
-
-        self.assert_eq(psdf.loc[:], pdf.loc[:])
-        self.assert_eq(psdf.loc[5:5], pdf.loc[5:5])
-
-    def test_loc_multiindex(self):
-        psdf = self.psdf
-        psdf = psdf.set_index("b", append=True)
-        pdf = self.pdf
-        pdf = pdf.set_index("b", append=True)
-
-        self.assert_eq(psdf.loc[:], pdf.loc[:])
-        self.assert_eq(psdf.loc[5:5], pdf.loc[5:5])
-        self.assert_eq(psdf.loc[5:9], pdf.loc[5:9])
-
-        self.assert_eq(psdf.loc[5], pdf.loc[5])
-        self.assert_eq(psdf.loc[9], pdf.loc[9])
-        # TODO: self.assert_eq(psdf.loc[(5, 3)], pdf.loc[(5, 3)])
-        # TODO: self.assert_eq(psdf.loc[(9, 0)], pdf.loc[(9, 0)])
-        self.assert_eq(psdf.a.loc[5], pdf.a.loc[5])
-        self.assert_eq(psdf.a.loc[9], pdf.a.loc[9])
-        self.assertTrue((psdf.a.loc[(5, 3)] == pdf.a.loc[(5, 3)]).all())
-        self.assert_eq(psdf.a.loc[(9, 0)], pdf.a.loc[(9, 0)])
-
-        # monotonically increasing index test
-        pdf = pd.DataFrame(
-            {"a": [1, 2, 3, 4, 5]},
-            index=pd.MultiIndex.from_tuples(
-                [("x", "a"), ("x", "b"), ("y", "c"), ("y", "d"), ("z", "e")]
-            ),
-        )
-        psdf = ps.from_pandas(pdf)
-
-        for rows_sel in [
-            slice(None),
-            slice("y", None),
-            slice(None, "y"),
-            slice(("x", "b"), None),
-            slice(None, ("y", "c")),
-            slice(("x", "b"), ("y", "c")),
-            slice("x", ("y", "c")),
-            slice(("x", "b"), "y"),
-        ]:
-            with self.subTest("monotonically increasing", rows_sel=rows_sel):
-                self.assert_eq(psdf.loc[rows_sel], pdf.loc[rows_sel])
-                self.assert_eq(psdf.a.loc[rows_sel], pdf.a.loc[rows_sel])
-
-        # monotonically increasing first index test
-        pdf = pd.DataFrame(
-            {"a": [1, 2, 3, 4, 5]},
-            index=pd.MultiIndex.from_tuples(
-                [("x", "a"), ("x", "b"), ("y", "c"), ("y", "a"), ("z", "e")]
-            ),
-        )
-        psdf = ps.from_pandas(pdf)
-
-        for rows_sel in [
-            slice(None),
-            slice("y", None),
-            slice(None, "y"),
-        ]:
-            with self.subTest("monotonically increasing first index", rows_sel=rows_sel):
-                self.assert_eq(psdf.loc[rows_sel], pdf.loc[rows_sel])
-                self.assert_eq(psdf.a.loc[rows_sel], pdf.a.loc[rows_sel])
-
-        for rows_sel in [
-            slice(("x", "b"), None),
-            slice(None, ("y", "c")),
-            slice(("x", "b"), ("y", "c")),
-            slice("x", ("y", "c")),
-            slice(("x", "b"), "y"),
-        ]:
-            with self.subTest("monotonically increasing first index", rows_sel=rows_sel):
-                self.assertRaises(KeyError, lambda: psdf.loc[rows_sel])
-                self.assertRaises(KeyError, lambda: psdf.a.loc[rows_sel])
-
-        # not monotonically increasing index test
-        pdf = pd.DataFrame(
-            {"a": [1, 2, 3, 4, 5]},
-            index=pd.MultiIndex.from_tuples(
-                [("z", "e"), ("y", "d"), ("y", "c"), ("x", "b"), ("x", "a")]
-            ),
-        )
-        psdf = ps.from_pandas(pdf)
-
-        for rows_sel in [
-            slice("y", None),
-            slice(None, "y"),
-            slice(("x", "b"), None),
-            slice(None, ("y", "c")),
-            slice(("x", "b"), ("y", "c")),
-            slice("x", ("y", "c")),
-            slice(("x", "b"), "y"),
-        ]:
-            with self.subTest("monotonically decreasing", rows_sel=rows_sel):
-                self.assertRaises(KeyError, lambda: psdf.loc[rows_sel])
-                self.assertRaises(KeyError, lambda: psdf.a.loc[rows_sel])
-
     def test_loc2d_multiindex(self):
         psdf = self.psdf
         psdf = psdf.set_index("b", append=True)
@@ -651,70 +430,6 @@ class IndexingTest(ComparisonTestBase):
         self.assert_eq(psdf["a":"b"], pdf["a":"b"])
         self.assert_eq(psdf["f":], pdf["f":])
 
-    def test_loc_on_numpy_datetimes(self):
-        pdf = pd.DataFrame(
-            {"x": [1, 2, 3]}, index=list(map(np.datetime64, ["2014", "2015", "2016"]))
-        )
-        psdf = ps.from_pandas(pdf)
-
-        self.assert_eq(psdf.loc["2014":"2015"], pdf.loc["2014":"2015"])
-
-    def test_loc_on_pandas_datetimes(self):
-        pdf = pd.DataFrame(
-            {"x": [1, 2, 3]}, index=list(map(pd.Timestamp, ["2014", "2015", "2016"]))
-        )
-        psdf = ps.from_pandas(pdf)
-
-        self.assert_eq(psdf.loc["2014":"2015"], pdf.loc["2014":"2015"])
-
-    @unittest.skip("TODO?: the behavior of slice for datetime")
-    def test_loc_datetime_no_freq(self):
-        datetime_index = pd.date_range("2016-01-01", "2016-01-31", freq="12h")
-        datetime_index.freq = None  # FORGET FREQUENCY
-        pdf = pd.DataFrame({"num": range(len(datetime_index))}, index=datetime_index)
-        psdf = ps.from_pandas(pdf)
-
-        slice_ = slice("2016-01-03", "2016-01-05")
-        result = psdf.loc[slice_, :]
-        expected = pdf.loc[slice_, :]
-        self.assert_eq(result, expected)
-
-    @unittest.skip("TODO?: the behavior of slice for datetime")
-    def test_loc_timestamp_str(self):
-        pdf = pd.DataFrame(
-            {"A": np.random.randn(100), "B": np.random.randn(100)},
-            index=pd.date_range("2011-01-01", freq="H", periods=100),
-        )
-        psdf = ps.from_pandas(pdf)
-
-        # partial string slice
-        # TODO?: self.assert_eq(pdf.loc['2011-01-02'],
-        # TODO?:                psdf.loc['2011-01-02'])
-        self.assert_eq(pdf.loc["2011-01-02":"2011-01-05"], psdf.loc["2011-01-02":"2011-01-05"])
-
-        # series
-        # TODO?: self.assert_eq(pdf.A.loc['2011-01-02'],
-        # TODO?:                psdf.A.loc['2011-01-02'])
-        self.assert_eq(pdf.A.loc["2011-01-02":"2011-01-05"], psdf.A.loc["2011-01-02":"2011-01-05"])
-
-        pdf = pd.DataFrame(
-            {"A": np.random.randn(100), "B": np.random.randn(100)},
-            index=pd.date_range("2011-01-01", freq="M", periods=100),
-        )
-        psdf = ps.from_pandas(pdf)
-        # TODO?: self.assert_eq(pdf.loc['2011-01'], psdf.loc['2011-01'])
-        # TODO?: self.assert_eq(pdf.loc['2011'], psdf.loc['2011'])
-
-        self.assert_eq(pdf.loc["2011-01":"2012-05"], psdf.loc["2011-01":"2012-05"])
-        self.assert_eq(pdf.loc["2011":"2015"], psdf.loc["2011":"2015"])
-
-        # series
-        # TODO?: self.assert_eq(pdf.B.loc['2011-01'], psdf.B.loc['2011-01'])
-        # TODO?: self.assert_eq(pdf.B.loc['2011'], psdf.B.loc['2011'])
-
-        self.assert_eq(pdf.B.loc["2011-01":"2012-05"], psdf.B.loc["2011-01":"2012-05"])
-        self.assert_eq(pdf.B.loc["2011":"2015"], psdf.B.loc["2011":"2015"])
-
     @unittest.skip("TODO?: the behavior of slice for datetime")
     def test_getitem_timestamp_str(self):
         pdf = pd.DataFrame(
@@ -870,93 +585,6 @@ class IndexingTest(ComparisonTestBase):
                     psdf.iloc[rows_sel, :1].sort_index(), pdf.iloc[rows_sel, :1].sort_index()
                 )
 
-    def test_frame_loc_setitem(self):
-        pdf = pd.DataFrame(
-            [[1, 2], [4, 5], [7, 8]],
-            index=["cobra", "viper", "sidewinder"],
-            columns=["max_speed", "shield"],
-        )
-        psdf = ps.from_pandas(pdf)
-
-        pser1 = pdf.max_speed
-        pser2 = pdf.shield
-        psser1 = psdf.max_speed
-        psser2 = psdf.shield
-
-        pdf.loc[["viper", "sidewinder"], ["shield", "max_speed"]] = 10
-        psdf.loc[["viper", "sidewinder"], ["shield", "max_speed"]] = 10
-        self.assert_eq(psdf, pdf)
-        self.assert_eq(psser1, pser1)
-        self.assert_eq(psser2, pser2)
-
-        pdf.loc[["viper", "sidewinder"], "shield"] = 50
-        psdf.loc[["viper", "sidewinder"], "shield"] = 50
-        self.assert_eq(psdf, pdf)
-        self.assert_eq(psser1, pser1)
-        self.assert_eq(psser2, pser2)
-
-        pdf.loc["cobra", "max_speed"] = 30
-        psdf.loc["cobra", "max_speed"] = 30
-        self.assert_eq(psdf, pdf)
-        self.assert_eq(psser1, pser1)
-        self.assert_eq(psser2, pser2)
-
-        pdf.loc[pdf.max_speed < 5, "max_speed"] = -pdf.max_speed
-        psdf.loc[psdf.max_speed < 5, "max_speed"] = -psdf.max_speed
-        self.assert_eq(psdf, pdf)
-        self.assert_eq(psser1, pser1)
-        self.assert_eq(psser2, pser2)
-
-        pdf.loc[pdf.max_speed < 2, "max_speed"] = -pdf.max_speed
-        psdf.loc[psdf.max_speed < 2, "max_speed"] = -psdf.max_speed
-        self.assert_eq(psdf, pdf)
-        self.assert_eq(psser1, pser1)
-        self.assert_eq(psser2, pser2)
-
-        pdf.loc[:, "min_speed"] = 0
-        psdf.loc[:, "min_speed"] = 0
-        self.assert_eq(psdf, pdf, almost=True)
-        self.assert_eq(psser1, pser1)
-        self.assert_eq(psser2, pser2)
-
-        with self.assertRaisesRegex(ValueError, "Incompatible indexer with Series"):
-            psdf.loc["cobra", "max_speed"] = -psdf.max_speed
-        with self.assertRaisesRegex(ValueError, "shape mismatch"):
-            psdf.loc[:, ["shield", "max_speed"]] = -psdf.max_speed
-        with self.assertRaisesRegex(ValueError, "Only a dataframe with one column can be assigned"):
-            psdf.loc[:, "max_speed"] = psdf
-
-        # multi-index columns
-        columns = pd.MultiIndex.from_tuples(
-            [("x", "max_speed"), ("x", "shield"), ("y", "min_speed")]
-        )
-        pdf.columns = columns
-        psdf.columns = columns
-
-        pdf.loc[:, ("y", "shield")] = -pdf[("x", "shield")]
-        psdf.loc[:, ("y", "shield")] = -psdf[("x", "shield")]
-        self.assert_eq(psdf, pdf, almost=True)
-        self.assert_eq(psser1, pser1)
-        self.assert_eq(psser2, pser2)
-
-        pdf.loc[:, "z"] = 100
-        psdf.loc[:, "z"] = 100
-        self.assert_eq(psdf, pdf, almost=True)
-        self.assert_eq(psser1, pser1)
-        self.assert_eq(psser2, pser2)
-
-        with self.assertRaisesRegex(KeyError, "Key length \\(3\\) exceeds index depth \\(2\\)"):
-            psdf.loc[:, [("x", "max_speed", "foo")]] = -psdf[("x", "shield")]
-
-        pdf = pd.DataFrame(
-            [[1], [4], [7]], index=["cobra", "viper", "sidewinder"], columns=["max_speed"]
-        )
-        psdf = ps.from_pandas(pdf)
-
-        pdf.loc[:, "max_speed"] = pdf
-        psdf.loc[:, "max_speed"] = psdf
-        self.assert_eq(psdf, pdf)
-
     def test_frame_iloc_setitem(self):
         pdf = pd.DataFrame(
             [[1, 2], [4, 5], [7, 8]],
@@ -989,53 +617,6 @@ class IndexingTest(ComparisonTestBase):
         psdf.iloc[:, 0] = psdf
         self.assert_eq(psdf, pdf)
 
-    def test_series_loc_setitem(self):
-        pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", "viper", "sidewinder"])
-        psdf = ps.from_pandas(pdf)
-
-        pser = pdf.x
-        psery = pdf.y
-        psser = psdf.x
-        pssery = psdf.y
-
-        pser.loc[pser % 2 == 1] = -pser
-        psser.loc[psser % 2 == 1] = -psser
-        self.assert_eq(psser, pser)
-        self.assert_eq(psdf, pdf)
-        self.assert_eq(pssery, psery)
-
-        for key, value in [
-            (["viper", "sidewinder"], 10),
-            ("viper", 50),
-            (slice(None), 10),
-            (slice(None, "viper"), 20),
-            (slice("viper", None), 30),
-        ]:
-            with self.subTest(key=key, value=value):
-                pser.loc[key] = value
-                psser.loc[key] = value
-                self.assert_eq(psser, pser)
-                self.assert_eq(psdf, pdf)
-                self.assert_eq(pssery, psery)
-
-        with self.assertRaises(ValueError):
-            psser.loc["viper"] = -psser
-
-        # multiindex
-        pser = pd.Series(
-            [1, 2, 3],
-            index=pd.MultiIndex.from_tuples([("x", "cobra"), ("x", "viper"), ("y", "sidewinder")]),
-        )
-        psser = ps.from_pandas(pser)
-
-        pser.loc["x"] = pser * 10
-        psser.loc["x"] = psser * 10
-        self.assert_eq(psser, pser)
-
-        pser.loc["y"] = pser * 10
-        psser.loc["y"] = psser * 10
-        self.assert_eq(psser, pser)
-
     def test_series_iloc_setitem(self):
         pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", "viper", "sidewinder"])
         psdf = ps.from_pandas(pdf)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-46517][PS][TESTS] Reorganize `IndexingTest`: factor out `test_loc*` tests

Reply via email to