(spark) branch master updated: [SPARK-46513][PS][TESTS] Move `BasicIndexingTests` to `pyspark.pandas.tests.indexes.*`

gurwls223 Tue, 26 Dec 2023 15:56:54 -0800

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new fb09e31fffc5 [SPARK-46513][PS][TESTS] Move `BasicIndexingTests` to `pyspark.pandas.tests.indexes.*`
fb09e31fffc5 is described below

commit fb09e31fffc5dc26ee2f33233a806eac6514654a
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Wed Dec 27 08:56:38 2023 +0900

    [SPARK-46513][PS][TESTS] Move `BasicIndexingTests` to `pyspark.pandas.tests.indexes.*`
    
    ### What changes were proposed in this pull request?
    Move `BasicIndexingTests` to `pyspark.pandas.tests.indexes.*`
    
    ### Why are the changes needed?
    test code clean up
    
    ### Does this PR introduce _any_ user-facing change?
    no, test-only
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #44499 from zhengruifeng/ps_test_index_basic.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 dev/sparktestsupport/modules.py                    |   3 +-
 .../test_parity_indexing_basic.py}                 |  21 +--
 .../pandas/tests/indexes/test_indexing_basic.py    | 171 +++++++++++++++++++++
 python/pyspark/pandas/tests/test_indexing.py       | 130 ----------------
 4 files changed, 179 insertions(+), 146 deletions(-)

diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 66ae11886cd4..042e72863efd 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -816,6 +816,7 @@ pyspark_pandas_slow = Module(
         "pyspark.pandas.tests.indexes.test_datetime_round",
         "pyspark.pandas.tests.indexes.test_align",
         "pyspark.pandas.tests.indexes.test_indexing",
+        "pyspark.pandas.tests.indexes.test_indexing_basic",
         "pyspark.pandas.tests.indexes.test_reindex",
         "pyspark.pandas.tests.indexes.test_rename",
         "pyspark.pandas.tests.indexes.test_reset_index",
@@ -1088,6 +1089,7 @@ pyspark_pandas_connect_part0 = Module(
         "pyspark.pandas.tests.connect.indexes.test_parity_map",
         "pyspark.pandas.tests.connect.indexes.test_parity_align",
         "pyspark.pandas.tests.connect.indexes.test_parity_indexing",
+        "pyspark.pandas.tests.connect.indexes.test_parity_indexing_basic",
         "pyspark.pandas.tests.connect.indexes.test_parity_reindex",
         "pyspark.pandas.tests.connect.indexes.test_parity_rename",
         "pyspark.pandas.tests.connect.indexes.test_parity_reset_index",
@@ -1136,7 +1138,6 @@ pyspark_pandas_connect_part1 = Module(
         "pyspark.pandas.tests.connect.groupby.test_parity_cumulative",
         "pyspark.pandas.tests.connect.groupby.test_parity_missing_data",
         "pyspark.pandas.tests.connect.groupby.test_parity_split_apply",
-        "pyspark.pandas.tests.connect.test_parity_indexing",
         "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_align",
         "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_basic_slow",
         "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_cov",
diff --git a/python/pyspark/pandas/tests/connect/test_parity_indexing.py b/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_basic.py
similarity index 70%
rename from python/pyspark/pandas/tests/connect/test_parity_indexing.py
rename to python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_basic.py
index 950bd2d0b2d0..626f17c72113 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_indexing.py
+++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_basic.py
@@ -16,30 +16,21 @@
 #
 import unittest
 
-import pandas as pd
-
-from pyspark import pandas as ps
-from pyspark.pandas.tests.test_indexing import BasicIndexingTestsMixin
+from pyspark.pandas.tests.indexes.test_indexing_basic import BasicIndexingTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
 class BasicIndexingParityTests(
-    BasicIndexingTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase
+    BasicIndexingTestsMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
 ):
-    @property
-    def pdf(self):
-        return pd.DataFrame(
-            {"month": [1, 4, 7, 10], "year": [2012, 2014, 2013, 2014], "sale": [55, 40, 84, 31]}
-        )
-
-    @property
-    def psdf(self):
-        return ps.from_pandas(self.pdf)
+    pass
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.connect.test_parity_indexing import *  # noqa: F401
+    from pyspark.pandas.tests.connect.indexes.test_parity_indexing_basic import *  # noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_basic.py b/python/pyspark/pandas/tests/indexes/test_indexing_basic.py
new file mode 100644
index 000000000000..365ac0b86d47
--- /dev/null
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_basic.py
@@ -0,0 +1,171 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase, compare_both
+
+
+class BasicIndexingTestsMixin:
+    @property
+    def pdf(self):
+        return pd.DataFrame(
+            {"month": [1, 4, 7, 10], "year": [2012, 2014, 2013, 2014], "sale": [55, 40, 84, 31]}
+        )
+
+    @property
+    def psdf(self):
+        return ps.from_pandas(self.pdf)
+
+    @compare_both(almost=False)
+    def test_indexing(self, df):
+        df1 = df.set_index("month")
+        yield df1
+
+        yield df.set_index("month", drop=False)
+        yield df.set_index("month", append=True)
+        yield df.set_index(["year", "month"])
+        yield df.set_index(["year", "month"], drop=False)
+        yield df.set_index(["year", "month"], append=True)
+
+        yield df1.set_index("year", drop=False, append=True)
+
+        df2 = df1.copy()
+        df2.set_index("year", append=True, inplace=True)
+        yield df2
+
+        self.assertRaisesRegex(KeyError, "unknown", lambda: df.set_index("unknown"))
+        self.assertRaisesRegex(KeyError, "unknown", lambda: df.set_index(["month", "unknown"]))
+
+        for d in [df, df1, df2]:
+            yield d.reset_index()
+            yield d.reset_index(drop=True)
+
+        yield df1.reset_index(level=0)
+        yield df2.reset_index(level=1)
+        yield df2.reset_index(level=[1, 0])
+        yield df1.reset_index(level="month")
+        yield df2.reset_index(level="year")
+        yield df2.reset_index(level=["month", "year"])
+        yield df2.reset_index(level="month", drop=True)
+        yield df2.reset_index(level=["month", "year"], drop=True)
+
+        self.assertRaisesRegex(
+            IndexError,
+            "Too many levels: Index has only 1 level, not 3",
+            lambda: df1.reset_index(level=2),
+        )
+        self.assertRaisesRegex(
+            IndexError,
+            "Too many levels: Index has only 1 level, not 4",
+            lambda: df1.reset_index(level=[3, 2]),
+        )
+        self.assertRaisesRegex(KeyError, "unknown.*month", lambda: df1.reset_index(level="unknown"))
+        self.assertRaisesRegex(
+            KeyError, "Level unknown not found", lambda: df2.reset_index(level="unknown")
+        )
+
+        df3 = df2.copy()
+        df3.reset_index(inplace=True)
+        yield df3
+
+        yield df1.sale.reset_index()
+        yield df1.sale.reset_index(level=0)
+        yield df2.sale.reset_index(level=[1, 0])
+        yield df1.sale.reset_index(drop=True)
+        yield df1.sale.reset_index(name="s")
+        yield df1.sale.reset_index(name="s", drop=True)
+
+        s = df1.sale
+        self.assertRaisesRegex(
+            TypeError,
+            "Cannot reset_index inplace on a Series to create a DataFrame",
+            lambda: s.reset_index(inplace=True),
+        )
+        s.reset_index(drop=True, inplace=True)
+        yield s
+        yield df1
+
+        # multi-index columns
+        df4 = df.copy()
+        df4.columns = pd.MultiIndex.from_tuples(
+            [("cal", "month"), ("cal", "year"), ("num", "sale")]
+        )
+        df5 = df4.set_index(("cal", "month"))
+        yield df5
+        yield df4.set_index([("cal", "month"), ("num", "sale")])
+
+        self.assertRaises(KeyError, lambda: df5.reset_index(level=("cal", "month")))
+
+        yield df5.reset_index(level=[("cal", "month")])
+
+        # non-string names
+        df6 = df.copy()
+        df6.columns = [10.0, 20.0, 30.0]
+        df7 = df6.set_index(10.0)
+        yield df7
+        yield df6.set_index([10.0, 30.0])
+
+        yield df7.reset_index(level=10.0)
+        yield df7.reset_index(level=[10.0])
+
+        df8 = df.copy()
+        df8.columns = pd.MultiIndex.from_tuples([(10, "month"), (10, "year"), (20, "sale")])
+        df9 = df8.set_index((10, "month"))
+        yield df9
+        yield df8.set_index([(10, "month"), (20, "sale")])
+
+        yield df9.reset_index(level=[(10, "month")])
+
+    def test_from_pandas_with_explicit_index(self):
+        pdf = self.pdf
+
+        df1 = ps.from_pandas(pdf.set_index("month"))
+        self.assertPandasEqual(df1._to_pandas(), pdf.set_index("month"))
+
+        df2 = ps.from_pandas(pdf.set_index(["year", "month"]))
+        self.assertPandasEqual(df2._to_pandas(), pdf.set_index(["year", "month"]))
+
+    def test_limitations(self):
+        df = self.psdf.set_index("month")
+
+        self.assertRaisesRegex(
+            ValueError,
+            "Level should be all int or all string.",
+            lambda: df.reset_index([1, "month"]),
+        )
+
+
+class BasicIndexingTests(
+    BasicIndexingTestsMixin,
+    PandasOnSparkTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.indexes.test_indexing_basic import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/test_indexing.py b/python/pyspark/pandas/tests/test_indexing.py
index eb86c9ffabc4..0240f2c75996 100644
--- a/python/pyspark/pandas/tests/test_indexing.py
+++ b/python/pyspark/pandas/tests/test_indexing.py
@@ -26,132 +26,6 @@ from pyspark.pandas.exceptions import SparkPandasIndexingError, SparkPandasNotIm
 from pyspark.testing.pandasutils import ComparisonTestBase, compare_both
 
 
-class BasicIndexingTestsMixin:
-    @property
-    def pdf(self):
-        return pd.DataFrame(
-            {"month": [1, 4, 7, 10], "year": [2012, 2014, 2013, 2014], "sale": [55, 40, 84, 31]}
-        )
-
-    @compare_both(almost=False)
-    def test_indexing(self, df):
-        df1 = df.set_index("month")
-        yield df1
-
-        yield df.set_index("month", drop=False)
-        yield df.set_index("month", append=True)
-        yield df.set_index(["year", "month"])
-        yield df.set_index(["year", "month"], drop=False)
-        yield df.set_index(["year", "month"], append=True)
-
-        yield df1.set_index("year", drop=False, append=True)
-
-        df2 = df1.copy()
-        df2.set_index("year", append=True, inplace=True)
-        yield df2
-
-        self.assertRaisesRegex(KeyError, "unknown", lambda: df.set_index("unknown"))
-        self.assertRaisesRegex(KeyError, "unknown", lambda: df.set_index(["month", "unknown"]))
-
-        for d in [df, df1, df2]:
-            yield d.reset_index()
-            yield d.reset_index(drop=True)
-
-        yield df1.reset_index(level=0)
-        yield df2.reset_index(level=1)
-        yield df2.reset_index(level=[1, 0])
-        yield df1.reset_index(level="month")
-        yield df2.reset_index(level="year")
-        yield df2.reset_index(level=["month", "year"])
-        yield df2.reset_index(level="month", drop=True)
-        yield df2.reset_index(level=["month", "year"], drop=True)
-
-        self.assertRaisesRegex(
-            IndexError,
-            "Too many levels: Index has only 1 level, not 3",
-            lambda: df1.reset_index(level=2),
-        )
-        self.assertRaisesRegex(
-            IndexError,
-            "Too many levels: Index has only 1 level, not 4",
-            lambda: df1.reset_index(level=[3, 2]),
-        )
-        self.assertRaisesRegex(KeyError, "unknown.*month", lambda: df1.reset_index(level="unknown"))
-        self.assertRaisesRegex(
-            KeyError, "Level unknown not found", lambda: df2.reset_index(level="unknown")
-        )
-
-        df3 = df2.copy()
-        df3.reset_index(inplace=True)
-        yield df3
-
-        yield df1.sale.reset_index()
-        yield df1.sale.reset_index(level=0)
-        yield df2.sale.reset_index(level=[1, 0])
-        yield df1.sale.reset_index(drop=True)
-        yield df1.sale.reset_index(name="s")
-        yield df1.sale.reset_index(name="s", drop=True)
-
-        s = df1.sale
-        self.assertRaisesRegex(
-            TypeError,
-            "Cannot reset_index inplace on a Series to create a DataFrame",
-            lambda: s.reset_index(inplace=True),
-        )
-        s.reset_index(drop=True, inplace=True)
-        yield s
-        yield df1
-
-        # multi-index columns
-        df4 = df.copy()
-        df4.columns = pd.MultiIndex.from_tuples(
-            [("cal", "month"), ("cal", "year"), ("num", "sale")]
-        )
-        df5 = df4.set_index(("cal", "month"))
-        yield df5
-        yield df4.set_index([("cal", "month"), ("num", "sale")])
-
-        self.assertRaises(KeyError, lambda: df5.reset_index(level=("cal", "month")))
-
-        yield df5.reset_index(level=[("cal", "month")])
-
-        # non-string names
-        df6 = df.copy()
-        df6.columns = [10.0, 20.0, 30.0]
-        df7 = df6.set_index(10.0)
-        yield df7
-        yield df6.set_index([10.0, 30.0])
-
-        yield df7.reset_index(level=10.0)
-        yield df7.reset_index(level=[10.0])
-
-        df8 = df.copy()
-        df8.columns = pd.MultiIndex.from_tuples([(10, "month"), (10, "year"), (20, "sale")])
-        df9 = df8.set_index((10, "month"))
-        yield df9
-        yield df8.set_index([(10, "month"), (20, "sale")])
-
-        yield df9.reset_index(level=[(10, "month")])
-
-    def test_from_pandas_with_explicit_index(self):
-        pdf = self.pdf
-
-        df1 = ps.from_pandas(pdf.set_index("month"))
-        self.assertPandasEqual(df1._to_pandas(), pdf.set_index("month"))
-
-        df2 = ps.from_pandas(pdf.set_index(["year", "month"]))
-        self.assertPandasEqual(df2._to_pandas(), pdf.set_index(["year", "month"]))
-
-    def test_limitations(self):
-        df = self.psdf.set_index("month")
-
-        self.assertRaisesRegex(
-            ValueError,
-            "Level should be all int or all string.",
-            lambda: df.reset_index([1, "month"]),
-        )
-
-
 class IndexingTest(ComparisonTestBase):
     @property
     def pdf(self):
@@ -1320,10 +1194,6 @@ class IndexingTest(ComparisonTestBase):
             psdf.iloc[[1, 1]]
 
 
-class BasicIndexingTests(BasicIndexingTestsMixin, ComparisonTestBase):
-    pass
-
-
 if __name__ == "__main__":
     from pyspark.pandas.tests.test_indexing import *  # noqa: F401
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-46513][PS][TESTS] Move `BasicIndexingTests` to `pyspark.pandas.tests.indexes.*`

Reply via email to