This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 1ff40d6 [SPARK-37886][PYTHON][TESTS] Use ComparisonTestBase as base class in OpsTestBase 1ff40d6 is described below commit 1ff40d61cee754d3ba60ee45f839dba76a9955d3 Author: Yikun Jiang <yikunk...@gmail.com> AuthorDate: Sun Jan 23 11:32:41 2022 +0900 [SPARK-37886][PYTHON][TESTS] Use ComparisonTestBase as base class in OpsTestBase ### What changes were proposed in this pull request? - Rename TestCasesUtils to OpsTestBase - Make `OpsTestBase` inherit from `ComparisonTestBase` (`PandasOnSparkTestCase` with `pdf` and `psdf`) - Make `*OpsTest` inherit from `OpsTestBase` ### Why are the changes needed? All data type ops related test cases use `PandasOnSparkTestCase, TestCasesUtils` as base classes; it is better to let `TestCasesUtils` inherit from `PandasOnSparkTestCase` instead of using multiple inheritance. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #35203 from Yikun/opstest_refactor. 
Authored-by: Yikun Jiang <yikunk...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py | 5 ++--- python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py | 7 +++---- .../pyspark/pandas/tests/data_type_ops/test_categorical_ops.py | 5 ++--- python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py | 5 ++--- python/pyspark/pandas/tests/data_type_ops/test_date_ops.py | 5 ++--- python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py | 5 ++--- python/pyspark/pandas/tests/data_type_ops/test_null_ops.py | 5 ++--- python/pyspark/pandas/tests/data_type_ops/test_num_ops.py | 9 ++++----- python/pyspark/pandas/tests/data_type_ops/test_string_ops.py | 7 +++---- .../pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py | 5 ++--- python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py | 5 ++--- python/pyspark/pandas/tests/data_type_ops/testing_utils.py | 10 ++++------ 12 files changed, 30 insertions(+), 43 deletions(-) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py index 5dc7f80..35fcb37 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py @@ -19,11 +19,10 @@ import pandas as pd from pandas.api.types import CategoricalDtype from pyspark import pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class BinaryOpsTest(OpsTestBase): @property def pser(self): return pd.Series([b"1", b"2", b"3"]) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py index b83b610..02bb048 100644 --- 
a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py @@ -25,15 +25,14 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps from pyspark.pandas import option_context -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( extension_float_dtypes_available, extension_object_dtypes_available, ) -from pyspark.testing.pandasutils import PandasOnSparkTestCase -class BooleanOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class BooleanOpsTest(OpsTestBase): @property def bool_pdf(self): return pd.DataFrame({"this": [True, False, True], "that": [False, True, True]}) @@ -381,7 +380,7 @@ class BooleanOpsTest(PandasOnSparkTestCase, TestCasesUtils): @unittest.skipIf( not extension_object_dtypes_available, "pandas extension object dtypes are not available" ) -class BooleanExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class BooleanExtensionOpsTest(OpsTestBase): @property def boolean_pdf(self): return pd.DataFrame( diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py index e07af72..b84c35b 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py @@ -23,11 +23,10 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps from pyspark.pandas.config import option_context -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class CategoricalOpsTest(OpsTestBase): @property def pdf(self): return 
pd.DataFrame( diff --git a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py index 91a92ba..cc9a0bf 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py @@ -21,11 +21,10 @@ import datetime import pandas as pd from pyspark import pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class ComplexOpsTest(OpsTestBase): @property def pser(self): return pd.Series([[1, 2, 3]]) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py index 8c196d2..f0585c3 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py @@ -21,11 +21,10 @@ import pandas as pd from pandas.api.types import CategoricalDtype from pyspark import pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class DateOpsTest(OpsTestBase): @property def pser(self): return pd.Series( diff --git a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py index 5eba485..f29f9d3 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py @@ -21,11 +21,10 @@ import pandas as pd from pandas.api.types import CategoricalDtype from pyspark import pandas as ps -from 
pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class DatetimeOpsTest(OpsTestBase): @property def pser(self): return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="D")) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py index c2b6be2..009d4d0 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py @@ -19,11 +19,10 @@ import pandas as pd from pandas.api.types import CategoricalDtype import pyspark.pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class NullOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class NullOpsTest(OpsTestBase): @property def pser(self): return pd.Series([None, None, None]) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py index 785eb25..0c2c94e 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py @@ -25,17 +25,16 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps from pyspark.pandas.config import option_context -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( extension_dtypes_available, extension_float_dtypes_available, extension_object_dtypes_available, ) from pyspark.sql.types import DecimalType, IntegralType -from 
pyspark.testing.pandasutils import PandasOnSparkTestCase -class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class NumOpsTest(OpsTestBase): """Unit tests for arithmetic operations of numeric data types. A few test cases are disabled because pandas-on-Spark returns float64 whereas pandas @@ -450,7 +449,7 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils): @unittest.skipIf(not extension_dtypes_available, "pandas extension dtypes are not available") -class IntegralExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class IntegralExtensionOpsTest(OpsTestBase): @property def intergral_extension_psers(self): return [pd.Series([1, 2, 3, None], dtype=dtype) for dtype in self.integral_extension_dtypes] @@ -590,7 +589,7 @@ class IntegralExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils): @unittest.skipIf( not extension_float_dtypes_available, "pandas extension float dtypes are not available" ) -class FractionalExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class FractionalExtensionOpsTest(OpsTestBase): @property def fractional_extension_psers(self): return [ diff --git a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py index f7c45cc..572ea76 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py @@ -23,15 +23,14 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps from pyspark.pandas.config import option_context -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import extension_object_dtypes_available -from pyspark.testing.pandasutils import PandasOnSparkTestCase if extension_object_dtypes_available: from pandas import StringDtype -class StringOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class 
StringOpsTest(OpsTestBase): @property def bool_pdf(self): return pd.DataFrame({"this": ["x", "y", "z"], "that": ["z", "y", "x"]}) @@ -237,7 +236,7 @@ class StringOpsTest(PandasOnSparkTestCase, TestCasesUtils): @unittest.skipIf( not extension_object_dtypes_available, "pandas extension object dtypes are not available" ) -class StringExtensionOpsTest(StringOpsTest, PandasOnSparkTestCase, TestCasesUtils): +class StringExtensionOpsTest(StringOpsTest): @property def pser(self): return pd.Series(["x", "y", "z", None], dtype="string") diff --git a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py index 40882b8..16788c0 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py @@ -21,11 +21,10 @@ import pandas as pd from pandas.api.types import CategoricalDtype import pyspark.pandas as ps -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase -class TimedeltaOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class TimedeltaOpsTest(OpsTestBase): @property def pser(self): return pd.Series([timedelta(1), timedelta(microseconds=2), timedelta(weeks=3)]) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py index 70175c4..a71691c 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py @@ -19,11 +19,10 @@ import pandas as pd import pyspark.pandas as ps from pyspark.ml.linalg import SparseVector -from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase 
-class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils): +class UDTOpsTest(OpsTestBase): @property def pser(self): sparse_values = {0: 0.1, 1: 1.1} diff --git a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py index 9f57ad4..222b945 100644 --- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py @@ -31,6 +31,8 @@ from pyspark.pandas.typedef.typehints import ( extension_object_dtypes_available, ) +from pyspark.testing.pandasutils import ComparisonTestBase + if extension_dtypes_available: from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype @@ -41,8 +43,8 @@ if extension_object_dtypes_available: from pandas import BooleanDtype, StringDtype -class TestCasesUtils: - """A utility holding common test cases for arithmetic operations of different data types.""" +class OpsTestBase(ComparisonTestBase): + """The test base for arithmetic operations of different data types.""" @property def numeric_pdf(self): @@ -111,10 +113,6 @@ class TestCasesUtils: return pd.concat([self.numeric_pdf, self.non_numeric_pdf], axis=1) @property - def psdf(self): - return ps.from_pandas(self.pdf) - - @property def df_cols(self): return self.pdf.columns --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org