This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new de5c512e0179 [SPARK-47987][PYTHON][CONNECT][TESTS] Enable `ArrowParityTests.test_createDataFrame_empty_partition`
de5c512e0179 is described below

commit de5c512e017965b5c726e254f8969fb17d5c17ea
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Thu Apr 25 08:16:56 2024 -0700

    [SPARK-47987][PYTHON][CONNECT][TESTS] Enable `ArrowParityTests.test_createDataFrame_empty_partition`

    ### What changes were proposed in this pull request?
    Re-enable `ArrowParityTests.test_createDataFrame_empty_partition`.

    The Classic SparkContext handle `_legacy_sc` is already set up for the Spark Connect
    tests, so we only need to add `_legacy_sc` to the Classic PySpark test setup as well.

    ### Why are the changes needed?
    To improve test coverage.

    ### Does this PR introduce _any_ user-facing change?
    No, test only.

    ### How was this patch tested?
    CI.

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #46220 from zhengruifeng/enable_test_createDataFrame_empty_partition.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 python/pyspark/sql/tests/connect/test_parity_arrow.py | 4 ----
 python/pyspark/sql/tests/test_arrow.py                | 4 +++-
 python/pyspark/testing/sqlutils.py                    | 1 +
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/pyspark/sql/tests/connect/test_parity_arrow.py b/python/pyspark/sql/tests/connect/test_parity_arrow.py
index 93d0b6cf0f5f..8727cc279641 100644
--- a/python/pyspark/sql/tests/connect/test_parity_arrow.py
+++ b/python/pyspark/sql/tests/connect/test_parity_arrow.py
@@ -24,10 +24,6 @@ from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
 class ArrowParityTests(ArrowTestsMixin, ReusedConnectTestCase, PandasOnSparkTestUtils):
-    @unittest.skip("Spark Connect does not support Spark Context but the test depends on that.")
-    def test_createDataFrame_empty_partition(self):
-        super().test_createDataFrame_empty_partition()
-
     @unittest.skip("Spark Connect does not support fallback.")
     def test_createDataFrame_fallback_disabled(self):
         super().test_createDataFrame_fallback_disabled()
diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py
index 5235e021bae9..03cb35feb994 100644
--- a/python/pyspark/sql/tests/test_arrow.py
+++ b/python/pyspark/sql/tests/test_arrow.py
@@ -56,6 +56,7 @@ from pyspark.testing.sqlutils import (
     ExamplePointUDT,
 )
 from pyspark.errors import ArithmeticException, PySparkTypeError, UnsupportedOperationException
+from pyspark.util import is_remote_only
 
 if have_pandas:
     import pandas as pd
@@ -830,7 +831,8 @@ class ArrowTestsMixin:
         pdf = pd.DataFrame({"c1": [1], "c2": ["string"]})
         df = self.spark.createDataFrame(pdf)
         self.assertEqual([Row(c1=1, c2="string")], df.collect())
-        self.assertGreater(self.spark.sparkContext.defaultParallelism, len(pdf))
+        if not is_remote_only():
+            self.assertGreater(self._legacy_sc.defaultParallelism, len(pdf))
 
     def test_toPandas_error(self):
         for arrow_enabled in [True, False]:
diff --git a/python/pyspark/testing/sqlutils.py b/python/pyspark/testing/sqlutils.py
index 690d5c37b22e..a0fdada72972 100644
--- a/python/pyspark/testing/sqlutils.py
+++ b/python/pyspark/testing/sqlutils.py
@@ -258,6 +258,7 @@ class ReusedSQLTestCase(ReusedPySparkTestCase, SQLTestUtils, PySparkErrorTestUti
     @classmethod
     def setUpClass(cls):
         super(ReusedSQLTestCase, cls).setUpClass()
+        cls._legacy_sc = cls.sc
         cls.spark = SparkSession(cls.sc)
         cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
         os.unlink(cls.tempdir.name)
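
For context, the pattern this patch applies can be exercised standalone. The sketch
below is illustrative only and is not part of the commit: the test-class name and the
`local[4]` master are assumptions made for the example, while `is_remote_only` and the
`_legacy_sc` handle come from the patch above. It guards the SparkContext-dependent
assertion so the same test body passes under Classic PySpark and under a remote-only
(Spark Connect) build.

    # Illustrative sketch, not part of the commit: exercises the guarded-
    # assertion pattern from the patch. Assumes a local Classic PySpark
    # install with pandas and PyArrow available; the class name and the
    # "local[4]" master are made up for this example.
    import unittest

    import pandas as pd

    from pyspark.sql import Row, SparkSession
    from pyspark.util import is_remote_only


    class EmptyPartitionSketch(unittest.TestCase):
        @classmethod
        def setUpClass(cls):
            cls.spark = SparkSession.builder.master("local[4]").getOrCreate()
            if not is_remote_only():
                # Mirror the patched ReusedSQLTestCase.setUpClass: keep the
                # classic SparkContext under the name the shared test expects.
                cls._legacy_sc = cls.spark.sparkContext

        @classmethod
        def tearDownClass(cls):
            cls.spark.stop()

        def test_createDataFrame_empty_partition(self):
            pdf = pd.DataFrame({"c1": [1], "c2": ["string"]})
            df = self.spark.createDataFrame(pdf)
            self.assertEqual([Row(c1=1, c2="string")], df.collect())
            if not is_remote_only():
                # SparkContext-dependent check; skipped automatically on a
                # remote-only (Spark Connect) build where no context exists.
                self.assertGreater(self._legacy_sc.defaultParallelism, len(pdf))


    if __name__ == "__main__":
        unittest.main()

On a Classic build the guarded assertion runs against the real SparkContext; on a
Connect-only build `is_remote_only()` returns True and the check is skipped, matching
the behavior of the re-enabled parity test.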
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org