This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ed5aa56f1200 [SPARK-48055][PYTHON][CONNECT][TESTS] Enable `PandasUDFScalarParityTests.{test_vectorized_udf_empty_partition, test_vectorized_udf_struct_with_empty_partition}` ed5aa56f1200 is described below commit ed5aa56f1200bc1b0a455269eeb57863b2043fa1 Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Tue Apr 30 14:37:30 2024 +0800 [SPARK-48055][PYTHON][CONNECT][TESTS] Enable `PandasUDFScalarParityTests.{test_vectorized_udf_empty_partition, test_vectorized_udf_struct_with_empty_partition}` ### What changes were proposed in this pull request? enable two tests in `PandasUDFScalarParityTests` ### Why are the changes needed? test coverage ### Does this PR introduce _any_ user-facing change? no, test only ### How was this patch tested? CI ### Was this patch authored or co-authored using generative AI tooling? no Closes #46296 from zhengruifeng/enable_test_vectorized_udf_empty_partition. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- .../sql/tests/connect/test_parity_pandas_udf_scalar.py | 11 ----------- python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py | 8 +++++--- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py b/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py index b42bfaf0f58d..590ab695ee07 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py @@ -21,17 +21,6 @@ from pyspark.testing.connectutils import ReusedConnectTestCase class PandasUDFScalarParityTests(ScalarPandasUDFTestsMixin, ReusedConnectTestCase): - def test_nondeterministic_vectorized_udf_in_aggregate(self): - self.check_nondeterministic_analysis_exception() - - @unittest.skip("Spark Connect doesn't support RDD but the test depends on it.") - def 
test_vectorized_udf_empty_partition(self): - super().test_vectorized_udf_empty_partition() - - @unittest.skip("Spark Connect doesn't support RDD but the test depends on it.") - def test_vectorized_udf_struct_with_empty_partition(self): - super().test_vectorized_udf_struct_with_empty_partition() - # TODO(SPARK-43727): Parity returnType check in Spark Connect @unittest.skip("Fails in Spark Connect, should enable.") def test_vectorized_udf_wrong_return_type(self): diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py index 9edd585da6a0..38bc633cd1ed 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py @@ -764,15 +764,17 @@ class ScalarPandasUDFTestsMixin: self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_empty_partition(self): - df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2)) + df = self.spark.createDataFrame([Row(id=1)]).repartition(2) for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: f = pandas_udf(lambda x: x, LongType(), udf_type) res = df.select(f(col("id"))) self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_struct_with_empty_partition(self): - df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2)).withColumn( - "name", lit("John Doe") + df = ( + self.spark.createDataFrame([Row(id=1)]) + .repartition(2) + .withColumn("name", lit("John Doe")) ) @pandas_udf("first string, last string") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org