This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 704f956dcbed [MINOR][PYTHON][TESTS] Remove the dot in error message 
tests to allow other PyArrow versions in tests
704f956dcbed is described below

commit 704f956dcbeddc9067e4ec502c4fd07175171cac
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Tue May 7 20:07:25 2024 -0700

    [MINOR][PYTHON][TESTS] Remove the dot in error message tests to allow other 
PyArrow versions in tests
    
    This PR is a minor change to support more PyArrow versions in the test.
    
    To support more PyArrow versions in the test; otherwise it can fail: 
(https://github.com/HyukjinKwon/spark/actions/runs/8994639538/job/24708397027)
    
    ```
    Traceback (most recent call last):
      File 
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py",
 line 585, in _test_merge_error
        self.__test_merge_error(
      File 
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py",
 line 606, in __test_merge_error
        with self.assertRaisesRegex(error_class, error_message_regex):
    AssertionError: "Return type of the user-defined function should be 
pandas.DataFrame, but is int64." does not match "
      An exception was thrown from the Python worker. Please see the stack 
trace below.
    Traceback (most recent call last):
      File 
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 
1834, in main
        process()
      File 
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 
1826, in process
        serializer.dump_stream(out_iter, outfile)
      File 
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py",
 line 531, in dump_stream
        return ArrowStreamSerializer.dump_stream(self, 
init_stream_yield_batches(), stream)
               
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File 
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py",
 line 104, in dump_stream
        for batch in iterator:
      File 
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py",
 line 524, in init_stream_yield_batches
        for series in iterator:
      File 
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 
1694, in mapper
        return f(df1_keys, df1_vals, df2_keys, df2_vals)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File 
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 
370, in <lambda>
        return lambda kl, vl, kr, vr: [(wrapped(kl, vl, kr, vr), 
to_arrow_type(return_type))]
                                        ^^^^^^^^^^^^^^^^^^^^^^^
      File 
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 
364, in wrapped
        verify_pandas_result(
      File 
"/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 
234, in verify_pandas_result
        raise PySparkTypeError(
    pyspark.errors.exceptions.base.PySparkTypeError: [UDF_RETURN_TYPE] Return 
type of the user-defined function should be pandas.DataFrame, but is int.
    ```
    
    No, test-only.
    
    CI should validate it.
    
    No.
    
    Closes #46453 from HyukjinKwon/minor-test.
    
    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py | 2 +-
 python/pyspark/sql/tests/pandas/test_pandas_map.py           | 4 ++--
 python/pyspark/sql/tests/test_arrow_map.py                   | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py 
b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py
index c3cd0f37b103..948ef4a53f2c 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py
@@ -166,7 +166,7 @@ class CogroupedApplyInPandasTestsMixin:
             fn=lambda lft, rgt: lft.size + rgt.size,
             error_class=PythonException,
             error_message_regex="Return type of the user-defined function "
-            "should be pandas.DataFrame, but is int64.",
+            "should be pandas.DataFrame, but is int",
         )
 
     def test_apply_in_pandas_returning_column_names(self):
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py 
b/python/pyspark/sql/tests/pandas/test_pandas_map.py
index c3ba7b3e93a0..4b2be2bcf844 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_map.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py
@@ -151,14 +151,14 @@ class MapInPandasTestsMixin:
         with self.assertRaisesRegex(
             PythonException,
             "Return type of the user-defined function should be iterator of 
pandas.DataFrame, "
-            "but is int.",
+            "but is int",
         ):
             (self.spark.range(10, numPartitions=3).mapInPandas(no_iter, "a 
int").count())
 
         with self.assertRaisesRegex(
             PythonException,
             "Return type of the user-defined function should be iterator of 
pandas.DataFrame, "
-            "but is iterator of int.",
+            "but is iterator of int",
         ):
             (self.spark.range(10, numPartitions=3).mapInPandas(bad_iter_elem, 
"a int").count())
 
diff --git a/python/pyspark/sql/tests/test_arrow_map.py 
b/python/pyspark/sql/tests/test_arrow_map.py
index 15367743585e..176286a809d4 100644
--- a/python/pyspark/sql/tests/test_arrow_map.py
+++ b/python/pyspark/sql/tests/test_arrow_map.py
@@ -104,14 +104,14 @@ class MapInArrowTestsMixin(object):
         with self.assertRaisesRegex(
             PythonException,
             "Return type of the user-defined function should be iterator "
-            "of pyarrow.RecordBatch, but is int.",
+            "of pyarrow.RecordBatch, but is int",
         ):
             (self.spark.range(10, numPartitions=3).mapInArrow(not_iter, "a 
int").count())
 
         with self.assertRaisesRegex(
             PythonException,
             "Return type of the user-defined function should be iterator "
-            "of pyarrow.RecordBatch, but is iterator of int.",
+            "of pyarrow.RecordBatch, but is iterator of int",
         ):
             (self.spark.range(10, numPartitions=3).mapInArrow(bad_iter_elem, 
"a int").count())
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to