[
https://issues.apache.org/jira/browse/SPARK-54482?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated SPARK-54482:
-----------------------------------
Labels: pull-request-available (was: )
> Reenable test_apply_in_pandas_returning_column_names_*
> ------------------------------------------------------
>
> Key: SPARK-54482
> URL: https://issues.apache.org/jira/browse/SPARK-54482
> Project: Spark
> Issue Type: Sub-task
> Components: Tests
> Affects Versions: 4.1.0, 4.0.1, 4.2.0
> Reporter: Hyukjin Kwon
> Priority: Major
> Labels: pull-request-available
>
> {code}
> ======================================================================
> ERROR [0.770s]: test_apply_in_pandas_returning_column_names
> (pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map.GroupedApplyInPandasTests.test_apply_in_pandas_returning_column_names)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py",
> line 303, in test_apply_in_pandas_returning_column_names
>
> self._test_apply_in_pandas(GroupedApplyInPandasTestsMixin.stats_with_column_names)
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py",
> line 839, in _test_apply_in_pandas
> df.groupby("id").applyInPandas(f, schema=output_schema).sort("id",
> "mean").toPandas()
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/dataframe.py",
> line 1807, in toPandas
> pdf, ei = self._session.client.to_pandas(query, self._plan.observations)
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
> line 948, in to_pandas
> table, schema, metrics, observed_metrics, _ = self._execute_and_fetch(
> ^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
> line 1560, in _execute_and_fetch
> for response in self._execute_and_fetch_as_iterator(
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
> line 1537, in _execute_and_fetch_as_iterator
> self._handle_error(error)
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
> line 1811, in _handle_error
> self._handle_rpc_error(error)
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
> line 1882, in _handle_rpc_error
> raise convert_exception(
> pyspark.errors.exceptions.connect.PythonException:
> An exception was thrown from the Python worker. Please see the stack trace
> below.
> Traceback (most recent call last):
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py",
> line 3375, in main
> func, profiler, deserializer, serializer = read_udfs(pickleSer, infile,
> eval_type)
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py",
> line 2982, in read_udfs
> arg_offsets, f = read_single_udf(
> ^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py",
> line 1306, in read_single_udf
> f, return_type = read_command(pickleSer, infile)
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker_util.py",
> line 64, in read_command
> command = serializer._read_with_length(file)
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/serializers.py",
> line 173, in _read_with_length
> return self.loads(obj)
> ^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/serializers.py",
> line 461, in loads
> return cloudpickle.loads(obj, encoding=encoding)
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> AttributeError: Can't get attribute
> 'GroupedApplyInPandasTestsMixin.stats_with_column_names' on <module
> 'pyspark.sql.tests.pandas.test_pandas_grouped_map' from
> '/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/tests/pandas/test_pandas_grouped_map.py'>
> ======================================================================
> ERROR [0.766s]: test_apply_in_pandas_returning_column_names_sometimes
> (pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map.GroupedApplyInPandasTests.test_apply_in_pandas_returning_column_names_sometimes)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py",
> line 315, in test_apply_in_pandas_returning_column_names_sometimes
> pdf, ei = self._session.client.to_pandas(query, self._plan.observations)
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
> line 948, in to_pandas
> table, schema, metrics, observed_metrics, _ = self._execute_and_fetch(
> ^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
> line 1560, in _execute_and_fetch
> for response in self._execute_and_fetch_as_iterator(
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
> line 1537, in _execute_and_fetch_as_iterator
> self._handle_error(error)
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
> line 1811, in _handle_error
> self._handle_rpc_error(error)
> File
> "/home/runner/work/spark/spark-4.0/python/pyspark/sql/connect/client/core.py",
> line 1882, in _handle_rpc_error
> raise convert_exception(
> pyspark.errors.exceptions.connect.PythonException:
> An exception was thrown from the Python worker. Please see the stack trace
> below.
> Traceback (most recent call last):
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py",
> line 3375, in main
> func, profiler, deserializer, serializer = read_udfs(pickleSer, infile,
> eval_type)
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py",
> line 2982, in read_udfs
> arg_offsets, f = read_single_udf(
> ^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py",
> line 1306, in read_single_udf
> f, return_type = read_command(pickleSer, infile)
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker_util.py",
> line 64, in read_command
> command = serializer._read_with_length(file)
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/serializers.py",
> line 173, in _read_with_length
> return self.loads(obj)
> ^^^^^^^^^^^^^^^
> File
> "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/serializers.py",
> line 461, in loads
> return cloudpickle.loads(obj, encoding=encoding)
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> AttributeError: Can't get attribute
> 'GroupedApplyInPandasTestsMixin.stats_with_no_column_names' on <module
> 'pyspark.sql.tests.pandas.test_pandas_grouped_map' from
> '/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sq
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]