[ https://issues.apache.org/jira/browse/SPARK-46148?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Hyukjin Kwon updated SPARK-46148:
---------------------------------
    Description: 
{code}
**********************************************************************
File "/__w/spark/spark/python/pyspark/pandas/mlflow.py", line 172, in pyspark.pandas.mlflow.load_model
Failed example:
    prediction_df
Exception raised:
    Traceback (most recent call last):
      File "/usr/lib/python3.10/doctest.py", line 1350, in __run
        exec(compile(example.source, filename, "single",
      File "<doctest pyspark.pandas.mlflow.load_model[18]>", line 1, in <module>
        prediction_df
      File "/__w/spark/spark/python/pyspark/pandas/frame.py", line 13291, in __repr__
        pdf = cast("DataFrame", self._get_or_create_repr_pandas_cache(max_display_count))
      File "/__w/spark/spark/python/pyspark/pandas/frame.py", line 13282, in _get_or_create_repr_pandas_cache
        self, "_repr_pandas_cache", {n: self.head(n + 1)._to_internal_pandas()}
      File "/__w/spark/spark/python/pyspark/pandas/frame.py", line 13277, in _to_internal_pandas
        return self._internal.to_pandas_frame
      File "/__w/spark/spark/python/pyspark/pandas/utils.py", line 599, in wrapped_lazy_property
        setattr(self, attr_name, fn(self))
      File "/__w/spark/spark/python/pyspark/pandas/internal.py", line 1110, in to_pandas_frame
        pdf = sdf.toPandas()
      File "/__w/spark/spark/python/pyspark/sql/pandas/conversion.py", line 213, in toPandas
        rows = self.collect()
      File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 1369, in collect
        sock_info = self._jdf.collectToPython()
      File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1322, in __call__
        return_value = get_return_value(
      File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line 188, in deco
        raise converted from None
    pyspark.errors.exceptions.captured.PythonException: 
      An exception was thrown from the Python worker. Please see the stack trace below.
    Traceback (most recent call last):
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1523, in main
        process()
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1515, in process
        serializer.dump_stream(out_iter, outfile)
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 485, in dump_stream
        return ArrowStreamSerializer.dump_stream(self, init_stream_yield_batches(), stream)
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 101, in dump_stream
        for batch in iterator:
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 478, in init_stream_yield_batches
        for series in iterator:
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1284, in func
        for result_batch, result_type in result_iter:
      File "/usr/local/lib/python3.10/dist-packages/mlflow/pyfunc/__init__.py", line 1619, in udf
        yield _predict_row_batch(batch_predict_fn, row_batch_args)
      File "/usr/local/lib/python3.10/dist-packages/mlflow/pyfunc/__init__.py", line 1383, in _predict_row_batch
        result = predict_fn(pdf, params)
      File "/usr/local/lib/python3.10/dist-packages/mlflow/pyfunc/__init__.py", line 1601, in batch_predict_fn
        return loaded_model.predict(pdf, params=params)
      File "/usr/local/lib/python3.10/dist-packages/mlflow/pyfunc/__init__.py", line 491, in predict
        return _predict()
      File "/usr/local/lib/python3.10/dist-packages/mlflow/pyfunc/__init__.py", line 477, in _predict
        return self._predict_fn(data, params=params)
      File "/usr/local/lib/python3.10/dist-packages/mlflow/sklearn/__init__.py", line 517, in predict
        return self.sklearn_model.predict(data)
      File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_base.py", line 386, in predict
        return self._decision_function(X)
      File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_base.py", line 369, in _decision_function
        X = self._validate_data(X, accept_sparse=["csr", "csc", "coo"], reset=False)
      File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 580, in _validate_data
        self._check_feature_names(X, reset=reset)
      File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 507, in _check_feature_names
        raise ValueError(message)
    ValueError: The feature names should match those that were passed during fit.
    Feature names unseen at fit time:
    - 0
    - 1
    Feature names seen at fit time, yet now missing:
    - x1
    - x2

    JVM stacktrace:
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 1.0 failed 1 times, most recent failure: Lost task 2.0 in stage 1.0 (TID 3) (localhost executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1523, in main
        process()
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1515, in process
        serializer.dump_stream(out_iter, outfile)
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 485, in dump_stream
        return ArrowStreamSerializer.dump_stream(self, init_stream_yield_batches(), stream)
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 101, in dump_stream
        for batch in iterator:
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 478, in init_stream_yield_batches
        for series in iterator:
      File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1284, in func
        for result_batch, result_type in result_iter:
      File "/usr/local/lib/python3.10/dist-packages/mlflow/pyfunc/__init__.py", line 1619, in udf
        yield _predict_row_batch(batch_predict_fn, row_batch_args)
      File "/usr/local/lib/python3.10/dist-packages/mlflow/pyfunc/__init__.py", line 1383, in _predict_row_batch
        at scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
        at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
        at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:57)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:111)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
        at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:628)
        at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:96)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:631)
        at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
        at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
        at java.base/java.lang.Thread.run(Thread.java:840)

**********************************************************************
{code}

See https://github.com/apache/spark/actions/runs/7020654429/job/19100965399
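For context on the failure: scikit-learn estimators fitted on a pandas DataFrame record the column names and validate them again on every predict call, and the error text above shows the batch reached the model with columns named "0" and "1" rather than the fitted names "x1" and "x2". The snippet below is a minimal sketch of that check in isolation, not the doctest's code; the data values and the stringified positional labels are illustrative assumptions.

{code:python}
import pandas as pd
from sklearn.linear_model import LinearRegression

# Fitting on a DataFrame records the column names ("x1", "x2") in
# feature_names_in_, to be validated on later predict() calls.
train = pd.DataFrame({"x1": [1.0, 2.0, 3.0], "x2": [2.0, 4.0, 6.0]})
model = LinearRegression().fit(train, [0.0, 1.0, 2.0])

# A batch whose columns are the strings "0" and "1" (as the traceback
# above suggests the pyfunc UDF produced) trips the name check:
bad = pd.DataFrame([[1.5, 3.0]], columns=["0", "1"])
try:
    model.predict(bad)
except ValueError as err:
    print(err)  # "The feature names should match those that were passed during fit. ..."

# Restoring the fitted names (or using plain ndarrays both at fit and
# at predict time) avoids the mismatch:
good = bad.set_axis(["x1", "x2"], axis=1)
print(model.predict(good))
{code}

One plausible direction, then, is for the doctest to hand the model data carrying the same column names it was fitted with (or to train on unnamed arrays); the actual fix may differ.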


> Fix pyspark.pandas.mlflow.load_model test (Python 3.12)
> -------------------------------------------------------
>
>                 Key: SPARK-46148
>                 URL: https://issues.apache.org/jira/browse/SPARK-46148
>             Project: Spark
>          Issue Type: Sub-task
>          Components: PySpark
>    Affects Versions: 4.0.0
>            Reporter: Hyukjin Kwon
>            Priority: Major
>


