[ https://issues.apache.org/jira/browse/SPARK-46177?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Hyukjin Kwon reassigned SPARK-46177:
------------------------------------

    Assignee: Hyukjin Kwon

>  Skip 'CrossValidatorTests.test_crossvalidator_with_fold_col' with Python 3.12
> ------------------------------------------------------------------------------
>
>                 Key: SPARK-46177
>                 URL: https://issues.apache.org/jira/browse/SPARK-46177
>             Project: Spark
>          Issue Type: Sub-task
>          Components: ML, PySpark, Tests
>    Affects Versions: 4.0.0
>            Reporter: Hyukjin Kwon
>            Assignee: Hyukjin Kwon
>            Priority: Minor
>              Labels: pull-request-available
>
> {code}
> ======================================================================
> ERROR [68.963s]: test_crossvalidator_with_fold_col (pyspark.ml.tests.connect.test_legacy_mode_tuning.CrossValidatorTests.test_crossvalidator_with_fold_col)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File "/__w/spark/spark/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py", line 274, in test_crossvalidator_with_fold_col
>     cv.fit(train_dataset)
>   File "/__w/spark/spark/python/pyspark/ml/connect/base.py", line 105, in fit
>     return self._fit(dataset)
>            ^^^^^^^^^^^^^^^^^^
>   File "/__w/spark/spark/python/pyspark/ml/connect/tuning.py", line 447, in _fit
>     bestModel = cast(Model, est.fit(dataset, epm[bestIndex]))
>                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File "/__w/spark/spark/python/pyspark/ml/connect/base.py", line 103, in fit
>     return self.copy(params)._fit(dataset)
>            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File "/__w/spark/spark/python/pyspark/ml/connect/classification.py", line 251, in _fit
>     model_state_dict = distributor._train_on_dataframe(
>                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File "/__w/spark/spark/python/pyspark/ml/torch/distributor.py", line 1043, in _train_on_dataframe
>     return self._run_distributed_training(
>            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File "/__w/spark/spark/python/pyspark/ml/torch/distributor.py", line 804, in _run_distributed_training
>     ).collect()
>       ^^^^^^^^^
>   File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 1369, in collect
>     sock_info = self._jdf.collectToPython()
>                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1322, in __call__
>     return_value = get_return_value(
>                    ^^^^^^^^^^^^^^^^^
>   File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line 182, in deco
>     return f(*a, **kw)
>            ^^^^^^^^^^^
>   File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py", line 326, in get_return_value
>     raise Py4JJavaError(
> py4j.protocol.Py4JJavaError: An error occurred while calling o2582.collectToPython.
> : org.apache.spark.SparkException: Job aborted due to stage failure: Could not recover from a failed barrier ResultStage. Most recent failure reason: Stage failed because barrier task ResultTask(89, 0) finished unsuccessfully.
> org.apache.spark.api.python.PythonException: Traceback (most recent call last):
>   File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1535, in main
>     process()
>   File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1527, in process
>     serializer.dump_stream(out_iter, outfile)
>   File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 162, in dump_stream
>     return super(ArrowStreamUDFSerializer, self).dump_stream(wrap_and_init_stream(), stream)
>            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 101, in dump_stream
>     for batch in iterator:
>   File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 147, in wrap_and_init_stream
>     for batch, _ in iterator:
>   File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1296, in func
>     for result_batch, result_type in result_iter:
>   File "/__w/spark/spark/python/pyspark/ml/torch/distributor.py", line 721, in wrapped_train_fn
>     output = TorchDistributor._get_output_from_framework_wrapper(
>              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/ml/torch/distributor.py", line 567, in _get_output_from_framework_wrapper
>     return framework_wrapper(
>            ^^^^^^^^^^^^^^^^^^
>   File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/ml/torch/distributor.py", line 908, in _run_training_on_pytorch_function
>     raise RuntimeError(
> RuntimeError: TorchDistributor failed during training.View stdout logs for detailed error message.
>       at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:517)
>       at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:117)
>       at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:473)
>       at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
>       at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:601)
>       at scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
>       at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:388)
>       at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:891)
>       at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:891)
>       at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
>       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
>       at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
>       at org.apache.spark.scheduler.Task.run(Task.scala:141)
>       at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:628)
>       at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
>       at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
>       at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:96)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:631)
>       at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
>       at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
>       at java.base/java.lang.Thread.run(Thread.java:840)
>       at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2820)
>       at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2817)
>       at scala.collection.immutable.List.foreach(List.scala:333)
>       at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2817)
>       at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:2252)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3081)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3021)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3010)
>       at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>       at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:990)
>       at org.apache.spark.SparkContext.runJob(SparkContext.scala:2428)
>       at org.apache.spark.SparkContext.runJob(SparkContext.scala:2449)
>       at org.apache.spark.SparkContext.runJob(SparkContext.scala:2468)
>       at org.apache.spark.SparkContext.runJob(SparkContext.scala:2493)
>       at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1047)
>       at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>       at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>       at org.apache.spark.rdd.RDD.withScope(RDD.scala:408)
>       at org.apache.spark.rdd.RDD.collect(RDD.scala:1046)
>       at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:448)
>       at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$executeCollect$1(AdaptiveSparkPlanExec.scala:380)
>       at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.withFinalPlanUpdate(AdaptiveSparkPlanExec.scala:408)
>       at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.executeCollect(AdaptiveSparkPlanExec.scala:380)
>       at org.apache.spark.sql.Dataset.$anonfun$collectToPython$1(Dataset.scala:4246)
>       at org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:4420)
>       at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:557)
>       at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:4418)
>       at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:150)
>       at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:241)
>       at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:116)
>       at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:918)
>       at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:72)
>       at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:196)
>       at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4418)
>       at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:4243)
>       at jdk.internal.reflect.GeneratedMethodAccessor84.invoke(Unknown Source)
>       at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.base/java.lang.reflect.Method.invoke(Method.java:568)
>       at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>       at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
>       at py4j.Gateway.invoke(Gateway.java:282)
>       at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>       at py4j.commands.CallCommand.execute(CallCommand.java:79)
>       at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
>       at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
>       at java.base/java.lang.Thread.run(Thread.java:840)
> ----------------------------------------------------------------------
> {code}
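
For reference, a minimal sketch of the kind of guard the summary describes, assuming the standard unittest.skipIf decorator (the class and test names below simply mirror the traceback; the actual patch may gate the test differently):

{code}
# Sketch only: skip the failing TorchDistributor-backed test on Python 3.12.
import sys
import unittest


class CrossValidatorTests(unittest.TestCase):
    @unittest.skipIf(
        sys.version_info >= (3, 12),
        "SPARK-46177: TorchDistributor-based training fails under Python 3.12",
    )
    def test_crossvalidator_with_fold_col(self):
        ...  # existing test body unchanged
{code}

Keying the condition on sys.version_info keeps the test running on Python 3.11 and earlier while excluding only 3.12.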


