[ https://issues.apache.org/jira/browse/SPARK-48084?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Hyukjin Kwon reassigned SPARK-48084:
------------------------------------

    Assignee: Weichen Xu

> pyspark.ml.connect.evaluation not working in 3.5 client <> 4.0 server
> ---------------------------------------------------------------------
>
>                 Key: SPARK-48084
>                 URL: https://issues.apache.org/jira/browse/SPARK-48084
>             Project: Spark
>          Issue Type: Sub-task
>          Components: ML, PySpark
>    Affects Versions: 4.0.0
>            Reporter: Hyukjin Kwon
>            Assignee: Weichen Xu
>            Priority: Major
>
> {code}
> ======================================================================
> ERROR [3.966s]: test_regressor_evaluator (pyspark.ml.tests.connect.test_connect_evaluation.EvaluationTestsOnConnect.test_regressor_evaluator)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py", line 69, in test_regressor_evaluator
>     rmse = rmse_evaluator.evaluate(df1)
>            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py", line 255, in evaluate
>     return self._evaluate(dataset)
>            ^^^^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/evaluation.py", line 70, in _evaluate
>     return aggregate_dataframe(
>            ^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/util.py", line 93, in aggregate_dataframe
>     state = cloudpickle.loads(state)
>             ^^^^^^^^^^^^^^^^^^^^^^^^
> AttributeError: Can't get attribute '_class_setstate' on <module 'pyspark.cloudpickle.cloudpickle' from '/home/runner/work/spark/spark-3.5/python/pyspark/cloudpickle/cloudpickle.py'>
> ----------------------------------------------------------------------
> {code}
> {code}
> ======================================================================
> ERROR [4.664s]: test_copy (pyspark.ml.tests.connect.test_connect_tuning.CrossValidatorTestsOnConnect.test_copy)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py", line 115, in test_copy
>     cvModel = cv.fit(dataset)
>               ^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py", line 106, in fit
>     return self._fit(dataset)
>            ^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 437, in _fit
>     for j, metric in pool.imap_unordered(lambda f: f(), tasks):
>   File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py", line 873, in next
>     raise value
>   File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py", line 125, in worker
>     result = (True, func(*args, **kwds))
>                     ^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 437, in <lambda>
>     for j, metric in pool.imap_unordered(lambda f: f(), tasks):
>                                                    ^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 188, in single_task
>     metric = evaluator.evaluate(
>              ^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py", line 255, in evaluate
>     return self._evaluate(dataset)
>            ^^^^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/evaluation.py", line 70, in _evaluate
>     return aggregate_dataframe(
>            ^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/util.py", line 93, in aggregate_dataframe
>     state = cloudpickle.loads(state)
>             ^^^^^^^^^^^^^^^^^^^^^^^^
> AttributeError: Can't get attribute '_class_setstate' on <module 'pyspark.cloudpickle.cloudpickle' from '/home/runner/work/spark/spark-3.5/python/pyspark/cloudpickle/cloudpickle.py'>
> {code}
> {code}
> ======================================================================
> ERROR [3.938s]: test_fit_minimize_metric (pyspark.ml.tests.connect.test_connect_tuning.CrossValidatorTestsOnConnect.test_fit_minimize_metric)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py", line 149, in test_fit_minimize_metric
>     cvModel = cv.fit(dataset)
>               ^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py", line 106, in fit
>     return self._fit(dataset)
>            ^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 437, in _fit
>     for j, metric in pool.imap_unordered(lambda f: f(), tasks):
>   File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py", line 873, in next
>     raise value
>   File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/multiprocessing/pool.py", line 125, in worker
>     result = (True, func(*args, **kwds))
>                     ^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 437, in <lambda>
>     for j, metric in pool.imap_unordered(lambda f: f(), tasks):
>                                                    ^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/tuning.py", line 188, in single_task
>     metric = evaluator.evaluate(
>              ^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/base.py", line 255, in evaluate
>     return self._evaluate(dataset)
>            ^^^^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/evaluation.py", line 70, in _evaluate
>     return aggregate_dataframe(
>            ^^^^^^^^^^^^^^^^^^^^
>   File "/home/runner/work/spark/spark-3.5/python/pyspark/ml/connect/util.py", line 93, in aggregate_dataframe
>     state = cloudpickle.loads(state)
>             ^^^^^^^^^^^^^^^^^^^^^^^^
> AttributeError: Can't get attribute '_class_setstate' on <module 'pyspark.cloudpickle.cloudpickle' from '/home/runner/work/spark/spark-3.5/python/pyspark/cloudpickle/cloudpickle.py'>
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to