This is an automated email from the ASF dual-hosted git repository. huaxingao pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 8aa644e [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code 8aa644e is described below commit 8aa644e9a991cd7f965aec082adcc3a3d19d452f Author: Louiszr <zxhs...@gmail.com> AuthorDate: Sun Aug 23 21:10:52 2020 -0700 [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code ### What changes were proposed in this pull request? - Removed `foldCol`-related code introduced in #29445, which is causing issues in the base branch. - Fixed `CrossValidatorModel.copy()` so that it correctly calls `.copy()` on the individual models instead of on lists of models. ### Why are the changes needed? - `foldCol` was added in 3.1, so code referencing it causes tests to fail on branch-3.0. - `CrossValidatorModel.copy()` is supposed to shallow-copy the models, not lists of models. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - The existing tests created in #29445 were run and passed. - Updated `test_copy` to make sure `copy()` is called on models instead of on lists of models. Closes #29524 from Louiszr/remove-foldcol-3.0. 
Authored-by: Louiszr <zxhs...@gmail.com> Signed-off-by: Huaxin Gao <huax...@us.ibm.com> --- python/pyspark/ml/tests/test_tuning.py | 11 ++++------- python/pyspark/ml/tuning.py | 7 ++++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index b250740..b1acaf6 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -101,7 +101,6 @@ class CrossValidatorTests(SparkSessionTestCase): lambda x: x.getEstimator().uid, # SPARK-32092: CrossValidator.copy() needs to copy all existing params lambda x: x.getNumFolds(), - lambda x: x.getFoldCol(), lambda x: x.getCollectSubModels(), lambda x: x.getParallelism(), lambda x: x.getSeed() @@ -116,7 +115,6 @@ class CrossValidatorTests(SparkSessionTestCase): # SPARK-32092: CrossValidatorModel.copy() needs to copy all existing params for param in [ lambda x: x.getNumFolds(), - lambda x: x.getFoldCol(), lambda x: x.getSeed() ]: self.assertEqual(param(cvModel), param(cvModelCopied)) @@ -127,9 +125,9 @@ class CrossValidatorTests(SparkSessionTestCase): 'foo', "Changing the original avgMetrics should not affect the copied model" ) - cvModel.subModels[0] = 'foo' + cvModel.subModels[0][0].getInducedError = lambda: 'foo' self.assertNotEqual( - cvModelCopied.subModels[0], + cvModelCopied.subModels[0][0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) @@ -224,7 +222,6 @@ class CrossValidatorTests(SparkSessionTestCase): loadedCvModel = CrossValidatorModel.load(cvModelPath) for param in [ lambda x: x.getNumFolds(), - lambda x: x.getFoldCol(), lambda x: x.getSeed(), lambda x: len(x.subModels) ]: @@ -780,9 +777,9 @@ class TrainValidationSplitTests(SparkSessionTestCase): 'foo', "Changing the original validationMetrics should not affect the copied model" ) - tvsModel.subModels[0] = 'foo' + tvsModel.subModels[0].getInducedError = lambda: 'foo' self.assertNotEqual( - 
tvsModelCopied.subModels[0], + tvsModelCopied.subModels[0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 91f34ef..6283c8b 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -480,7 +480,10 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): extra = dict() bestModel = self.bestModel.copy(extra) avgMetrics = list(self.avgMetrics) - subModels = [model.copy() for model in self.subModels] + subModels = [ + [sub_model.copy() for sub_model in fold_sub_models] + for fold_sub_models in self.subModels + ] return self._copyValues(CrossValidatorModel(bestModel, avgMetrics, subModels), extra=extra) @since("2.3.0") @@ -511,7 +514,6 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): "estimator": estimator, "estimatorParamMaps": epms, "numFolds": java_stage.getNumFolds(), - "foldCol": java_stage.getFoldCol(), "seed": java_stage.getSeed(), } for param_name, param_val in params.items(): @@ -544,7 +546,6 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): "estimator": estimator, "estimatorParamMaps": epms, "numFolds": self.getNumFolds(), - "foldCol": self.getFoldCol(), "seed": self.getSeed(), } for param_name, param_val in params.items(): --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org