[spark] branch branch-3.0 updated: [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code
This is an automated email from the ASF dual-hosted git repository. huaxingao pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 8aa644e [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code 8aa644e is described below commit 8aa644e9a991cd7f965aec082adcc3a3d19d452f Author: Louiszr AuthorDate: Sun Aug 23 21:10:52 2020 -0700 [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code ### What changes were proposed in this pull request? - Removed `foldCol`-related code introduced in #29445, which is causing issues in the base branch. - Fixed `CrossValidatorModel.copy()` so that it correctly calls `.copy()` on the models instead of on the lists of models. ### Why are the changes needed? - `foldCol` is a 3.1 feature, so the code that references it causes tests to fail on branch-3.0. - `CrossValidatorModel.copy()` is supposed to shallow-copy the models, not the lists of models. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Existing tests created in #29445 were run and passed. - Updated `test_copy` to make sure `copy()` is called on the models instead of on the lists of models. Closes #29524 from Louiszr/remove-foldcol-3.0. 
Authored-by: Louiszr Signed-off-by: Huaxin Gao --- python/pyspark/ml/tests/test_tuning.py | 11 --- python/pyspark/ml/tuning.py| 7 --- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index b250740..b1acaf6 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -101,7 +101,6 @@ class CrossValidatorTests(SparkSessionTestCase): lambda x: x.getEstimator().uid, # SPARK-32092: CrossValidator.copy() needs to copy all existing params lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getCollectSubModels(), lambda x: x.getParallelism(), lambda x: x.getSeed() @@ -116,7 +115,6 @@ class CrossValidatorTests(SparkSessionTestCase): # SPARK-32092: CrossValidatorModel.copy() needs to copy all existing params for param in [ lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getSeed() ]: self.assertEqual(param(cvModel), param(cvModelCopied)) @@ -127,9 +125,9 @@ class CrossValidatorTests(SparkSessionTestCase): 'foo', "Changing the original avgMetrics should not affect the copied model" ) -cvModel.subModels[0] = 'foo' +cvModel.subModels[0][0].getInducedError = lambda: 'foo' self.assertNotEqual( -cvModelCopied.subModels[0], +cvModelCopied.subModels[0][0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) @@ -224,7 +222,6 @@ class CrossValidatorTests(SparkSessionTestCase): loadedCvModel = CrossValidatorModel.load(cvModelPath) for param in [ lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getSeed(), lambda x: len(x.subModels) ]: @@ -780,9 +777,9 @@ class TrainValidationSplitTests(SparkSessionTestCase): 'foo', "Changing the original validationMetrics should not affect the copied model" ) -tvsModel.subModels[0] = 'foo' +tvsModel.subModels[0].getInducedError = lambda: 'foo' self.assertNotEqual( -tvsModelCopied.subModels[0], 
+tvsModelCopied.subModels[0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 91f34ef..6283c8b 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -480,7 +480,10 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): extra = dict() bestModel = self.bestModel.copy(extra) avgMetrics = list(self.avgMetrics) -subModels = [model.copy() for model in self.subModels] +subModels = [ +[sub_model.copy() for sub_model in fold_sub_models] +for fold_sub_models in self.subModels +] return self._copyValues(CrossValidatorModel(bestModel, avgMetrics, subModels), extra=extra) @since("2.3.0") @@ -511,7 +514,6 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): "estimator": estimator, "estimatorParamMaps": epms, "numFolds": java_stage.getNumFolds(), -"foldCol": java_stage.getFoldCol(), "seed":
[spark] branch branch-3.0 updated: [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code
This is an automated email from the ASF dual-hosted git repository. huaxingao pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 8aa644e [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code 8aa644e is described below commit 8aa644e9a991cd7f965aec082adcc3a3d19d452f Author: Louiszr AuthorDate: Sun Aug 23 21:10:52 2020 -0700 [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code ### What changes were proposed in this pull request? - Removed `foldCol`-related code introduced in #29445, which is causing issues in the base branch. - Fixed `CrossValidatorModel.copy()` so that it correctly calls `.copy()` on the models instead of on the lists of models. ### Why are the changes needed? - `foldCol` is a 3.1 feature, so the code that references it causes tests to fail on branch-3.0. - `CrossValidatorModel.copy()` is supposed to shallow-copy the models, not the lists of models. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Existing tests created in #29445 were run and passed. - Updated `test_copy` to make sure `copy()` is called on the models instead of on the lists of models. Closes #29524 from Louiszr/remove-foldcol-3.0. 
Authored-by: Louiszr Signed-off-by: Huaxin Gao --- python/pyspark/ml/tests/test_tuning.py | 11 --- python/pyspark/ml/tuning.py| 7 --- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index b250740..b1acaf6 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -101,7 +101,6 @@ class CrossValidatorTests(SparkSessionTestCase): lambda x: x.getEstimator().uid, # SPARK-32092: CrossValidator.copy() needs to copy all existing params lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getCollectSubModels(), lambda x: x.getParallelism(), lambda x: x.getSeed() @@ -116,7 +115,6 @@ class CrossValidatorTests(SparkSessionTestCase): # SPARK-32092: CrossValidatorModel.copy() needs to copy all existing params for param in [ lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getSeed() ]: self.assertEqual(param(cvModel), param(cvModelCopied)) @@ -127,9 +125,9 @@ class CrossValidatorTests(SparkSessionTestCase): 'foo', "Changing the original avgMetrics should not affect the copied model" ) -cvModel.subModels[0] = 'foo' +cvModel.subModels[0][0].getInducedError = lambda: 'foo' self.assertNotEqual( -cvModelCopied.subModels[0], +cvModelCopied.subModels[0][0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) @@ -224,7 +222,6 @@ class CrossValidatorTests(SparkSessionTestCase): loadedCvModel = CrossValidatorModel.load(cvModelPath) for param in [ lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getSeed(), lambda x: len(x.subModels) ]: @@ -780,9 +777,9 @@ class TrainValidationSplitTests(SparkSessionTestCase): 'foo', "Changing the original validationMetrics should not affect the copied model" ) -tvsModel.subModels[0] = 'foo' +tvsModel.subModels[0].getInducedError = lambda: 'foo' self.assertNotEqual( -tvsModelCopied.subModels[0], 
+tvsModelCopied.subModels[0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 91f34ef..6283c8b 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -480,7 +480,10 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): extra = dict() bestModel = self.bestModel.copy(extra) avgMetrics = list(self.avgMetrics) -subModels = [model.copy() for model in self.subModels] +subModels = [ +[sub_model.copy() for sub_model in fold_sub_models] +for fold_sub_models in self.subModels +] return self._copyValues(CrossValidatorModel(bestModel, avgMetrics, subModels), extra=extra) @since("2.3.0") @@ -511,7 +514,6 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): "estimator": estimator, "estimatorParamMaps": epms, "numFolds": java_stage.getNumFolds(), -"foldCol": java_stage.getFoldCol(), "seed":
[spark] branch branch-3.0 updated: [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code
This is an automated email from the ASF dual-hosted git repository. huaxingao pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 8aa644e [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code 8aa644e is described below commit 8aa644e9a991cd7f965aec082adcc3a3d19d452f Author: Louiszr AuthorDate: Sun Aug 23 21:10:52 2020 -0700 [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code ### What changes were proposed in this pull request? - Removed `foldCol`-related code introduced in #29445, which is causing issues in the base branch. - Fixed `CrossValidatorModel.copy()` so that it correctly calls `.copy()` on the models instead of on the lists of models. ### Why are the changes needed? - `foldCol` is a 3.1 feature, so the code that references it causes tests to fail on branch-3.0. - `CrossValidatorModel.copy()` is supposed to shallow-copy the models, not the lists of models. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Existing tests created in #29445 were run and passed. - Updated `test_copy` to make sure `copy()` is called on the models instead of on the lists of models. Closes #29524 from Louiszr/remove-foldcol-3.0. 
Authored-by: Louiszr Signed-off-by: Huaxin Gao --- python/pyspark/ml/tests/test_tuning.py | 11 --- python/pyspark/ml/tuning.py| 7 --- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index b250740..b1acaf6 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -101,7 +101,6 @@ class CrossValidatorTests(SparkSessionTestCase): lambda x: x.getEstimator().uid, # SPARK-32092: CrossValidator.copy() needs to copy all existing params lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getCollectSubModels(), lambda x: x.getParallelism(), lambda x: x.getSeed() @@ -116,7 +115,6 @@ class CrossValidatorTests(SparkSessionTestCase): # SPARK-32092: CrossValidatorModel.copy() needs to copy all existing params for param in [ lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getSeed() ]: self.assertEqual(param(cvModel), param(cvModelCopied)) @@ -127,9 +125,9 @@ class CrossValidatorTests(SparkSessionTestCase): 'foo', "Changing the original avgMetrics should not affect the copied model" ) -cvModel.subModels[0] = 'foo' +cvModel.subModels[0][0].getInducedError = lambda: 'foo' self.assertNotEqual( -cvModelCopied.subModels[0], +cvModelCopied.subModels[0][0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) @@ -224,7 +222,6 @@ class CrossValidatorTests(SparkSessionTestCase): loadedCvModel = CrossValidatorModel.load(cvModelPath) for param in [ lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getSeed(), lambda x: len(x.subModels) ]: @@ -780,9 +777,9 @@ class TrainValidationSplitTests(SparkSessionTestCase): 'foo', "Changing the original validationMetrics should not affect the copied model" ) -tvsModel.subModels[0] = 'foo' +tvsModel.subModels[0].getInducedError = lambda: 'foo' self.assertNotEqual( -tvsModelCopied.subModels[0], 
+tvsModelCopied.subModels[0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 91f34ef..6283c8b 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -480,7 +480,10 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): extra = dict() bestModel = self.bestModel.copy(extra) avgMetrics = list(self.avgMetrics) -subModels = [model.copy() for model in self.subModels] +subModels = [ +[sub_model.copy() for sub_model in fold_sub_models] +for fold_sub_models in self.subModels +] return self._copyValues(CrossValidatorModel(bestModel, avgMetrics, subModels), extra=extra) @since("2.3.0") @@ -511,7 +514,6 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): "estimator": estimator, "estimatorParamMaps": epms, "numFolds": java_stage.getNumFolds(), -"foldCol": java_stage.getFoldCol(), "seed":
[spark] branch branch-3.0 updated: [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code
This is an automated email from the ASF dual-hosted git repository. huaxingao pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new 8aa644e [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code 8aa644e is described below commit 8aa644e9a991cd7f965aec082adcc3a3d19d452f Author: Louiszr AuthorDate: Sun Aug 23 21:10:52 2020 -0700 [SPARK-32092][ML][PYSPARK][3.0] Removed foldCol related code ### What changes were proposed in this pull request? - Removed `foldCol`-related code introduced in #29445, which is causing issues in the base branch. - Fixed `CrossValidatorModel.copy()` so that it correctly calls `.copy()` on the models instead of on the lists of models. ### Why are the changes needed? - `foldCol` is a 3.1 feature, so the code that references it causes tests to fail on branch-3.0. - `CrossValidatorModel.copy()` is supposed to shallow-copy the models, not the lists of models. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Existing tests created in #29445 were run and passed. - Updated `test_copy` to make sure `copy()` is called on the models instead of on the lists of models. Closes #29524 from Louiszr/remove-foldcol-3.0. 
Authored-by: Louiszr Signed-off-by: Huaxin Gao --- python/pyspark/ml/tests/test_tuning.py | 11 --- python/pyspark/ml/tuning.py| 7 --- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index b250740..b1acaf6 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -101,7 +101,6 @@ class CrossValidatorTests(SparkSessionTestCase): lambda x: x.getEstimator().uid, # SPARK-32092: CrossValidator.copy() needs to copy all existing params lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getCollectSubModels(), lambda x: x.getParallelism(), lambda x: x.getSeed() @@ -116,7 +115,6 @@ class CrossValidatorTests(SparkSessionTestCase): # SPARK-32092: CrossValidatorModel.copy() needs to copy all existing params for param in [ lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getSeed() ]: self.assertEqual(param(cvModel), param(cvModelCopied)) @@ -127,9 +125,9 @@ class CrossValidatorTests(SparkSessionTestCase): 'foo', "Changing the original avgMetrics should not affect the copied model" ) -cvModel.subModels[0] = 'foo' +cvModel.subModels[0][0].getInducedError = lambda: 'foo' self.assertNotEqual( -cvModelCopied.subModels[0], +cvModelCopied.subModels[0][0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) @@ -224,7 +222,6 @@ class CrossValidatorTests(SparkSessionTestCase): loadedCvModel = CrossValidatorModel.load(cvModelPath) for param in [ lambda x: x.getNumFolds(), -lambda x: x.getFoldCol(), lambda x: x.getSeed(), lambda x: len(x.subModels) ]: @@ -780,9 +777,9 @@ class TrainValidationSplitTests(SparkSessionTestCase): 'foo', "Changing the original validationMetrics should not affect the copied model" ) -tvsModel.subModels[0] = 'foo' +tvsModel.subModels[0].getInducedError = lambda: 'foo' self.assertNotEqual( -tvsModelCopied.subModels[0], 
+tvsModelCopied.subModels[0].getInducedError(), 'foo', "Changing the original subModels should not affect the copied model" ) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 91f34ef..6283c8b 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -480,7 +480,10 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): extra = dict() bestModel = self.bestModel.copy(extra) avgMetrics = list(self.avgMetrics) -subModels = [model.copy() for model in self.subModels] +subModels = [ +[sub_model.copy() for sub_model in fold_sub_models] +for fold_sub_models in self.subModels +] return self._copyValues(CrossValidatorModel(bestModel, avgMetrics, subModels), extra=extra) @since("2.3.0") @@ -511,7 +514,6 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): "estimator": estimator, "estimatorParamMaps": epms, "numFolds": java_stage.getNumFolds(), -"foldCol": java_stage.getFoldCol(), "seed":