spark git commit: [SPARK-20631][PYTHON][ML] LogisticRegression._checkThresholdConsistency should use values not Params
Repository: spark Updated Branches: refs/heads/branch-2.0 46659974e -> d86dae8fe [SPARK-20631][PYTHON][ML] LogisticRegression._checkThresholdConsistency should use values not Params ## What changes were proposed in this pull request? - Replace `getParam` calls with `getOrDefault` calls. - Fix exception message to avoid unintended `TypeError`. - Add unit tests ## How was this patch tested? New unit tests. Author: zero323 Closes #17891 from zero323/SPARK-20631. (cherry picked from commit 804949c6bf00b8e26c39d48bbcc4d0470ee84e47) Signed-off-by: Yanbo Liang Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d86dae8f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d86dae8f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d86dae8f Branch: refs/heads/branch-2.0 Commit: d86dae8feec5e9bf77dd5ba0cf9caa1b955de020 Parents: 4665997 Author: zero323 Authored: Wed May 10 16:57:52 2017 +0800 Committer: Yanbo Liang Committed: Wed May 10 17:00:22 2017 +0800 -- python/pyspark/ml/classification.py | 6 +++--- python/pyspark/ml/tests.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d86dae8f/python/pyspark/ml/classification.py -- diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index bfeda7c..0a30321 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -200,13 +200,13 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti def _checkThresholdConsistency(self): if self.isSet(self.threshold) and self.isSet(self.thresholds): -ts = self.getParam(self.thresholds) +ts = self.getOrDefault(self.thresholds) if len(ts) != 2: raise ValueError("Logistic Regression getThreshold only applies to" + " binary classification, but thresholds has length != 2." 
+ - " thresholds: " + ",".join(ts)) + " thresholds: {0}".format(str(ts))) t = 1.0/(1.0 + ts[0]/ts[1]) -t2 = self.getParam(self.threshold) +t2 = self.getOrDefault(self.threshold) if abs(t2 - t) >= 1E-5: raise ValueError("Logistic Regression getThreshold found inconsistent values for" + " threshold (%g) and thresholds (equivalent to %g)" % (t2, t)) http://git-wip-us.apache.org/repos/asf/spark/blob/d86dae8f/python/pyspark/ml/tests.py -- diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 3c346b9..87f0aff 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -765,6 +765,18 @@ class PersistenceTest(SparkSessionTestCase): except OSError: pass +def logistic_regression_check_thresholds(self): +self.assertIsInstance( +LogisticRegression(threshold=0.5, thresholds=[0.5, 0.5]), +LogisticRegressionModel +) + +self.assertRaisesRegexp( +ValueError, +"Logistic Regression getThreshold found inconsistent.*$", +LogisticRegression, threshold=0.42, thresholds=[0.5, 0.5] +) + def _compare_params(self, m1, m2, param): """ Compare 2 ML Params instances for the given param, and assert both have the same param value - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20631][PYTHON][ML] LogisticRegression._checkThresholdConsistency should use values not Params
Repository: spark Updated Branches: refs/heads/master 0ef16bd4b -> 804949c6b [SPARK-20631][PYTHON][ML] LogisticRegression._checkThresholdConsistency should use values not Params ## What changes were proposed in this pull request? - Replace `getParam` calls with `getOrDefault` calls. - Fix exception message to avoid unintended `TypeError`. - Add unit tests ## How was this patch tested? New unit tests. Author: zero323 Closes #17891 from zero323/SPARK-20631. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/804949c6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/804949c6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/804949c6 Branch: refs/heads/master Commit: 804949c6bf00b8e26c39d48bbcc4d0470ee84e47 Parents: 0ef16bd Author: zero323 Authored: Wed May 10 16:57:52 2017 +0800 Committer: Yanbo Liang Committed: Wed May 10 16:57:52 2017 +0800 -- python/pyspark/ml/classification.py | 6 +++--- python/pyspark/ml/tests.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/804949c6/python/pyspark/ml/classification.py -- diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index a9756ea..dcc12d9 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -349,13 +349,13 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti def _checkThresholdConsistency(self): if self.isSet(self.threshold) and self.isSet(self.thresholds): -ts = self.getParam(self.thresholds) +ts = self.getOrDefault(self.thresholds) if len(ts) != 2: raise ValueError("Logistic Regression getThreshold only applies to" + " binary classification, but thresholds has length != 2." 
+ - " thresholds: " + ",".join(ts)) + " thresholds: {0}".format(str(ts))) t = 1.0/(1.0 + ts[0]/ts[1]) -t2 = self.getParam(self.threshold) +t2 = self.getOrDefault(self.threshold) if abs(t2 - t) >= 1E-5: raise ValueError("Logistic Regression getThreshold found inconsistent values for" + " threshold (%g) and thresholds (equivalent to %g)" % (t2, t)) http://git-wip-us.apache.org/repos/asf/spark/blob/804949c6/python/pyspark/ml/tests.py -- diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 571ac4b..51a3e8e 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -807,6 +807,18 @@ class PersistenceTest(SparkSessionTestCase): except OSError: pass +def logistic_regression_check_thresholds(self): +self.assertIsInstance( +LogisticRegression(threshold=0.5, thresholds=[0.5, 0.5]), +LogisticRegressionModel +) + +self.assertRaisesRegexp( +ValueError, +"Logistic Regression getThreshold found inconsistent.*$", +LogisticRegression, threshold=0.42, thresholds=[0.5, 0.5] +) + def _compare_params(self, m1, m2, param): """ Compare 2 ML Params instances for the given param, and assert both have the same param value - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20631][PYTHON][ML] LogisticRegression._checkThresholdConsistency should use values not Params
Repository: spark Updated Branches: refs/heads/branch-2.1 8e097890a -> 69786ea3a [SPARK-20631][PYTHON][ML] LogisticRegression._checkThresholdConsistency should use values not Params ## What changes were proposed in this pull request? - Replace `getParam` calls with `getOrDefault` calls. - Fix exception message to avoid unintended `TypeError`. - Add unit tests ## How was this patch tested? New unit tests. Author: zero323 Closes #17891 from zero323/SPARK-20631. (cherry picked from commit 804949c6bf00b8e26c39d48bbcc4d0470ee84e47) Signed-off-by: Yanbo Liang Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/69786ea3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/69786ea3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/69786ea3 Branch: refs/heads/branch-2.1 Commit: 69786ea3a972af1b29a332dc11ac507ed4368cc6 Parents: 8e09789 Author: zero323 Authored: Wed May 10 16:57:52 2017 +0800 Committer: Yanbo Liang Committed: Wed May 10 16:58:34 2017 +0800 -- python/pyspark/ml/classification.py | 6 +++--- python/pyspark/ml/tests.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/69786ea3/python/pyspark/ml/classification.py -- diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 570a414..2b47c40 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -238,13 +238,13 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti def _checkThresholdConsistency(self): if self.isSet(self.threshold) and self.isSet(self.thresholds): -ts = self.getParam(self.thresholds) +ts = self.getOrDefault(self.thresholds) if len(ts) != 2: raise ValueError("Logistic Regression getThreshold only applies to" + " binary classification, but thresholds has length != 2." 
+ - " thresholds: " + ",".join(ts)) + " thresholds: {0}".format(str(ts))) t = 1.0/(1.0 + ts[0]/ts[1]) -t2 = self.getParam(self.threshold) +t2 = self.getOrDefault(self.threshold) if abs(t2 - t) >= 1E-5: raise ValueError("Logistic Regression getThreshold found inconsistent values for" + " threshold (%g) and thresholds (equivalent to %g)" % (t2, t)) http://git-wip-us.apache.org/repos/asf/spark/blob/69786ea3/python/pyspark/ml/tests.py -- diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 70e0c6d..7152036 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -808,6 +808,18 @@ class PersistenceTest(SparkSessionTestCase): except OSError: pass +def logistic_regression_check_thresholds(self): +self.assertIsInstance( +LogisticRegression(threshold=0.5, thresholds=[0.5, 0.5]), +LogisticRegressionModel +) + +self.assertRaisesRegexp( +ValueError, +"Logistic Regression getThreshold found inconsistent.*$", +LogisticRegression, threshold=0.42, thresholds=[0.5, 0.5] +) + def _compare_params(self, m1, m2, param): """ Compare 2 ML Params instances for the given param, and assert both have the same param value - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20631][PYTHON][ML] LogisticRegression._checkThresholdConsistency should use values not Params
Repository: spark Updated Branches: refs/heads/branch-2.2 ef50a9548 -> 3ed2f4d51 [SPARK-20631][PYTHON][ML] LogisticRegression._checkThresholdConsistency should use values not Params ## What changes were proposed in this pull request? - Replace `getParam` calls with `getOrDefault` calls. - Fix exception message to avoid unintended `TypeError`. - Add unit tests ## How was this patch tested? New unit tests. Author: zero323 Closes #17891 from zero323/SPARK-20631. (cherry picked from commit 804949c6bf00b8e26c39d48bbcc4d0470ee84e47) Signed-off-by: Yanbo Liang Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ed2f4d5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ed2f4d5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ed2f4d5 Branch: refs/heads/branch-2.2 Commit: 3ed2f4d516ce02dfef929195778f8214703913d8 Parents: ef50a95 Author: zero323 Authored: Wed May 10 16:57:52 2017 +0800 Committer: Yanbo Liang Committed: Wed May 10 16:58:08 2017 +0800 -- python/pyspark/ml/classification.py | 6 +++--- python/pyspark/ml/tests.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3ed2f4d5/python/pyspark/ml/classification.py -- diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index a9756ea..dcc12d9 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -349,13 +349,13 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti def _checkThresholdConsistency(self): if self.isSet(self.threshold) and self.isSet(self.thresholds): -ts = self.getParam(self.thresholds) +ts = self.getOrDefault(self.thresholds) if len(ts) != 2: raise ValueError("Logistic Regression getThreshold only applies to" + " binary classification, but thresholds has length != 2." 
+ - " thresholds: " + ",".join(ts)) + " thresholds: {0}".format(str(ts))) t = 1.0/(1.0 + ts[0]/ts[1]) -t2 = self.getParam(self.threshold) +t2 = self.getOrDefault(self.threshold) if abs(t2 - t) >= 1E-5: raise ValueError("Logistic Regression getThreshold found inconsistent values for" + " threshold (%g) and thresholds (equivalent to %g)" % (t2, t)) http://git-wip-us.apache.org/repos/asf/spark/blob/3ed2f4d5/python/pyspark/ml/tests.py -- diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 571ac4b..51a3e8e 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -807,6 +807,18 @@ class PersistenceTest(SparkSessionTestCase): except OSError: pass +def logistic_regression_check_thresholds(self): +self.assertIsInstance( +LogisticRegression(threshold=0.5, thresholds=[0.5, 0.5]), +LogisticRegressionModel +) + +self.assertRaisesRegexp( +ValueError, +"Logistic Regression getThreshold found inconsistent.*$", +LogisticRegression, threshold=0.42, thresholds=[0.5, 0.5] +) + def _compare_params(self, m1, m2, param): """ Compare 2 ML Params instances for the given param, and assert both have the same param value - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org