spark git commit: [SPARK-19155][ML] Make family case insensitive in GLM
Repository: spark Updated Branches: refs/heads/branch-2.1 8daf10e3f -> 1e07a7192 [SPARK-19155][ML] Make family case insensitive in GLM ## What changes were proposed in this pull request? This is a supplement to PR #16516 which did not make the value from `getFamily` case insensitive. Current tests of poisson/binomial glm with weight fail when specifying 'Poisson' or 'Binomial', because the calculation of `dispersion` and `pValues` checks the value of family retrieved from `getFamily` ``` model.getFamily == Binomial.name || model.getFamily == Poisson.name ``` ## How was this patch tested? Update existing tests for 'Poisson' and 'Binomial'. yanboliang felixcheung imatiach-msft Author: actuaryzhang Closes #16675 from actuaryzhang/family. (cherry picked from commit f067acefabebf04939d03a639a2aaa654e1bc8f9) Signed-off-by: Yanbo Liang Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1e07a719 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1e07a719 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1e07a719 Branch: refs/heads/branch-2.1 Commit: 1e07a71924ef1420c96a3a0a8cb5be2f3a830037 Parents: 8daf10e Author: actuaryzhang Authored: Mon Jan 23 00:53:44 2017 -0800 Committer: Yanbo Liang Committed: Mon Jan 23 00:54:08 2017 -0800 -- .../spark/ml/regression/GeneralizedLinearRegression.scala | 6 ++++-- .../spark/ml/regression/GeneralizedLinearRegressionSuite.scala | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1e07a719/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index 1e7ba91..676be61 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -1027,7 +1027,8 @@ class GeneralizedLinearRegressionSummary private[regression] ( */ @Since("2.0.0") lazy val dispersion: Double = if ( -model.getFamily == Binomial.name || model.getFamily == Poisson.name) { +model.getFamily.toLowerCase == Binomial.name || + model.getFamily.toLowerCase == Poisson.name) { 1.0 } else { val rss = pearsonResiduals.agg(sum(pow(col("pearsonResiduals"), 2.0))).first().getDouble(0) @@ -1130,7 +1131,8 @@ class GeneralizedLinearRegressionTrainingSummary private[regression] ( @Since("2.0.0") lazy val pValues: Array[Double] = { if (isNormalSolver) { - if (model.getFamily == Binomial.name || model.getFamily == Poisson.name) { + if (model.getFamily.toLowerCase == Binomial.name || +model.getFamily.toLowerCase == Poisson.name) { tValues.map { x => 2.0 * (1.0 - dist.Gaussian(0.0, 1.0).cdf(math.abs(x))) } } else { tValues.map { x => http://git-wip-us.apache.org/repos/asf/spark/blob/1e07a719/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index 415d426..95b443d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -757,7 +757,7 @@ class GeneralizedLinearRegressionSuite 0.5554219 -0.4034267 0.6567520 -0.2611382 */ val trainer = new GeneralizedLinearRegression() - .setFamily("binomial") + .setFamily("Binomial") .setWeightCol("weight") .setFitIntercept(false) @@ -874,7 +874,7 @@ class GeneralizedLinearRegressionSuite -0.4378554 0.2189277 0.1459518 -0.1094638 */ val trainer = new GeneralizedLinearRegression() - .setFamily("poisson") + .setFamily("Poisson") .setWeightCol("weight") 
.setFitIntercept(true) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-19155][ML] Make family case insensitive in GLM
Repository: spark Updated Branches: refs/heads/master de6ad3dfa -> f067acefa [SPARK-19155][ML] Make family case insensitive in GLM ## What changes were proposed in this pull request? This is a supplement to PR #16516 which did not make the value from `getFamily` case insensitive. Current tests of poisson/binomial glm with weight fail when specifying 'Poisson' or 'Binomial', because the calculation of `dispersion` and `pValues` checks the value of family retrieved from `getFamily` ``` model.getFamily == Binomial.name || model.getFamily == Poisson.name ``` ## How was this patch tested? Update existing tests for 'Poisson' and 'Binomial'. yanboliang felixcheung imatiach-msft Author: actuaryzhang Closes #16675 from actuaryzhang/family. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f067acef Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f067acef Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f067acef Branch: refs/heads/master Commit: f067acefabebf04939d03a639a2aaa654e1bc8f9 Parents: de6ad3d Author: actuaryzhang Authored: Mon Jan 23 00:53:44 2017 -0800 Committer: Yanbo Liang Committed: Mon Jan 23 00:53:44 2017 -0800 -- .../spark/ml/regression/GeneralizedLinearRegression.scala | 6 ++++-- .../spark/ml/regression/GeneralizedLinearRegressionSuite.scala | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f067acef/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index 116f0f6..3ffed39 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -1044,7 +1044,8 @@ class 
GeneralizedLinearRegressionSummary private[regression] ( */ @Since("2.0.0") lazy val dispersion: Double = if ( -model.getFamily == Binomial.name || model.getFamily == Poisson.name) { +model.getFamily.toLowerCase == Binomial.name || + model.getFamily.toLowerCase == Poisson.name) { 1.0 } else { val rss = pearsonResiduals.agg(sum(pow(col("pearsonResiduals"), 2.0))).first().getDouble(0) @@ -1147,7 +1148,8 @@ class GeneralizedLinearRegressionTrainingSummary private[regression] ( @Since("2.0.0") lazy val pValues: Array[Double] = { if (isNormalSolver) { - if (model.getFamily == Binomial.name || model.getFamily == Poisson.name) { + if (model.getFamily.toLowerCase == Binomial.name || +model.getFamily.toLowerCase == Poisson.name) { tValues.map { x => 2.0 * (1.0 - dist.Gaussian(0.0, 1.0).cdf(math.abs(x))) } } else { tValues.map { x => http://git-wip-us.apache.org/repos/asf/spark/blob/f067acef/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index 9f3d643..e3c2787 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -758,7 +758,7 @@ class GeneralizedLinearRegressionSuite 0.028480 0.069123 0.935495 -0.049613 */ val trainer = new GeneralizedLinearRegression() - .setFamily("binomial") + .setFamily("Binomial") .setWeightCol("weight") .setFitIntercept(false) @@ -875,7 +875,7 @@ class GeneralizedLinearRegressionSuite -0.4378554 0.2189277 0.1459518 -0.1094638 */ val trainer = new GeneralizedLinearRegression() - .setFamily("poisson") + .setFamily("Poisson") .setWeightCol("weight") .setFitIntercept(true) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: 
commits-h...@spark.apache.org