spark git commit: [SPARK-19155][ML] Make family case insensitive in GLM

2017-01-23 Thread yliang
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 8daf10e3f -> 1e07a7192


[SPARK-19155][ML] Make family case insensitive in GLM

## What changes were proposed in this pull request?
This is a supplement to PR #16516, which did not make the value returned by `getFamily`
case insensitive. The existing weighted poisson/binomial GLM tests fail when the family is
specified as 'Poisson' or 'Binomial', because the calculations of `dispersion` and
`pValues` compare the family retrieved from `getFamily` case-sensitively:
```
model.getFamily == Binomial.name || model.getFamily == Poisson.name
```

## How was this patch tested?
Update existing tests for 'Poisson' and 'Binomial'.

yanboliang felixcheung imatiach-msft

Author: actuaryzhang 

Closes #16675 from actuaryzhang/family.

(cherry picked from commit f067acefabebf04939d03a639a2aaa654e1bc8f9)
Signed-off-by: Yanbo Liang 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1e07a719
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1e07a719
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1e07a719

Branch: refs/heads/branch-2.1
Commit: 1e07a71924ef1420c96a3a0a8cb5be2f3a830037
Parents: 8daf10e
Author: actuaryzhang 
Authored: Mon Jan 23 00:53:44 2017 -0800
Committer: Yanbo Liang 
Committed: Mon Jan 23 00:54:08 2017 -0800

--
 .../spark/ml/regression/GeneralizedLinearRegression.scala  | 6 ++++--
 .../spark/ml/regression/GeneralizedLinearRegressionSuite.scala | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1e07a719/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
index 1e7ba91..676be61 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
@@ -1027,7 +1027,8 @@ class GeneralizedLinearRegressionSummary 
private[regression] (
*/
   @Since("2.0.0")
   lazy val dispersion: Double = if (
-model.getFamily == Binomial.name || model.getFamily == Poisson.name) {
+model.getFamily.toLowerCase == Binomial.name ||
+  model.getFamily.toLowerCase == Poisson.name) {
 1.0
   } else {
 val rss = pearsonResiduals.agg(sum(pow(col("pearsonResiduals"), 
2.0))).first().getDouble(0)
@@ -1130,7 +1131,8 @@ class GeneralizedLinearRegressionTrainingSummary 
private[regression] (
   @Since("2.0.0")
   lazy val pValues: Array[Double] = {
 if (isNormalSolver) {
-  if (model.getFamily == Binomial.name || model.getFamily == Poisson.name) 
{
+  if (model.getFamily.toLowerCase == Binomial.name ||
+model.getFamily.toLowerCase == Poisson.name) {
 tValues.map { x => 2.0 * (1.0 - dist.Gaussian(0.0, 
1.0).cdf(math.abs(x))) }
   } else {
 tValues.map { x =>

http://git-wip-us.apache.org/repos/asf/spark/blob/1e07a719/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
--
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
index 415d426..95b443d 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
@@ -757,7 +757,7 @@ class GeneralizedLinearRegressionSuite
0.5554219 -0.4034267  0.6567520 -0.2611382
  */
 val trainer = new GeneralizedLinearRegression()
-  .setFamily("binomial")
+  .setFamily("Binomial")
   .setWeightCol("weight")
   .setFitIntercept(false)
 
@@ -874,7 +874,7 @@ class GeneralizedLinearRegressionSuite
-0.4378554  0.2189277  0.1459518 -0.1094638
  */
 val trainer = new GeneralizedLinearRegression()
-  .setFamily("poisson")
+  .setFamily("Poisson")
   .setWeightCol("weight")
   .setFitIntercept(true)
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-19155][ML] Make family case insensitive in GLM

2017-01-23 Thread yliang
Repository: spark
Updated Branches:
  refs/heads/master de6ad3dfa -> f067acefa


[SPARK-19155][ML] Make family case insensitive in GLM

## What changes were proposed in this pull request?
This is a supplement to PR #16516, which did not make the value returned by `getFamily`
case insensitive. The existing weighted poisson/binomial GLM tests fail when the family is
specified as 'Poisson' or 'Binomial', because the calculations of `dispersion` and
`pValues` compare the family retrieved from `getFamily` case-sensitively:
```
model.getFamily == Binomial.name || model.getFamily == Poisson.name
```

## How was this patch tested?
Update existing tests for 'Poisson' and 'Binomial'.

yanboliang felixcheung imatiach-msft

Author: actuaryzhang 

Closes #16675 from actuaryzhang/family.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f067acef
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f067acef
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f067acef

Branch: refs/heads/master
Commit: f067acefabebf04939d03a639a2aaa654e1bc8f9
Parents: de6ad3d
Author: actuaryzhang 
Authored: Mon Jan 23 00:53:44 2017 -0800
Committer: Yanbo Liang 
Committed: Mon Jan 23 00:53:44 2017 -0800

--
 .../spark/ml/regression/GeneralizedLinearRegression.scala  | 6 ++++--
 .../spark/ml/regression/GeneralizedLinearRegressionSuite.scala | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f067acef/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
index 116f0f6..3ffed39 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
@@ -1044,7 +1044,8 @@ class GeneralizedLinearRegressionSummary 
private[regression] (
*/
   @Since("2.0.0")
   lazy val dispersion: Double = if (
-model.getFamily == Binomial.name || model.getFamily == Poisson.name) {
+model.getFamily.toLowerCase == Binomial.name ||
+  model.getFamily.toLowerCase == Poisson.name) {
 1.0
   } else {
 val rss = pearsonResiduals.agg(sum(pow(col("pearsonResiduals"), 
2.0))).first().getDouble(0)
@@ -1147,7 +1148,8 @@ class GeneralizedLinearRegressionTrainingSummary 
private[regression] (
   @Since("2.0.0")
   lazy val pValues: Array[Double] = {
 if (isNormalSolver) {
-  if (model.getFamily == Binomial.name || model.getFamily == Poisson.name) 
{
+  if (model.getFamily.toLowerCase == Binomial.name ||
+model.getFamily.toLowerCase == Poisson.name) {
 tValues.map { x => 2.0 * (1.0 - dist.Gaussian(0.0, 
1.0).cdf(math.abs(x))) }
   } else {
 tValues.map { x =>

http://git-wip-us.apache.org/repos/asf/spark/blob/f067acef/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
--
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
index 9f3d643..e3c2787 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
@@ -758,7 +758,7 @@ class GeneralizedLinearRegressionSuite
0.028480  0.069123  0.935495 -0.049613
 */
 val trainer = new GeneralizedLinearRegression()
-  .setFamily("binomial")
+  .setFamily("Binomial")
   .setWeightCol("weight")
   .setFitIntercept(false)
 
@@ -875,7 +875,7 @@ class GeneralizedLinearRegressionSuite
-0.4378554  0.2189277  0.1459518 -0.1094638
  */
 val trainer = new GeneralizedLinearRegression()
-  .setFamily("poisson")
+  .setFamily("Poisson")
   .setWeightCol("weight")
   .setFitIntercept(true)
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org