Github user dbtsai commented on a diff in the pull request:

    https://github.com/apache/spark/pull/8884#discussion_r42402047
  
    --- Diff: mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala ---
    @@ -93,525 +94,603 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
       }
     
       test("linear regression with intercept without regularization") {
    -    val trainer1 = new LinearRegression
    -    // The result should be the same regardless of standardization without regularization
    -    val trainer2 = (new LinearRegression).setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val model2 = trainer2.fit(dataset)
    -
    -    /*
    -       Using the following R code to load the data and train the model using glmnet package.
    -
    -       library("glmnet")
    -       data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
    -       features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
    -       label <- as.numeric(data$V1)
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
    -       > weights
    -        3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)         6.298698
    -       as.numeric.data.V2. 4.700706
    -       as.numeric.data.V3. 7.199082
    -     */
    -    val interceptR = 6.298698
    -    val weightsR = Vectors.dense(4.700706, 7.199082)
    -
    -    assert(model1.intercept ~== interceptR relTol 1E-3)
    -    assert(model1.weights ~= weightsR relTol 1E-3)
    -    assert(model2.intercept ~== interceptR relTol 1E-3)
    -    assert(model2.weights ~= weightsR relTol 1E-3)
    -
    -
    -    model1.transform(dataset).select("features", "prediction").collect().foreach {
    -      case Row(features: DenseVector, prediction1: Double) =>
    -        val prediction2 =
    -          features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    -        assert(prediction1 ~== prediction2 relTol 1E-5)
    +    Seq("auto", "l-bfgs", "normal").foreach { solver =>
    +      val trainer1 = new LinearRegression().setSolver(solver)
    +      // The result should be the same regardless of standardization without regularization
    +      val trainer2 = (new LinearRegression).setStandardization(false).setSolver(solver)
    +      val model1 = trainer1.fit(dataset)
    +      val model2 = trainer2.fit(dataset)
    +
    +      /*
    +         Using the following R code to load the data and train the model using glmnet package.
    +
    +         library("glmnet")
    +         data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
    +         features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
    +         label <- as.numeric(data$V1)
    +         weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
    +         > weights
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)         6.298698
    +         as.numeric.data.V2. 4.700706
    +         as.numeric.data.V3. 7.199082
    +       */
    +      val interceptR = 6.298698
    +      val weightsR = Vectors.dense(4.700706, 7.199082)
    +
    +      assert(model1.intercept ~== interceptR relTol 1E-3)
    +      assert(model1.weights ~= weightsR relTol 1E-3)
    +      assert(model2.intercept ~== interceptR relTol 1E-3)
    +      assert(model2.weights ~= weightsR relTol 1E-3)
    +
    +      model1.transform(dataset).select("features", "prediction").collect().foreach {
    +        case Row(features: DenseVector, prediction1: Double) =>
    +          val prediction2 =
    +            features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    +          assert(prediction1 ~== prediction2 relTol 1E-5)
    +      }
         }
       }
     
       test("linear regression without intercept without regularization") {
    -    val trainer1 = (new LinearRegression).setFitIntercept(false)
    -    // Without regularization the results should be the same
    -    val trainer2 = (new LinearRegression).setFitIntercept(false).setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val modelWithoutIntercept1 = trainer1.fit(datasetWithoutIntercept)
    -    val model2 = trainer2.fit(dataset)
    -    val modelWithoutIntercept2 = trainer2.fit(datasetWithoutIntercept)
    -
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
    -         intercept = FALSE))
    -       > weights
    -        3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)         .
    -       as.numeric.data.V2. 6.995908
    -       as.numeric.data.V3. 5.275131
    -     */
    -    val weightsR = Vectors.dense(6.995908, 5.275131)
    -
    -    assert(model1.intercept ~== 0 absTol 1E-3)
    -    assert(model1.weights ~= weightsR relTol 1E-3)
    -    assert(model2.intercept ~== 0 absTol 1E-3)
    -    assert(model2.weights ~= weightsR relTol 1E-3)
    -
    -    /*
    -       Then again with the data with no intercept:
    -       > weightsWithoutIntercept
    -       3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)           .
    -       as.numeric.data3.V2. 4.70011
    -       as.numeric.data3.V3. 7.19943
    -     */
    -    val weightsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
    -
    -    assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3)
    -    assert(modelWithoutIntercept1.weights ~= weightsWithoutInterceptR relTol 1E-3)
    -    assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3)
    -    assert(modelWithoutIntercept2.weights ~= weightsWithoutInterceptR relTol 1E-3)
    +    Seq("auto", "l-bfgs", "normal").foreach { solver =>
    +      val trainer1 = (new LinearRegression).setFitIntercept(false).setSolver(solver)
    +      // Without regularization the results should be the same
    +      val trainer2 = (new LinearRegression).setFitIntercept(false).setStandardization(false)
    +        .setSolver(solver)
    +      val model1 = trainer1.fit(dataset)
    +      val modelWithoutIntercept1 = trainer1.fit(datasetWithoutIntercept)
    +      val model2 = trainer2.fit(dataset)
    +      val modelWithoutIntercept2 = trainer2.fit(datasetWithoutIntercept)
    +
    +      /*
    +         weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
    +           intercept = FALSE))
    +         > weights
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)         .
    +         as.numeric.data.V2. 6.995908
    +         as.numeric.data.V3. 5.275131
    +       */
    +      val weightsR = Vectors.dense(6.995908, 5.275131)
    +
    +      assert(model1.intercept ~== 0 absTol 1E-3)
    +      assert(model1.weights ~= weightsR relTol 1E-3)
    +      assert(model2.intercept ~== 0 absTol 1E-3)
    +      assert(model2.weights ~= weightsR relTol 1E-3)
    +
    +      /*
    +         Then again with the data with no intercept:
    +         > weightsWithoutIntercept
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)           .
    +         as.numeric.data3.V2. 4.70011
    +         as.numeric.data3.V3. 7.19943
    +       */
    +      val weightsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
    +
    +      assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3)
    +      assert(modelWithoutIntercept1.weights ~= weightsWithoutInterceptR relTol 1E-3)
    +      assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3)
    +      assert(modelWithoutIntercept2.weights ~= weightsWithoutInterceptR relTol 1E-3)
    +    }
       }
     
       test("linear regression with intercept with L1 regularization") {
    -    val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    -    val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    -      .setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val model2 = trainer2.fit(dataset)
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57))
    -       > weights
    -        3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)         6.24300
    -       as.numeric.data.V2. 4.024821
    -       as.numeric.data.V3. 6.679841
    -     */
    -    val interceptR1 = 6.24300
    -    val weightsR1 = Vectors.dense(4.024821, 6.679841)
    -
    -    assert(model1.intercept ~== interceptR1 relTol 1E-3)
    -    assert(model1.weights ~= weightsR1 relTol 1E-3)
    -
    -    /*
    -      weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
    -        standardize=FALSE))
    -      > weights
    -       3 x 1 sparse Matrix of class "dgCMatrix"
    -                                s0
    -      (Intercept)         6.416948
    -      as.numeric.data.V2. 3.893869
    -      as.numeric.data.V3. 6.724286
    -     */
    -    val interceptR2 = 6.416948
    -    val weightsR2 = Vectors.dense(3.893869, 6.724286)
    -
    -    assert(model2.intercept ~== interceptR2 relTol 1E-3)
    -    assert(model2.weights ~= weightsR2 relTol 1E-3)
    -
    -
    -    model1.transform(dataset).select("features", "prediction").collect().foreach {
    -      case Row(features: DenseVector, prediction1: Double) =>
    -        val prediction2 =
    -          features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    -        assert(prediction1 ~== prediction2 relTol 1E-5)
    +    Seq("auto", "l-bfgs", "normal").foreach { solver =>
    +      val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    +        .setSolver(solver)
    +      val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    +        .setSolver(solver).setStandardization(false)
    +
    +      var model1: LinearRegressionModel = null
    +      var model2: LinearRegressionModel = null
    +
    +      // Normal optimizer is not supported with only L1 regularization case.
    +      if (solver == "normal") {
    +        intercept[IllegalArgumentException] {
    +            trainer1.fit(dataset)
    +            trainer2.fit(dataset)
    +          }
    +      } else {
    +        model1 = trainer1.fit(dataset)
    +        model2 = trainer2.fit(dataset)
    +
    +
    +        /*
    +           weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57))
    +           > weights
    +            3 x 1 sparse Matrix of class "dgCMatrix"
    +                                    s0
    +           (Intercept)         6.24300
    +           as.numeric.data.V2. 4.024821
    +           as.numeric.data.V3. 6.679841
    +         */
    +        val interceptR1 = 6.24300
    +        val weightsR1 = Vectors.dense(4.024821, 6.679841)
    +        assert(model1.intercept ~== interceptR1 relTol 1E-3)
    +        assert(model1.weights ~= weightsR1 relTol 1E-3)
    +
    +        /*
    +           weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
    +             standardize=FALSE))
    +           > weights
    +            3 x 1 sparse Matrix of class "dgCMatrix"
    +                                    s0
    +           (Intercept)         6.416948
    +           as.numeric.data.V2. 3.893869
    +           as.numeric.data.V3. 6.724286
    +         */
    +        val interceptR2 = 6.416948
    +        val weightsR2 = Vectors.dense(3.893869, 6.724286)
    +
    +        assert(model2.intercept ~== interceptR2 relTol 1E-3)
    +        assert(model2.weights ~= weightsR2 relTol 1E-3)
    +
    +        model1.transform(dataset).select("features", "prediction").collect().foreach {
    +          case Row(features: DenseVector, prediction1: Double) =>
    +            val prediction2 =
    +              features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    +            assert(prediction1 ~== prediction2 relTol 1E-5)
    +        }
    +      }
         }
       }
     
       test("linear regression without intercept with L1 regularization") {
    -    val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    -      .setFitIntercept(false)
    -    val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    -      .setFitIntercept(false).setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val model2 = trainer2.fit(dataset)
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
    -         intercept=FALSE))
    -       > weights
    -        3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)          .
    -       as.numeric.data.V2. 6.299752
    -       as.numeric.data.V3. 4.772913
    -     */
    -    val interceptR1 = 0.0
    -    val weightsR1 = Vectors.dense(6.299752, 4.772913)
    -
    -    assert(model1.intercept ~== interceptR1 absTol 1E-3)
    -    assert(model1.weights ~= weightsR1 relTol 1E-3)
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
    -         intercept=FALSE, standardize=FALSE))
    -       > weights
    -       3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)         .
    -       as.numeric.data.V2. 6.232193
    -       as.numeric.data.V3. 4.764229
    -     */
    -    val interceptR2 = 0.0
    -    val weightsR2 = Vectors.dense(6.232193, 4.764229)
    -
    -    assert(model2.intercept ~== interceptR2 absTol 1E-3)
    -    assert(model2.weights ~= weightsR2 relTol 1E-3)
    -
    -
    -    model1.transform(dataset).select("features", "prediction").collect().foreach {
    -      case Row(features: DenseVector, prediction1: Double) =>
    -        val prediction2 =
    -          features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    -        assert(prediction1 ~== prediction2 relTol 1E-5)
    +    Seq("auto", "l-bfgs", "normal").foreach { solver =>
    +      val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    +        .setFitIntercept(false).setSolver(solver)
    +      val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    +        .setFitIntercept(false).setStandardization(false).setSolver(solver)
    +
    +      var model1: LinearRegressionModel = null
    +      var model2: LinearRegressionModel = null
    +
    +      // Normal optimizer is not supported with only L1 regularization case.
    +      if (solver == "normal") {
    +        intercept[IllegalArgumentException] {
    +            trainer1.fit(dataset)
    +            trainer2.fit(dataset)
    +          }
    +      } else {
    +        model1 = trainer1.fit(dataset)
    +        model2 = trainer2.fit(dataset)
    +
    +        /*
    +           weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
    +             intercept=FALSE))
    +           > weights
    +            3 x 1 sparse Matrix of class "dgCMatrix"
    +                                     s0
    +           (Intercept)          .
    +           as.numeric.data.V2. 6.299752
    +           as.numeric.data.V3. 4.772913
    +         */
    +        val interceptR1 = 0.0
    +        val weightsR1 = Vectors.dense(6.299752, 4.772913)
    +
    +        assert(model1.intercept ~== interceptR1 absTol 1E-3)
    +        assert(model1.weights ~= weightsR1 relTol 1E-3)
    +
    +        /*
    +           weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57,
    +             intercept=FALSE, standardize=FALSE))
    +           > weights
    +            3 x 1 sparse Matrix of class "dgCMatrix"
    +                                     s0
    +           (Intercept)         .
    +           as.numeric.data.V2. 6.232193
    +           as.numeric.data.V3. 4.764229
    +         */
    +        val interceptR2 = 0.0
    +        val weightsR2 = Vectors.dense(6.232193, 4.764229)
    +
    +        assert(model2.intercept ~== interceptR2 absTol 1E-3)
    +        assert(model2.weights ~= weightsR2 relTol 1E-3)
    +
    +        model1.transform(dataset).select("features", "prediction").collect().foreach {
    +          case Row(features: DenseVector, prediction1: Double) =>
    +            val prediction2 =
    +              features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    +            assert(prediction1 ~== prediction2 relTol 1E-5)
    +        }
    +      }
         }
       }
     
       test("linear regression with intercept with L2 regularization") {
    -    val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
    -    val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
    -      .setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val model2 = trainer2.fit(dataset)
    -
    -    /*
    -      weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
    -      > weights
    -       3 x 1 sparse Matrix of class "dgCMatrix"
    -                                s0
    -      (Intercept)         5.269376
    -      as.numeric.data.V2. 3.736216
    -      as.numeric.data.V3. 5.712356)
    -     */
    -    val interceptR1 = 5.269376
    -    val weightsR1 = Vectors.dense(3.736216, 5.712356)
    -
    -    assert(model1.intercept ~== interceptR1 relTol 1E-3)
    -    assert(model1.weights ~= weightsR1 relTol 1E-3)
    -
    -    /*
    -      weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
    -        standardize=FALSE))
    -      > weights
    -       3 x 1 sparse Matrix of class "dgCMatrix"
    -                                s0
    -      (Intercept)         5.791109
    -      as.numeric.data.V2. 3.435466
    -      as.numeric.data.V3. 5.910406
    -     */
    -    val interceptR2 = 5.791109
    -    val weightsR2 = Vectors.dense(3.435466, 5.910406)
    -
    -    assert(model2.intercept ~== interceptR2 relTol 1E-3)
    -    assert(model2.weights ~= weightsR2 relTol 1E-3)
    -
    -    model1.transform(dataset).select("features", "prediction").collect().foreach {
    -      case Row(features: DenseVector, prediction1: Double) =>
    -        val prediction2 =
    -          features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    -        assert(prediction1 ~== prediction2 relTol 1E-5)
    +    Seq("auto", "l-bfgs", "normal").foreach { solver =>
    +      val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
    +        .setSolver(solver)
    +      val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
    +        .setStandardization(false).setSolver(solver)
    +      val model1 = trainer1.fit(dataset)
    +      val model2 = trainer2.fit(dataset)
    +
    +      /*
    +         weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
    +         > weights
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)         5.269376
    +         as.numeric.data.V2. 3.736216
    +         as.numeric.data.V3. 5.712356)
    +       */
    +      val interceptR1 = 5.269376
    +      val weightsR1 = Vectors.dense(3.736216, 5.712356)
    +
    +      assert(model1.intercept ~== interceptR1 relTol 1E-3)
    +      assert(model1.weights ~= weightsR1 relTol 1E-3)
    +
    +      /*
    +         weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
    +           standardize=FALSE))
    +         > weights
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)         5.791109
    +         as.numeric.data.V2. 3.435466
    +         as.numeric.data.V3. 5.910406
    +       */
    +      val interceptR2 = 5.791109
    +      val weightsR2 = Vectors.dense(3.435466, 5.910406)
    +
    +      assert(model2.intercept ~== interceptR2 relTol 1E-3)
    +      assert(model2.weights ~= weightsR2 relTol 1E-3)
    +
    +      model1.transform(dataset).select("features", "prediction").collect().foreach {
    +        case Row(features: DenseVector, prediction1: Double) =>
    +          val prediction2 =
    +            features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    +          assert(prediction1 ~== prediction2 relTol 1E-5)
    +      }
         }
       }
     
       test("linear regression without intercept with L2 regularization") {
    -    val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
    -      .setFitIntercept(false)
    -    val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
    -      .setFitIntercept(false).setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val model2 = trainer2.fit(dataset)
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
    -         intercept = FALSE))
    -       > weights
    -        3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)         .
    -       as.numeric.data.V2. 5.522875
    -       as.numeric.data.V3. 4.214502
    -     */
    -    val interceptR1 = 0.0
    -    val weightsR1 = Vectors.dense(5.522875, 4.214502)
    -
    -    assert(model1.intercept ~== interceptR1 absTol 1E-3)
    -    assert(model1.weights ~= weightsR1 relTol 1E-3)
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
    -         intercept = FALSE, standardize=FALSE))
    -       > weights
    -        3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)         .
    -       as.numeric.data.V2. 5.263704
    -       as.numeric.data.V3. 4.187419
    -     */
    -    val interceptR2 = 0.0
    -    val weightsR2 = Vectors.dense(5.263704, 4.187419)
    -
    -    assert(model2.intercept ~== interceptR2 absTol 1E-3)
    -    assert(model2.weights ~= weightsR2 relTol 1E-3)
    -
    -    model1.transform(dataset).select("features", "prediction").collect().foreach {
    -      case Row(features: DenseVector, prediction1: Double) =>
    -        val prediction2 =
    -          features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    -        assert(prediction1 ~== prediction2 relTol 1E-5)
    +    Seq("auto", "l-bfgs", "normal").foreach { solver =>
    +      val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
    +        .setFitIntercept(false).setSolver(solver)
    +      val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
    +        .setFitIntercept(false).setStandardization(false).setSolver(solver)
    +      val model1 = trainer1.fit(dataset)
    +      val model2 = trainer2.fit(dataset)
    +
    +      /*
    +         weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
    +           intercept = FALSE))
    +         > weights
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)         .
    +         as.numeric.data.V2. 5.522875
    +         as.numeric.data.V3. 4.214502
    +       */
    +      val interceptR1 = 0.0
    +      val weightsR1 = Vectors.dense(5.522875, 4.214502)
    +
    +      assert(model1.intercept ~== interceptR1 absTol 1E-3)
    +      assert(model1.weights ~= weightsR1 relTol 1E-3)
    +
    +      /*
    +         weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
    +           intercept = FALSE, standardize=FALSE))
    +         > weights
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)         .
    +         as.numeric.data.V2. 5.263704
    +         as.numeric.data.V3. 4.187419
    +       */
    +      val interceptR2 = 0.0
    +      val weightsR2 = Vectors.dense(5.263704, 4.187419)
    +
    +      assert(model2.intercept ~== interceptR2 absTol 1E-3)
    +      assert(model2.weights ~= weightsR2 relTol 1E-3)
    +
    +      model1.transform(dataset).select("features", "prediction").collect().foreach {
    +        case Row(features: DenseVector, prediction1: Double) =>
    +          val prediction2 =
    +            features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    +          assert(prediction1 ~== prediction2 relTol 1E-5)
    +      }
         }
       }
     
       test("linear regression with intercept with ElasticNet regularization") {
    -    val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
    -    val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
    -      .setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val model2 = trainer2.fit(dataset)
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6))
    -       > weights
    -       3 x 1 sparse Matrix of class "dgCMatrix"
    -       s0
    -       (Intercept)         6.324108
    -       as.numeric.data.V2. 3.168435
    -       as.numeric.data.V3. 5.200403
    -     */
    -    val interceptR1 = 5.696056
    -    val weightsR1 = Vectors.dense(3.670489, 6.001122)
    -
    -    assert(model1.intercept ~== interceptR1 relTol 1E-3)
    -    assert(model1.weights ~= weightsR1 relTol 1E-3)
    -
    -    /*
    -      weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
    -       standardize=FALSE))
    -      > weights
    -      3 x 1 sparse Matrix of class "dgCMatrix"
    -      s0
    -      (Intercept)         6.114723
    -      as.numeric.data.V2. 3.409937
    -      as.numeric.data.V3. 6.146531
    -     */
    -    val interceptR2 = 6.114723
    -    val weightsR2 = Vectors.dense(3.409937, 6.146531)
    -
    -    assert(model2.intercept ~== interceptR2 relTol 1E-3)
    -    assert(model2.weights ~= weightsR2 relTol 1E-3)
    -
    -    model1.transform(dataset).select("features", "prediction").collect().foreach {
    -      case Row(features: DenseVector, prediction1: Double) =>
    -        val prediction2 =
    -          features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    -        assert(prediction1 ~== prediction2 relTol 1E-5)
    +    Seq("auto", "l-bfgs", "normal").foreach { solver =>
    +      val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
    +        .setSolver(solver)
    +      val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
    +        .setStandardization(false).setSolver(solver)
    +
    +      var model1: LinearRegressionModel = null
    +      var model2: LinearRegressionModel = null
    +
    +      // Normal optimizer is not supported with non-zero elasticnet parameter.
    +      if (solver == "normal") {
    +        intercept[IllegalArgumentException] {
    +            trainer1.fit(dataset)
    +            trainer2.fit(dataset)
    +          }
    +      } else {
    +        model1 = trainer1.fit(dataset)
    +        model2 = trainer2.fit(dataset)
    +
    +        /*
    +           weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6))
    +           > weights
    +            3 x 1 sparse Matrix of class "dgCMatrix"
    +                                     s0
    +           (Intercept)         6.324108
    +           as.numeric.data.V2. 3.168435
    +           as.numeric.data.V3. 5.200403
    +         */
    +        val interceptR1 = 5.696056
    +        val weightsR1 = Vectors.dense(3.670489, 6.001122)
    +
    +        assert(model1.intercept ~== interceptR1 relTol 1E-3)
    +        assert(model1.weights ~= weightsR1 relTol 1E-3)
    +
    +        /*
    +           weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
    +             standardize=FALSE))
    +           > weights
    +            3 x 1 sparse Matrix of class "dgCMatrix"
    +                                     s0
    +           (Intercept)         6.114723
    +           as.numeric.data.V2. 3.409937
    +           as.numeric.data.V3. 6.146531
    +         */
    +        val interceptR2 = 6.114723
    +        val weightsR2 = Vectors.dense(3.409937, 6.146531)
    +
    +        assert(model2.intercept ~== interceptR2 relTol 1E-3)
    +        assert(model2.weights ~= weightsR2 relTol 1E-3)
    +
    +        model1.transform(dataset).select("features", "prediction").collect().foreach {
    +          case Row(features: DenseVector, prediction1: Double) =>
    +            val prediction2 =
    +            features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    +            assert(prediction1 ~== prediction2 relTol 1E-5)
    +        }
    +      }
         }
       }
     
       test("linear regression without intercept with ElasticNet 
regularization") {
    -    val trainer1 = (new 
LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
    -      .setFitIntercept(false)
    -    val trainer2 = (new 
LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
    -      .setFitIntercept(false).setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val model2 = trainer2.fit(dataset)
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
    -         intercept=FALSE))
    -       > weights
    -       3 x 1 sparse Matrix of class "dgCMatrix"
    -       s0
    -       (Intercept)         .
    -       as.numeric.dataM.V2. 5.673348
    -       as.numeric.dataM.V3. 4.322251
    -     */
    -    val interceptR1 = 0.0
    -    val weightsR1 = Vectors.dense(5.673348, 4.322251)
    -
    -    assert(model1.intercept ~== interceptR1 absTol 1E-3)
    -    assert(model1.weights ~= weightsR1 relTol 1E-3)
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
    -         intercept=FALSE, standardize=FALSE))
    -       > weights
    -       3 x 1 sparse Matrix of class "dgCMatrix"
    -       s0
    -       (Intercept)         .
    -       as.numeric.data.V2. 5.477988
    -       as.numeric.data.V3. 4.297622
    -     */
    -    val interceptR2 = 0.0
    -    val weightsR2 = Vectors.dense(5.477988, 4.297622)
    -
    -    assert(model2.intercept ~== interceptR2 absTol 1E-3)
    -    assert(model2.weights ~= weightsR2 relTol 1E-3)
    -
    -    model1.transform(dataset).select("features", "prediction").collect().foreach {
    -      case Row(features: DenseVector, prediction1: Double) =>
    -        val prediction2 =
    -          features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    -        assert(prediction1 ~== prediction2 relTol 1E-5)
    +    Seq("auto", "l-bfgs", "normal").foreach { solver =>
    +      val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
    +        .setFitIntercept(false).setSolver(solver)
    +      val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
    +        .setFitIntercept(false).setStandardization(false).setSolver(solver)
    +
    +      var model1: LinearRegressionModel = null
    +      var model2: LinearRegressionModel = null
    +
    +      // Normal optimizer is not supported with non-zero elasticnet parameter.
    +      if (solver == "normal") {
    +        intercept[IllegalArgumentException] {
    +            trainer1.fit(dataset)
    +            trainer2.fit(dataset)
    +          }
    +      } else {
    +        model1 = trainer1.fit(dataset)
    +        model2 = trainer2.fit(dataset)
    +
    +        /*
    +           weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
    +             intercept=FALSE))
    +           > weights
    +            3 x 1 sparse Matrix of class "dgCMatrix"
    +                                      s0
    +           (Intercept)         .
    +           as.numeric.dataM.V2. 5.673348
    +           as.numeric.dataM.V3. 4.322251
    +         */
    +        val interceptR1 = 0.0
    +        val weightsR1 = Vectors.dense(5.673348, 4.322251)
    +
    +        assert(model1.intercept ~== interceptR1 absTol 1E-3)
    +        assert(model1.weights ~= weightsR1 relTol 1E-3)
    +
    +        /*
    +           weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6,
    +             intercept=FALSE, standardize=FALSE))
    +           > weights
    +            3 x 1 sparse Matrix of class "dgCMatrix"
    +                                     s0
    +           (Intercept)         .
    +           as.numeric.data.V2. 5.477988
    +           as.numeric.data.V3. 4.297622
    +         */
    +        val interceptR2 = 0.0
    +        val weightsR2 = Vectors.dense(5.477988, 4.297622)
    +
    +        assert(model2.intercept ~== interceptR2 absTol 1E-3)
    +        assert(model2.weights ~= weightsR2 relTol 1E-3)
    +
    +        model1.transform(dataset).select("features", "prediction").collect().foreach {
    +          case Row(features: DenseVector, prediction1: Double) =>
    +            val prediction2 =
    +            features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept
    +            assert(prediction1 ~== prediction2 relTol 1E-5)
    +        }
    +      }
         }
       }
     
       test("linear regression model training summary") {
    -    val trainer = new LinearRegression
    -    val model = trainer.fit(dataset)
    -    val trainerNoPredictionCol = trainer.setPredictionCol("")
    -    val modelNoPredictionCol = trainerNoPredictionCol.fit(dataset)
    -
    -
    -    // Training results for the model should be available
    -    assert(model.hasSummary)
    -    assert(modelNoPredictionCol.hasSummary)
    -
    -    // Schema should be a superset of the input dataset
    -    assert((dataset.schema.fieldNames.toSet + "prediction").subsetOf(
    -      model.summary.predictions.schema.fieldNames.toSet))
    -    // Validate that we re-insert a prediction column for evaluation
    -    val modelNoPredictionColFieldNames = modelNoPredictionCol.summary.predictions.schema.fieldNames
    -    assert((dataset.schema.fieldNames.toSet).subsetOf(
    -      modelNoPredictionColFieldNames.toSet))
    -    assert(modelNoPredictionColFieldNames.exists(s => s.startsWith("prediction_")))
    -
    -    // Residuals in [[LinearRegressionResults]] should equal those manually computed
    -    val expectedResiduals = dataset.select("features", "label")
    -      .map { case Row(features: DenseVector, label: Double) =>
    -      val prediction =
    -        features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept
    -      label - prediction
    -    }
    -      .zip(model.summary.residuals.map(_.getDouble(0)))
    -      .collect()
    -      .foreach { case (manualResidual: Double, resultResidual: Double) =>
    -      assert(manualResidual ~== resultResidual relTol 1E-5)
    -    }
    +    Seq("auto", "l-bfgs", "normal").foreach { solver =>
    +      val trainer = new LinearRegression().setSolver(solver)
    +      val model = trainer.fit(dataset)
    +      val trainerNoPredictionCol = trainer.setPredictionCol("")
    +      val modelNoPredictionCol = trainerNoPredictionCol.fit(dataset)
    +
    +
    --- End diff --
    
    nit: extra blank line here.
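    
    For reference, a minimal sketch of how the start of this test body could read with the duplicate blank line dropped (hypothetical: it assumes the lines that follow simply mirror the pre-change block quoted above):
    
        Seq("auto", "l-bfgs", "normal").foreach { solver =>
          val trainer = new LinearRegression().setSolver(solver)
          val model = trainer.fit(dataset)
          val trainerNoPredictionCol = trainer.setPredictionCol("")
          val modelNoPredictionCol = trainerNoPredictionCol.fit(dataset)
    
          // Training results for the model should be available
          assert(model.hasSummary)
          assert(modelNoPredictionCol.hasSummary)
        }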

