[GitHub] spark pull request: [SPARK-10668][ML] Use WeightedLeastSquares in ...

dbtsai Mon, 12 Oct 2015 16:10:24 -0700

Github user dbtsai commented on a diff in the pull request:

    https://github.com/apache/spark/pull/8884#discussion_r41811015
  
    --- Diff: mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala ---
    @@ -93,525 +94,559 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
       }
     
       test("linear regression with intercept without regularization") {
    -    val trainer1 = new LinearRegression
    -    // The result should be the same regardless of standardization without 
regularization
    -    val trainer2 = (new LinearRegression).setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val model2 = trainer2.fit(dataset)
    -
    -    /*
    -       Using the following R code to load the data and train the model 
using glmnet package.
    -
    -       library("glmnet")
    -       data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
    -       features <- as.matrix(data.frame(as.numeric(data$V2), 
as.numeric(data$V3)))
    -       label <- as.numeric(data$V1)
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 
0, lambda = 0))
    -       > weights
    -        3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)         6.298698
    -       as.numeric.data.V2. 4.700706
    -       as.numeric.data.V3. 7.199082
    -     */
    -    val interceptR = 6.298698
    -    val weightsR = Vectors.dense(4.700706, 7.199082)
    -
    -    assert(model1.intercept ~== interceptR relTol 1E-3)
    -    assert(model1.weights ~= weightsR relTol 1E-3)
    -    assert(model2.intercept ~== interceptR relTol 1E-3)
    -    assert(model2.weights ~= weightsR relTol 1E-3)
    -
    -
    -    model1.transform(dataset).select("features", 
"prediction").collect().foreach {
    -      case Row(features: DenseVector, prediction1: Double) =>
    -        val prediction2 =
    -          features(0) * model1.weights(0) + features(1) * 
model1.weights(1) + model1.intercept
    -        assert(prediction1 ~== prediction2 relTol 1E-5)
    -    }
    +    Seq("auto", "l-bfgs", "normal").foreach { solver => {
    +      val trainer1 = new LinearRegression().setSolver(solver)
    +      // The result should be the same regardless of standardization 
without regularization
    +      val trainer2 = (new 
LinearRegression).setStandardization(false).setSolver(solver)
    +      val model1 = trainer1.fit(dataset)
    +      val model2 = trainer2.fit(dataset)
    +
    +      /*
    +         Using the following R code to load the data and train the model 
using glmnet package.
    +
    +         library("glmnet")
    +         data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
    +         features <- as.matrix(data.frame(as.numeric(data$V2), 
as.numeric(data$V3)))
    +         label <- as.numeric(data$V1)
    +         weights <- coef(glmnet(features, label, family="gaussian", alpha 
= 0, lambda = 0))
    +         > weights
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)         6.298698
    +         as.numeric.data.V2. 4.700706
    +         as.numeric.data.V3. 7.199082
    +       */
    +      val interceptR = 6.298698
    +      val weightsR = Vectors.dense(4.700706, 7.199082)
    +
    +      assert(model1.intercept ~== interceptR relTol 1E-3)
    +      assert(model1.weights ~= weightsR relTol 1E-3)
    +      assert(model2.intercept ~== interceptR relTol 1E-3)
    +      assert(model2.weights ~= weightsR relTol 1E-3)
    +
    +      model1.transform(dataset).select("features", 
"prediction").collect().foreach {
    +        case Row(features: DenseVector, prediction1: Double) =>
    +          val prediction2 =
    +            features(0) * model1.weights(0) + features(1) * 
model1.weights(1) + model1.intercept
    +          assert(prediction1 ~== prediction2 relTol 1E-5)
    +      }
    +    }}
       }
     
       test("linear regression without intercept without regularization") {
    -    val trainer1 = (new LinearRegression).setFitIntercept(false)
    -    // Without regularization the results should be the same
    -    val trainer2 = (new 
LinearRegression).setFitIntercept(false).setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val modelWithoutIntercept1 = trainer1.fit(datasetWithoutIntercept)
    -    val model2 = trainer2.fit(dataset)
    -    val modelWithoutIntercept2 = trainer2.fit(datasetWithoutIntercept)
    -
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 
0, lambda = 0,
    -         intercept = FALSE))
    -       > weights
    -        3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)         .
    -       as.numeric.data.V2. 6.995908
    -       as.numeric.data.V3. 5.275131
    -     */
    -    val weightsR = Vectors.dense(6.995908, 5.275131)
    -
    -    assert(model1.intercept ~== 0 absTol 1E-3)
    -    assert(model1.weights ~= weightsR relTol 1E-3)
    -    assert(model2.intercept ~== 0 absTol 1E-3)
    -    assert(model2.weights ~= weightsR relTol 1E-3)
    -
    -    /*
    -       Then again with the data with no intercept:
    -       > weightsWithoutIntercept
    -       3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)           .
    -       as.numeric.data3.V2. 4.70011
    -       as.numeric.data3.V3. 7.19943
    -     */
    -    val weightsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
    -
    -    assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3)
    -    assert(modelWithoutIntercept1.weights ~= weightsWithoutInterceptR 
relTol 1E-3)
    -    assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3)
    -    assert(modelWithoutIntercept2.weights ~= weightsWithoutInterceptR 
relTol 1E-3)
    +    Seq("auto", "l-bfgs", "normal").foreach { solver => {
    +      val trainer1 = (new 
LinearRegression).setFitIntercept(false).setSolver(solver)
    +      // Without regularization the results should be the same
    +      val trainer2 = (new 
LinearRegression).setFitIntercept(false).setStandardization(false)
    +        .setSolver(solver)
    +      val model1 = trainer1.fit(dataset)
    +      val modelWithoutIntercept1 = trainer1.fit(datasetWithoutIntercept)
    +      val model2 = trainer2.fit(dataset)
    +      val modelWithoutIntercept2 = trainer2.fit(datasetWithoutIntercept)
    +
    +      /*
    +         weights <- coef(glmnet(features, label, family="gaussian", alpha 
= 0, lambda = 0,
    +           intercept = FALSE))
    +         > weights
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)         .
    +         as.numeric.data.V2. 6.995908
    +         as.numeric.data.V3. 5.275131
    +       */
    +      val weightsR = Vectors.dense(6.995908, 5.275131)
    +
    +      assert(model1.intercept ~== 0 absTol 1E-3)
    +      assert(model1.weights ~= weightsR relTol 1E-3)
    +      assert(model2.intercept ~== 0 absTol 1E-3)
    +      assert(model2.weights ~= weightsR relTol 1E-3)
    +
    +      /*
    +         Then again with the data with no intercept:
    +         > weightsWithoutIntercept
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)           .
    +         as.numeric.data3.V2. 4.70011
    +         as.numeric.data3.V3. 7.19943
    +       */
    +      val weightsWithoutInterceptR = Vectors.dense(4.70011, 7.19943)
    +
    +      assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3)
    +      assert(modelWithoutIntercept1.weights ~= weightsWithoutInterceptR 
relTol 1E-3)
    +      assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3)
    +      assert(modelWithoutIntercept2.weights ~= weightsWithoutInterceptR 
relTol 1E-3)
    +    }}
       }
     
       test("linear regression with intercept with L1 regularization") {
    -    val trainer1 = (new 
LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    -    val trainer2 = (new 
LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    -      .setStandardization(false)
    -    val model1 = trainer1.fit(dataset)
    -    val model2 = trainer2.fit(dataset)
    -
    -    /*
    -       weights <- coef(glmnet(features, label, family="gaussian", alpha = 
1.0, lambda = 0.57))
    -       > weights
    -        3 x 1 sparse Matrix of class "dgCMatrix"
    -                                 s0
    -       (Intercept)         6.24300
    -       as.numeric.data.V2. 4.024821
    -       as.numeric.data.V3. 6.679841
    -     */
    -    val interceptR1 = 6.24300
    -    val weightsR1 = Vectors.dense(4.024821, 6.679841)
    -
    -    assert(model1.intercept ~== interceptR1 relTol 1E-3)
    -    assert(model1.weights ~= weightsR1 relTol 1E-3)
    -
    -    /*
    -      weights <- coef(glmnet(features, label, family="gaussian", alpha = 
1.0, lambda = 0.57,
    -        standardize=FALSE))
    -      > weights
    -       3 x 1 sparse Matrix of class "dgCMatrix"
    -                                s0
    -      (Intercept)         6.416948
    -      as.numeric.data.V2. 3.893869
    -      as.numeric.data.V3. 6.724286
    -     */
    -    val interceptR2 = 6.416948
    -    val weightsR2 = Vectors.dense(3.893869, 6.724286)
    -
    -    assert(model2.intercept ~== interceptR2 relTol 1E-3)
    -    assert(model2.weights ~= weightsR2 relTol 1E-3)
    -
    -
    -    model1.transform(dataset).select("features", 
"prediction").collect().foreach {
    -      case Row(features: DenseVector, prediction1: Double) =>
    -        val prediction2 =
    -          features(0) * model1.weights(0) + features(1) * 
model1.weights(1) + model1.intercept
    -        assert(prediction1 ~== prediction2 relTol 1E-5)
    -    }
    +    Seq("auto").foreach { solver => {
    +      val trainer1 = (new 
LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    +        .setSolver(solver)
    +      val trainer2 = (new 
LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
    +        .setSolver(solver).setStandardization(false)
    +      val model1 = trainer1.fit(dataset)
    +      val model2 = trainer2.fit(dataset)
    +
    +      /*
    +         weights <- coef(glmnet(features, label, family="gaussian", alpha 
= 1.0, lambda = 0.57))
    +         > weights
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                   s0
    +         (Intercept)         6.24300
    +         as.numeric.data.V2. 4.024821
    +         as.numeric.data.V3. 6.679841
    +       */
    +      val interceptR1 = 6.24300
    +      val weightsR1 = Vectors.dense(4.024821, 6.679841)
    +      assert(model1.intercept ~== interceptR1 relTol 1E-3)
    +      assert(model1.weights ~= weightsR1 relTol 1E-3)
    +
    +      /*
    +         weights <- coef(glmnet(features, label, family="gaussian", alpha 
= 1.0, lambda = 0.57,
    +           standardize=FALSE))
    +         > weights
    +          3 x 1 sparse Matrix of class "dgCMatrix"
    +                                  s0
    +         (Intercept)         6.416948
    +         as.numeric.data.V2. 3.893869
    +         as.numeric.data.V3. 6.724286
    +       */
    +      val interceptR2 = 6.416948
    +      val weightsR2 = Vectors.dense(3.893869, 6.724286)
    +
    +      assert(model2.intercept ~== interceptR2 relTol 1E-3)
    +      assert(model2.weights ~= weightsR2 relTol 1E-3)
    +
    +      model1.transform(dataset).select("features", 
"prediction").collect().foreach {
    +        case Row(features: DenseVector, prediction1: Double) =>
    +          val prediction2 =
    +            features(0) * model1.weights(0) + features(1) * 
model1.weights(1) + model1.intercept
    +          assert(prediction1 ~== prediction2 relTol 1E-5)
    +      }
    +    }}
    --- End diff --
    
    ditto



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request: [SPARK-10668][ML] Use WeightedLeastSquares in ...

Reply via email to