[ 
https://issues.apache.org/jira/browse/MAHOUT-1962?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15995519#comment-15995519
 ] 

ASF GitHub Bot commented on MAHOUT-1962:
----------------------------------------

Github user rawkintrevo commented on a diff in the pull request:

    https://github.com/apache/mahout/pull/300#discussion_r114637027
  
    --- Diff: 
math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/LinearRegressorModel.scala
 ---
    @@ -54,46 +58,72 @@ trait LinearRegressorFitter[K] extends 
RegressorFitter[K] {
         addIntercept = hyperparameters.asInstanceOf[Map[Symbol, 
Boolean]].getOrElse('addIntercept, true)
       }
     
    +
       def calculateStandardError[M[K] <: LinearRegressorModel[K]](X: 
DrmLike[K],
                                  drmTarget: DrmLike[K],
                                  drmXtXinv: Matrix,
                                  model: M[K]): M[K] = {
         import org.apache.mahout.math.function.Functions.SQRT
         import org.apache.mahout.math.scalabindings.MahoutCollections._
    -    var modelOut = model
    +
         val yhat = X %*% model.beta
         val residuals = drmTarget - yhat
    -    val ete = (residuals.t %*% residuals).collect // 1x1
    +
    +    // Setting modelOut.rss
    +    // Changed name from ete, to rssModel.  This is residual sum of 
squares for model of yhat vs y
    +    var modelOut = calculateResidualSumOfSquares(model,residuals)
    +
         val n = drmTarget.nrow
         val k = safeToNonNegInt(X.ncol)
         val invDegFreedomKindOf = 1.0 / (n - k)
    -    val varCovarMatrix = invDegFreedomKindOf * ete(0,0) * drmXtXinv
    +    val varCovarMatrix = invDegFreedomKindOf * modelOut.rss * drmXtXinv
         val se = varCovarMatrix.viewDiagonal.assign(SQRT)
         val tScore = model.beta / se
    -    val tDist = new 
org.apache.commons.math3.distribution.TDistribution(n-k)
    +    val tDist = new TDistribution(n-k)
    +
         val pval = dvec(tScore.toArray.map(t => 2 * (1.0 - 
tDist.cumulativeProbability(Math.abs(t))) ))
    +
         // ^^ TODO bug in this calculation- fix and add test
         //degreesFreedom = k
    -
    -
         modelOut.se = se
         modelOut.tScore = tScore
         modelOut.pval = pval
    -    modelOut.degreesFreedom = X.ncol
    -    modelOut.summary = generateSummaryString(modelOut)
    +    // for degrees of freedom, dont count the intercept term that was added
    +    modelOut.degreesFreedom = X.ncol - 1
    +
    +    modelOut.trainingExamples = n.toInt
    +
         if (calcCommonStatistics){
    -      modelOut = calculateCommonStatistics(modelOut, drmTarget, residuals)
    +      modelOut = calculateCommonStatistics(modelOut, X, drmTarget, 
residuals)
    --- End diff --
    
    OK- you changed this to take `X`, but the only things you use `X` for are 
calculating the d.o.f. and `model.trainingExamples`, which are already included 
in the model (you just added them).  I would refactor this so `X` isn't a parameter.


> Add F-test to Linear Regression  Fitness Tests
> ----------------------------------------------
>
>                 Key: MAHOUT-1962
>                 URL: https://issues.apache.org/jira/browse/MAHOUT-1962
>             Project: Mahout
>          Issue Type: Improvement
>          Components: Algorithms
>    Affects Versions: 0.12.0, 0.12.1, 0.13.0, 0.12.2
>            Reporter: Dustin VanStee
>            Priority: Minor
>              Labels: beginner
>             Fix For: 0.13.1
>
>   Original Estimate: 96h
>  Remaining Estimate: 96h
>
> This update will modify 
> org.apache.mahout.math.algorithms.regression.tests.FittnessTests.scala and 
> add an overall Ftest for significance of one or more parameters being not 
> equal to zero.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to