[ https://issues.apache.org/jira/browse/MAHOUT-1962?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15995519#comment-15995519 ]
ASF GitHub Bot commented on MAHOUT-1962: ---------------------------------------- Github user rawkintrevo commented on a diff in the pull request: https://github.com/apache/mahout/pull/300#discussion_r114637027 --- Diff: math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/LinearRegressorModel.scala --- @@ -54,46 +58,72 @@ trait LinearRegressorFitter[K] extends RegressorFitter[K] { addIntercept = hyperparameters.asInstanceOf[Map[Symbol, Boolean]].getOrElse('addIntercept, true) } + def calculateStandardError[M[K] <: LinearRegressorModel[K]](X: DrmLike[K], drmTarget: DrmLike[K], drmXtXinv: Matrix, model: M[K]): M[K] = { import org.apache.mahout.math.function.Functions.SQRT import org.apache.mahout.math.scalabindings.MahoutCollections._ - var modelOut = model + val yhat = X %*% model.beta val residuals = drmTarget - yhat - val ete = (residuals.t %*% residuals).collect // 1x1 + + // Setting modelOut.rss + // Changed name from ete, to rssModel. This is residual sum of squares for model of yhat vs y + var modelOut = calculateResidualSumOfSquares(model,residuals) + val n = drmTarget.nrow val k = safeToNonNegInt(X.ncol) val invDegFreedomKindOf = 1.0 / (n - k) - val varCovarMatrix = invDegFreedomKindOf * ete(0,0) * drmXtXinv + val varCovarMatrix = invDegFreedomKindOf * modelOut.rss * drmXtXinv val se = varCovarMatrix.viewDiagonal.assign(SQRT) val tScore = model.beta / se - val tDist = new org.apache.commons.math3.distribution.TDistribution(n-k) + val tDist = new TDistribution(n-k) + val pval = dvec(tScore.toArray.map(t => 2 * (1.0 - tDist.cumulativeProbability(Math.abs(t))) )) + // ^^ TODO bug in this calculation- fix and add test //degreesFreedom = k - - modelOut.se = se modelOut.tScore = tScore modelOut.pval = pval - modelOut.degreesFreedom = X.ncol - modelOut.summary = generateSummaryString(modelOut) + // for degrees of freedom, dont count the intercept term that was added + modelOut.degreesFreedom = X.ncol - 1 + + modelOut.trainingExamples = n.toInt + 
if (calcCommonStatistics){ - modelOut = calculateCommonStatistics(modelOut, drmTarget, residuals) + modelOut = calculateCommonStatistics(modelOut, X, drmTarget, residuals) --- End diff -- OK, you changed this to take `X` but the only thing you use X for is to calculate the d.o.f. and `model.trainingExamples` which are already included in the model (you just added them). I would refactor this so X isn't a parameter. > Add F-test to Linear Regression Fitness Tests > ---------------------------------------------- > > Key: MAHOUT-1962 > URL: https://issues.apache.org/jira/browse/MAHOUT-1962 > Project: Mahout > Issue Type: Improvement > Components: Algorithms > Affects Versions: 0.12.0, 0.12.1, 0.13.0, 0.12.2 > Reporter: Dustin VanStee > Priority: Minor > Labels: beginner > Fix For: 0.13.1 > > Original Estimate: 96h > Remaining Estimate: 96h > > This update will modify > org.apache.mahout.math.algorithms.regression.tests.FittnessTests.scala and > add an overall Ftest for significance of one or more parameters being not > equal to zero. -- This message was sent by Atlassian JIRA (v6.3.15#6346)