Repository: spark Updated Branches: refs/heads/master fa01bec48 -> 1c3e402e6
[SPARK-7279] Removed diffSum which is theoretically zero in LinearRegression and coding formatting Author: DB Tsai <d...@netflix.com> Closes #5809 from dbtsai/format and squashes the following commits: 6904eed [DB Tsai] trigger jenkins 9146e19 [DB Tsai] initial commit Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c3e402e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c3e402e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c3e402e Branch: refs/heads/master Commit: 1c3e402e669d047410b00de9193adf3c329844a2 Parents: fa01bec Author: DB Tsai <d...@netflix.com> Authored: Thu Apr 30 16:26:51 2015 -0700 Committer: Xiangrui Meng <m...@databricks.com> Committed: Thu Apr 30 16:26:51 2015 -0700 ---------------------------------------------------------------------- .../spark/ml/regression/LinearRegression.scala | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/1c3e402e/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 11c6cea..0b81c48 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -25,8 +25,7 @@ import breeze.optimize.{CachedDiffFunction, DiffFunction} import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param.{Params, ParamMap} -import org.apache.spark.ml.param.shared.{HasTol, HasElasticNetParam, HasMaxIter, - HasRegParam} +import org.apache.spark.ml.param.shared.{HasTol, HasElasticNetParam, HasMaxIter, HasRegParam} import 
org.apache.spark.mllib.stat.MultivariateOnlineSummarizer import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.linalg.BLAS._ @@ -103,9 +102,7 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress case LabeledPoint(label: Double, features: Vector) => (label, features) } val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE - if (handlePersistence) { - instances.persist(StorageLevel.MEMORY_AND_DISK) - } + if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) val (summarizer, statCounter) = instances.treeAggregate( (new MultivariateOnlineSummarizer, new StatCounter))( { @@ -146,8 +143,7 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress val optimizer = if (paramMap(elasticNetParam) == 0.0 || effectiveRegParam == 0.0) { new BreezeLBFGS[BDV[Double]](paramMap(maxIter), 10, paramMap(tol)) } else { - new BreezeOWLQN[Int, BDV[Double]](paramMap(maxIter), 10, effectiveL1RegParam, - paramMap(tol)) + new BreezeOWLQN[Int, BDV[Double]](paramMap(maxIter), 10, effectiveL1RegParam, paramMap(tol)) } val initialWeights = Vectors.zeros(numFeatures) @@ -304,9 +300,8 @@ private class LeastSquaresAggregator( featuresStd: Array[Double], featuresMean: Array[Double]) extends Serializable { - private var totalCnt: Long = 0 + private var totalCnt: Long = 0L private var lossSum = 0.0 - private var diffSum = 0.0 private val (effectiveWeightsArray: Array[Double], offset: Double, dim: Int) = { val weightsArray = weights.toArray.clone() @@ -323,9 +318,10 @@ private class LeastSquaresAggregator( } (weightsArray, -sum + labelMean / labelStd, weightsArray.length) } + private val effectiveWeightsVector = Vectors.dense(effectiveWeightsArray) - private val gradientSumArray: Array[Double] = Array.ofDim[Double](dim) + private val gradientSumArray = Array.ofDim[Double](dim) /** * Add a new training data to this LeastSquaresAggregator, and update the loss and gradient @@ -350,7 
+346,6 @@ private class LeastSquaresAggregator( } } lossSum += diff * diff / 2.0 - diffSum += diff } totalCnt += 1 @@ -372,7 +367,6 @@ private class LeastSquaresAggregator( if (other.totalCnt != 0) { totalCnt += other.totalCnt lossSum += other.lossSum - diffSum += other.diffSum var i = 0 val localThisGradientSumArray = this.gradientSumArray --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org