Repository: spark Updated Branches: refs/heads/master 81da3bee6 -> bc8890b35
[SPARK-13132][MLLIB] cache standardization param value in LogisticRegression Cache the value of the standardization Param in LogisticRegression, rather than re-fetching it from the ParamMap for every index and every optimization step in the quasi-Newton optimizer. Also, fix Param#toString to cache the stringified representation, rather than re-interpolating it on every call, so any other implementations that have similar repeated access patterns will see a benefit. This change improves training times for one of my test sets from ~7m30s to ~4m30s. Author: Gary King <g...@idibon.com> Closes #11027 from idigary/spark-13132-optimize-logistic-regression. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bc8890b3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bc8890b3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bc8890b3 Branch: refs/heads/master Commit: bc8890b357811612ba6c10d96374902b9e08134f Parents: 81da3be Author: Gary King <g...@idibon.com> Authored: Sun Feb 7 09:13:28 2016 +0000 Committer: Sean Owen <so...@cloudera.com> Committed: Sun Feb 7 09:13:28 2016 +0000 ---------------------------------------------------------------------- .../org/apache/spark/ml/classification/LogisticRegression.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/param/params.scala | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/bc8890b3/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 9b2340a..ac01245 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -332,12 +332,13 @@ class LogisticRegression @Since("1.2.0") ( val optimizer = if ($(elasticNetParam) == 0.0 || $(regParam) == 0.0) { new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) } else { + val standardizationParam = $(standardization) def regParamL1Fun = (index: Int) => { // Remove the L1 penalization on the intercept if (index == numFeatures) { 0.0 } else { - if ($(standardization)) { + if (standardizationParam) { regParamL1 } else { // If `standardization` is false, we still standardize the data http://git-wip-us.apache.org/repos/asf/spark/blob/bc8890b3/mllib/src/main/scala/org/apache/spark/ml/param/params.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index f48923d..d7d6c0f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -117,7 +117,9 @@ class Param[T](val parent: String, val name: String, val doc: String, val isVali } } - override final def toString: String = s"${parent}__$name" + private[this] val stringRepresentation = s"${parent}__$name" + + override final def toString: String = stringRepresentation override final def hashCode: Int = toString.## --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org