Repository: spark Updated Branches: refs/heads/master daaca14c1 -> 2ef016b13
[MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.

Before:
[info] GradientBoostingSuite:
[info] - Regression with continuous features: SquaredError (22 seconds, 115 milliseconds)
[info] - Regression with continuous features: Absolute Error (19 seconds, 330 milliseconds)
[info] - Binary classification with continuous features: Log Loss (19 seconds, 17 milliseconds)

After:
[info] - Regression with continuous features: SquaredError (7 seconds, 69 milliseconds)
[info] - Regression with continuous features: Absolute Error (4 seconds, 617 milliseconds)
[info] - Binary classification with continuous features: Log Loss (4 seconds, 658 milliseconds)

cc: mengxr, jkbradley

Author: Manish Amde <manish...@gmail.com>

Closes #3214 from manishamde/gbt_test_speedup and squashes the following commits:

8994552 [Manish Amde] reducing gbt test run times

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2ef016b1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2ef016b1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2ef016b1

Branch: refs/heads/master
Commit: 2ef016b130a48869cf81fe6cf147ef2b1e79d674
Parents: daaca14
Author: Manish Amde <manish...@gmail.com>
Authored: Tue Nov 11 22:47:53 2014 -0800
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Tue Nov 11 22:47:53 2014 -0800

----------------------------------------------------------------------
 .../apache/spark/mllib/tree/GradientBoostingSuite.scala | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/2ef016b1/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala index 99a02ed..ae0028a 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala @@ -35,7 +35,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { test("Regression with continuous features: SquaredError") { GradientBoostingSuite.testCombinations.foreach { case (numIterations, learningRate, subsamplingRate) => - val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) + val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100) val rdd = sc.parallelize(arr) val categoricalFeaturesInfo = Map.empty[Int, Int] @@ -53,7 +53,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { assert(gbt.weakHypotheses.size === numIterations) val gbtTree = gbt.weakHypotheses(0) - EnsembleTestHelper.validateRegressor(gbt, arr, 0.02) + EnsembleTestHelper.validateRegressor(gbt, arr, 0.03) // Make sure trees are the same. assert(gbtTree.toString == dt.toString) @@ -63,7 +63,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { test("Regression with continuous features: Absolute Error") { GradientBoostingSuite.testCombinations.foreach { case (numIterations, learningRate, subsamplingRate) => - val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) + val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100) val rdd = sc.parallelize(arr) val categoricalFeaturesInfo = Map.empty[Int, Int] @@ -81,7 +81,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { assert(gbt.weakHypotheses.size === numIterations) val gbtTree = gbt.weakHypotheses(0) - EnsembleTestHelper.validateRegressor(gbt, arr, 0.02) + EnsembleTestHelper.validateRegressor(gbt, arr, 0.03) // Make sure trees are the same. 
assert(gbtTree.toString == dt.toString) @@ -91,7 +91,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { test("Binary classification with continuous features: Log Loss") { GradientBoostingSuite.testCombinations.foreach { case (numIterations, learningRate, subsamplingRate) => - val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) + val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100) val rdd = sc.parallelize(arr) val categoricalFeaturesInfo = Map.empty[Int, Int] --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org