spark git commit: [MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.

meng Tue, 11 Nov 2014 22:48:07 -0800

Repository: spark
Updated Branches:
  refs/heads/master daaca14c1 -> 2ef016b13



[MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.

Before:
[info] GradientBoostingSuite:
[info] - Regression with continuous features: SquaredError (22 seconds, 115 milliseconds)
[info] - Regression with continuous features: Absolute Error (19 seconds, 330 milliseconds)
[info] - Binary classification with continuous features: Log Loss (19 seconds, 17 milliseconds)

After:
[info] - Regression with continuous features: SquaredError (7 seconds, 69 milliseconds)
[info] - Regression with continuous features: Absolute Error (4 seconds, 617 milliseconds)
[info] - Binary classification with continuous features: Log Loss (4 seconds, 658 milliseconds)

cc: mengxr, jkbradley

Author: Manish Amde <manish...@gmail.com>

Closes #3214 from manishamde/gbt_test_speedup and squashes the following commits:

8994552 [Manish Amde] reducing gbt test run times


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2ef016b1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2ef016b1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2ef016b1

Branch: refs/heads/master
Commit: 2ef016b130a48869cf81fe6cf147ef2b1e79d674
Parents: daaca14
Author: Manish Amde <manish...@gmail.com>
Authored: Tue Nov 11 22:47:53 2014 -0800
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Tue Nov 11 22:47:53 2014 -0800

----------------------------------------------------------------------
 .../apache/spark/mllib/tree/GradientBoostingSuite.scala   | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2ef016b1/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
index 99a02ed..ae0028a 100644
--- 
a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
@@ -35,7 +35,7 @@ class GradientBoostingSuite extends FunSuite with 
LocalSparkContext {
   test("Regression with continuous features: SquaredError") {
     GradientBoostingSuite.testCombinations.foreach {
       case (numIterations, learningRate, subsamplingRate) =>
-        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures 
= 50, 1000)
+        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures 
= 10, 100)
         val rdd = sc.parallelize(arr)
         val categoricalFeaturesInfo = Map.empty[Int, Int]
 
@@ -53,7 +53,7 @@ class GradientBoostingSuite extends FunSuite with 
LocalSparkContext {
         assert(gbt.weakHypotheses.size === numIterations)
         val gbtTree = gbt.weakHypotheses(0)
 
-        EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
+        EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
 
         // Make sure trees are the same.
         assert(gbtTree.toString == dt.toString)
@@ -63,7 +63,7 @@ class GradientBoostingSuite extends FunSuite with 
LocalSparkContext {
   test("Regression with continuous features: Absolute Error") {
     GradientBoostingSuite.testCombinations.foreach {
       case (numIterations, learningRate, subsamplingRate) =>
-        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures 
= 50, 1000)
+        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures 
= 10, 100)
         val rdd = sc.parallelize(arr)
         val categoricalFeaturesInfo = Map.empty[Int, Int]
 
@@ -81,7 +81,7 @@ class GradientBoostingSuite extends FunSuite with 
LocalSparkContext {
         assert(gbt.weakHypotheses.size === numIterations)
         val gbtTree = gbt.weakHypotheses(0)
 
-        EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
+        EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
 
         // Make sure trees are the same.
         assert(gbtTree.toString == dt.toString)
@@ -91,7 +91,7 @@ class GradientBoostingSuite extends FunSuite with 
LocalSparkContext {
   test("Binary classification with continuous features: Log Loss") {
     GradientBoostingSuite.testCombinations.foreach {
       case (numIterations, learningRate, subsamplingRate) =>
-        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures 
= 50, 1000)
+        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures 
= 10, 100)
         val rdd = sc.parallelize(arr)
         val categoricalFeaturesInfo = Map.empty[Int, Int]
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.

Reply via email to