This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 9ed60c2 [MINOR][TEST][ML] Speed up some tests of ML regression by loosening tolerance 9ed60c2 is described below commit 9ed60c2c33737d4017ab8fb2628c40f8b14f3c5c Author: Sean Owen <sean.o...@databricks.com> AuthorDate: Fri Apr 12 09:31:12 2019 -0500 [MINOR][TEST][ML] Speed up some tests of ML regression by loosening tolerance ## What changes were proposed in this pull request? Loosen some tolerances in the ML regression-related tests, as they seem to account for some of the top slow tests in https://spark-tests.appspot.com/slow-tests These changes are good for about a 25 second speedup on my laptop. ## How was this patch tested? Existing tests Closes #24351 from srowen/SpeedReg. Authored-by: Sean Owen <sean.o...@databricks.com> Signed-off-by: Sean Owen <sean.o...@databricks.com> --- .../ml/classification/LogisticRegressionSuite.scala | 21 ++++++++++++++------- .../GeneralizedLinearRegressionSuite.scala | 19 +++++++++---------- .../spark/ml/regression/LinearRegressionSuite.scala | 2 ++ 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 9af7fff..334f92b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -1140,8 +1140,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { test("binary logistic regression with intercept with ElasticNet regularization") { val trainer1 = (new LogisticRegression).setFitIntercept(true).setMaxIter(120) .setElasticNetParam(0.38).setRegParam(0.21).setStandardization(true).setWeightCol("weight") + .setTol(1e-5) val trainer2 = (new LogisticRegression).setFitIntercept(true).setMaxIter(60) .setElasticNetParam(0.38).setRegParam(0.21).setStandardization(false).setWeightCol("weight") + .setTol(1e-5) val model1 = trainer1.fit(binaryDataset) val model2 = trainer2.fit(binaryDataset) @@ -1489,12 +1491,14 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { .setFitIntercept(true) .setStandardization(true) .setWeightCol("weight") + .setTol(1e-5) val trainer2 = new LogisticRegression() .setLowerBoundsOnCoefficients(lowerBoundsOnCoefficients) .setLowerBoundsOnIntercepts(lowerBoundsOnIntercepts) .setFitIntercept(true) .setStandardization(false) .setWeightCol("weight") + .setTol(1e-5) val model1 = trainer1.fit(multinomialDataset) val model2 = trainer2.fit(multinomialDataset) @@ -1690,10 +1694,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { // use tighter constraints because OWL-QN solver takes longer to converge val trainer1 = (new LogisticRegression).setFitIntercept(true) .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true) - .setMaxIter(160).setTol(1e-10).setWeightCol("weight") + .setMaxIter(160).setTol(1e-5).setWeightCol("weight") val trainer2 = (new LogisticRegression).setFitIntercept(true) .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false) - .setMaxIter(110).setTol(1e-10).setWeightCol("weight") + .setMaxIter(110).setTol(1e-5).setWeightCol("weight") val model1 = trainer1.fit(multinomialDataset) val model2 = trainer2.fit(multinomialDataset) @@ -1791,8 +1795,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { test("multinomial logistic regression without intercept with L1 regularization") { val trainer1 = (new LogisticRegression).setFitIntercept(false) .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true).setWeightCol("weight") + .setTol(1e-5) val trainer2 = (new LogisticRegression).setFitIntercept(false) .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false).setWeightCol("weight") + .setTol(1e-5) val model1 = trainer1.fit(multinomialDataset) val model2 = trainer2.fit(multinomialDataset) @@ -2156,10 +2162,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { test("multinomial logistic regression with intercept with elasticnet regularization") { val trainer1 = (new LogisticRegression).setFitIntercept(true).setWeightCol("weight") .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true) - .setMaxIter(220).setTol(1e-10) + .setMaxIter(180).setTol(1e-5) val trainer2 = (new LogisticRegression).setFitIntercept(true).setWeightCol("weight") .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false) - .setMaxIter(220).setTol(1e-10) + .setMaxIter(150).setTol(1e-5) val model1 = trainer1.fit(multinomialDataset) val model2 = trainer2.fit(multinomialDataset) @@ -2255,10 +2261,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { test("multinomial logistic regression without intercept with elasticnet regularization") { val trainer1 = (new LogisticRegression).setFitIntercept(false).setWeightCol("weight") .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true) - .setMaxIter(75).setTol(1e-10) + .setTol(1e-5) val trainer2 = (new LogisticRegression).setFitIntercept(false).setWeightCol("weight") .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false) - .setMaxIter(50).setTol(1e-10) + .setTol(1e-5) val model1 = trainer1.fit(multinomialDataset) val model2 = trainer2.fit(multinomialDataset) @@ -2672,6 +2678,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { val trainer1 = new LogisticRegression() .setRegParam(0.1) .setElasticNetParam(1.0) + .setMaxIter(20) // compressed row major is optimal val model1 = trainer1.fit(multinomialDataset.limit(100)) @@ -2687,7 +2694,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { // coefficients are dense without L1 regularization val trainer2 = new LogisticRegression() - .setElasticNetParam(0.0) + .setElasticNetParam(0.0).setMaxIter(1) val model3 = trainer2.fit(multinomialDataset.limit(100)) assert(model3.coefficientMatrix.isInstanceOf[DenseMatrix]) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index fc1284e..a30c472 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.regression import scala.util.Random -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.SparkConf import org.apache.spark.ml.classification.LogisticRegressionSuite._ import org.apache.spark.ml.feature.{Instance, OffsetInstance} import org.apache.spark.ml.feature.{LabeledPoint, RFormula} @@ -28,7 +28,6 @@ import org.apache.spark.ml.param.{ParamMap, ParamsSuite} import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.random._ -import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.serializer.KryoSerializer import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ @@ -269,7 +268,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest ("inverse", datasetGaussianInverse))) { for (fitIntercept <- Seq(false, true)) { val trainer = new GeneralizedLinearRegression().setFamily("gaussian").setLink(link) - .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction") + .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3) val model = trainer.fit(dataset) val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1)) assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gaussian family, " + @@ -328,7 +327,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest for (fitIntercept <- Seq(false, true); regParam <- Seq(0.0, 0.1, 1.0)) { val trainer = new GeneralizedLinearRegression().setFamily("gaussian") - .setFitIntercept(fitIntercept).setRegParam(regParam) + .setFitIntercept(fitIntercept).setRegParam(regParam).setTol(1e-3) val model = trainer.fit(datasetGaussianIdentity) val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1)) assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gaussian family, " + @@ -384,7 +383,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest ("cloglog", datasetBinomial))) { for (fitIntercept <- Seq(false, true)) { val trainer = new GeneralizedLinearRegression().setFamily("binomial").setLink(link) - .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction") + .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3) val model = trainer.fit(dataset) val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1), model.coefficients(2), model.coefficients(3)) @@ -457,7 +456,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest ("sqrt", datasetPoissonSqrt))) { for (fitIntercept <- Seq(false, true)) { val trainer = new GeneralizedLinearRegression().setFamily("poisson").setLink(link) - .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction") + .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3) val model = trainer.fit(dataset) val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1)) assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with poisson family, " + @@ -515,7 +514,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest val dataset = datasetPoissonLogWithZero for (fitIntercept <- Seq(false, true)) { val trainer = new GeneralizedLinearRegression().setFamily("poisson").setLink(link) - .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction") + .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3) val model = trainer.fit(dataset) val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1)) assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with poisson family, " + @@ -573,7 +572,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest ("identity", datasetGammaIdentity), ("log", datasetGammaLog))) { for (fitIntercept <- Seq(false, true)) { val trainer = new GeneralizedLinearRegression().setFamily("Gamma").setLink(link) - .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction") + .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3) val model = trainer.fit(dataset) val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1)) assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gamma family, " + @@ -659,7 +658,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest variancePower <- Seq(1.6, 2.5)) { val trainer = new GeneralizedLinearRegression().setFamily("tweedie") .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction") - .setVariancePower(variancePower).setLinkPower(linkPower) + .setVariancePower(variancePower).setLinkPower(linkPower).setTol(1e-4) val model = trainer.fit(datasetTweedie) val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1)) assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with tweedie family, " + @@ -736,7 +735,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest for (variancePower <- Seq(0.0, 1.0, 2.0, 1.5)) { val trainer = new GeneralizedLinearRegression().setFamily("tweedie") .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction") - .setVariancePower(variancePower) + .setVariancePower(variancePower).setTol(1e-3) val model = trainer.fit(datasetTweedie) val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1)) assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with tweedie family, " + diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala index c4db336..d3df0e5 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala @@ -892,6 +892,7 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe .setRegParam(regParam) .setElasticNetParam(elasticNetParam) .setSolver(solver) + .setMaxIter(1) MLTestingUtils.testArbitrarilyScaledWeights[LinearRegressionModel, LinearRegression]( datasetWithStrongNoise.as[LabeledPoint], estimator, modelEquals) MLTestingUtils.testOutliersWithSmallWeights[LinearRegressionModel, LinearRegression]( @@ -908,6 +909,7 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe .setFitIntercept(fitIntercept) .setStandardization(standardization) .setRegParam(regParam) + .setMaxIter(1) MLTestingUtils.testArbitrarilyScaledWeights[LinearRegressionModel, LinearRegression]( datasetWithOutlier.as[LabeledPoint], estimator, modelEquals) MLTestingUtils.testOutliersWithSmallWeights[LinearRegressionModel, LinearRegression]( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org