spark git commit: [SPARK-20764][ML][PYSPARK][FOLLOWUP] Fix visibility discrepancy with numInstances and degreesOfFreedom in LR and GLR - Python version
Repository: spark Updated Branches: refs/heads/branch-2.2 ee9d5975e -> e936a96ba [SPARK-20764][ML][PYSPARK][FOLLOWUP] Fix visibility discrepancy with numInstances and degreesOfFreedom in LR and GLR - Python version ## What changes were proposed in this pull request? Add test cases for PR-18062 ## How was this patch tested? The existing UT Author: Peng Closes #18068 from mpjlu/moreTest. (cherry picked from commit 9afcf127d31b5477a539dde6e5f01861532a1c4c) Signed-off-by: Yanbo Liang Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e936a96b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e936a96b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e936a96b Branch: refs/heads/branch-2.2 Commit: e936a96badfeeb2051ee35dc4b0fbecefa9bf4cb Parents: ee9d597 Author: Peng Authored: Wed May 24 19:54:17 2017 +0800 Committer: Yanbo Liang Committed: Wed May 24 19:54:58 2017 +0800 -- python/pyspark/ml/tests.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e936a96b/python/pyspark/ml/tests.py -- diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 51a3e8e..a3393c6 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -1066,6 +1066,7 @@ class TrainingSummaryTest(SparkSessionTestCase): self.assertAlmostEqual(s.r2, 1.0, 2) self.assertTrue(isinstance(s.residuals, DataFrame)) self.assertEqual(s.numInstances, 2) +self.assertEqual(s.degreesOfFreedom, 1) devResiduals = s.devianceResiduals self.assertTrue(isinstance(devResiduals, list) and isinstance(devResiduals[0], float)) coefStdErr = s.coefficientStandardErrors @@ -1075,7 +1076,8 @@ class TrainingSummaryTest(SparkSessionTestCase): pValues = s.pValues self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float)) # test evaluation (with training dataset) produces a summary with same values -# one check is enough to verify a summary is 
returned, Scala version runs full test +# one check is enough to verify a summary is returned +# The child class LinearRegressionTrainingSummary runs full test sameSummary = model.evaluate(df) self.assertAlmostEqual(sameSummary.explainedVariance, s.explainedVariance) @@ -1093,6 +1095,7 @@ class TrainingSummaryTest(SparkSessionTestCase): self.assertEqual(s.numIterations, 1) # this should default to a single iteration of WLS self.assertTrue(isinstance(s.predictions, DataFrame)) self.assertEqual(s.predictionCol, "prediction") +self.assertEqual(s.numInstances, 2) self.assertTrue(isinstance(s.residuals(), DataFrame)) self.assertTrue(isinstance(s.residuals("pearson"), DataFrame)) coefStdErr = s.coefficientStandardErrors @@ -1111,7 +1114,8 @@ class TrainingSummaryTest(SparkSessionTestCase): self.assertTrue(isinstance(s.nullDeviance, float)) self.assertTrue(isinstance(s.dispersion, float)) # test evaluation (with training dataset) produces a summary with same values -# one check is enough to verify a summary is returned, Scala version runs full test +# one check is enough to verify a summary is returned +# The child class GeneralizedLinearRegressionTrainingSummary runs full test sameSummary = model.evaluate(df) self.assertAlmostEqual(sameSummary.deviance, s.deviance) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20764][ML][PYSPARK][FOLLOWUP] Fix visibility discrepancy with numInstances and degreesOfFreedom in LR and GLR - Python version
Repository: spark Updated Branches: refs/heads/master d76633e3c -> 9afcf127d [SPARK-20764][ML][PYSPARK][FOLLOWUP] Fix visibility discrepancy with numInstances and degreesOfFreedom in LR and GLR - Python version ## What changes were proposed in this pull request? Add test cases for PR-18062 ## How was this patch tested? The existing UT Author: Peng Closes #18068 from mpjlu/moreTest. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9afcf127 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9afcf127 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9afcf127 Branch: refs/heads/master Commit: 9afcf127d31b5477a539dde6e5f01861532a1c4c Parents: d76633e Author: Peng Authored: Wed May 24 19:54:17 2017 +0800 Committer: Yanbo Liang Committed: Wed May 24 19:54:17 2017 +0800 -- python/pyspark/ml/tests.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9afcf127/python/pyspark/ml/tests.py -- diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 51a3e8e..a3393c6 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -1066,6 +1066,7 @@ class TrainingSummaryTest(SparkSessionTestCase): self.assertAlmostEqual(s.r2, 1.0, 2) self.assertTrue(isinstance(s.residuals, DataFrame)) self.assertEqual(s.numInstances, 2) +self.assertEqual(s.degreesOfFreedom, 1) devResiduals = s.devianceResiduals self.assertTrue(isinstance(devResiduals, list) and isinstance(devResiduals[0], float)) coefStdErr = s.coefficientStandardErrors @@ -1075,7 +1076,8 @@ class TrainingSummaryTest(SparkSessionTestCase): pValues = s.pValues self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float)) # test evaluation (with training dataset) produces a summary with same values -# one check is enough to verify a summary is returned, Scala version runs full test +# one check is enough to verify a summary is returned +# The child 
class LinearRegressionTrainingSummary runs full test sameSummary = model.evaluate(df) self.assertAlmostEqual(sameSummary.explainedVariance, s.explainedVariance) @@ -1093,6 +1095,7 @@ class TrainingSummaryTest(SparkSessionTestCase): self.assertEqual(s.numIterations, 1) # this should default to a single iteration of WLS self.assertTrue(isinstance(s.predictions, DataFrame)) self.assertEqual(s.predictionCol, "prediction") +self.assertEqual(s.numInstances, 2) self.assertTrue(isinstance(s.residuals(), DataFrame)) self.assertTrue(isinstance(s.residuals("pearson"), DataFrame)) coefStdErr = s.coefficientStandardErrors @@ -1111,7 +1114,8 @@ class TrainingSummaryTest(SparkSessionTestCase): self.assertTrue(isinstance(s.nullDeviance, float)) self.assertTrue(isinstance(s.dispersion, float)) # test evaluation (with training dataset) produces a summary with same values -# one check is enough to verify a summary is returned, Scala version runs full test +# one check is enough to verify a summary is returned +# The child class GeneralizedLinearRegressionTrainingSummary runs full test sameSummary = model.evaluate(df) self.assertAlmostEqual(sameSummary.deviance, s.deviance) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org