This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 89a7a5b2981 [SPARK-38854][PYTHON][TEST] Improve the test coverage for pyspark/statcounter.py 89a7a5b2981 is described below commit 89a7a5b29815d9547e1d652d97ea07a9b5e9fecf Author: pralabhkumar <pralabhku...@gmail.com> AuthorDate: Tue Apr 12 20:41:11 2022 +0900 [SPARK-38854][PYTHON][TEST] Improve the test coverage for pyspark/statcounter.py ### What changes were proposed in this pull request? This PR adds tests for the following cases: - mergeStats is called when one StatCounter is much larger than the other. - An empty list or None is passed to StatCounter. ### Why are the changes needed? To cover corner test cases and increase coverage ### Does this PR introduce _any_ user-facing change? No - test only ### How was this patch tested? CI in this PR should test it out Closes #36145 from pralabhkumar/rk_increase_coverage_statcounter. Authored-by: pralabhkumar <pralabhku...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/tests/test_statcounter.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/python/pyspark/tests/test_statcounter.py b/python/pyspark/tests/test_statcounter.py index 9651871e113..b10fe7cd911 100644 --- a/python/pyspark/tests/test_statcounter.py +++ b/python/pyspark/tests/test_statcounter.py @@ -16,6 +16,7 @@ # from pyspark.statcounter import StatCounter from pyspark.testing.utils import ReusedPySparkTestCase +import math class StatCounterTests(ReusedPySparkTestCase): @@ -76,6 +77,31 @@ class StatCounterTests(ReusedPySparkTestCase): self.assertEqual(stats.sum(), 20.0) self.assertAlmostEqual(stats.variance(), 1.25) self.assertAlmostEqual(stats.sampleVariance(), 1.4285714285714286) + execution_statements = [ + StatCounter([1.0, 2.0]).mergeStats(StatCounter(range(1, 301))), + StatCounter(range(1, 301)).mergeStats(StatCounter([1.0, 2.0])), + ] + for stats in execution_statements: + self.assertEqual(stats.count(), 302) + 
self.assertEqual(stats.max(), 300.0) + self.assertEqual(stats.min(), 1.0) + self.assertAlmostEqual(stats.mean(), 149.51324503311) + self.assertAlmostEqual(stats.variance(), 7596.302804701549) + self.assertAlmostEqual(stats.sampleVariance(), 7621.539691095905) + + def test_variance_when_size_zero(self): + # SPARK-38854: Test case to improve test coverage when + # StatCounter argument is empty list or None + arguments = [[], None] + + for arg in arguments: + stats = StatCounter(arg) + self.assertTrue(math.isnan(stats.variance())) + self.assertTrue(math.isnan(stats.sampleVariance())) + self.assertEqual(stats.count(), 0) + self.assertTrue(math.isinf(stats.max())) + self.assertTrue(math.isinf(stats.min())) + self.assertEqual(stats.mean(), 0.0) def test_merge_stats_with_self(self): stats = StatCounter([1.0, 2.0, 3.0, 4.0]) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org