This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 745ed93fe45 [SPARK-43423][PYTHON][ML][TESTS] Retry when `test_gmm` fails
745ed93fe45 is described below

commit 745ed93fe451b3f9e8148b06356c28b889a4db5a
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Tue May 9 20:16:39 2023 +0900

    [SPARK-43423][PYTHON][ML][TESTS] Retry when `test_gmm` fails

    ### What changes were proposed in this pull request?
    `test_gmm` is a bit flaky, I can see it fails about 1~3 times per week, for example, https://github.com/apache/spark/actions/runs/4921792416/jobs/8791985336

    this PR is to retry it if it fails

    ### Why are the changes needed?
    to make CI more stable

    ### Does this PR introduce _any_ user-facing change?
    no, dev-only

    ### How was this patch tested?
    updated tests

    Closes #41101 from zhengruifeng/py_gmm_test.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/mllib/tests/test_algorithms.py | 31 ++++++++++++++++-----------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/python/pyspark/mllib/tests/test_algorithms.py b/python/pyspark/mllib/tests/test_algorithms.py
index 6a9be99ecdf..dc48c2c021d 100644
--- a/python/pyspark/mllib/tests/test_algorithms.py
+++ b/python/pyspark/mllib/tests/test_algorithms.py
@@ -28,6 +28,7 @@ from pyspark.mllib.recommendation import Rating
 from pyspark.mllib.regression import LabeledPoint
 from pyspark.serializers import CPickleSerializer
 from pyspark.testing.mllibutils import MLlibTestCase
+from pyspark.testing.utils import eventually
 
 
 class ListTests(MLlibTestCase):
@@ -99,18 +100,24 @@ class ListTests(MLlibTestCase):
     def test_gmm(self):
         from pyspark.mllib.clustering import GaussianMixture
 
-        data = self.sc.parallelize(
-            [
-                [1, 2],
-                [8, 9],
-                [-4, -3],
-                [-6, -7],
-            ]
-        )
-        clusters = GaussianMixture.train(data, 2, convergenceTol=0.001, maxIterations=10, seed=1)
-        labels = clusters.predict(data).collect()
-        self.assertEqual(labels[0], labels[1])
-        self.assertEqual(labels[2], labels[3])
+        def condition():
+            data = self.sc.parallelize(
+                [
+                    [1, 2],
+                    [8, 9],
+                    [-4, -3],
+                    [-6, -7],
+                ]
+            )
+            clusters = GaussianMixture.train(
+                data, 2, convergenceTol=0.001, maxIterations=10, seed=1
+            )
+            labels = clusters.predict(data).collect()
+            self.assertEqual(labels[0], labels[1])
+            self.assertEqual(labels[2], labels[3])
+            return True
+
+        eventually(condition, timeout=60, catch_assertions=True)
 
     def test_gmm_deterministic(self):
         from pyspark.mllib.clustering import GaussianMixture


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org