This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 745ed93fe45 [SPARK-43423][PYTHON][ML][TESTS] Retry when `test_gmm` fails
745ed93fe45 is described below

commit 745ed93fe451b3f9e8148b06356c28b889a4db5a
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Tue May 9 20:16:39 2023 +0900

    [SPARK-43423][PYTHON][ML][TESTS] Retry when `test_gmm` fails
    
    ### What changes were proposed in this pull request?
    `test_gmm` is a bit flaky: I have seen it fail about 1~3 times per week, for example https://github.com/apache/spark/actions/runs/4921792416/jobs/8791985336
    
    This PR retries the test if it fails.
    
    ### Why are the changes needed?
    to make CI more stable
    
    ### Does this PR introduce _any_ user-facing change?
    no, dev-only
    
    ### How was this patch tested?
    updated tests
    
    Closes #41101 from zhengruifeng/py_gmm_test.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/mllib/tests/test_algorithms.py | 31 ++++++++++++++++-----------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/python/pyspark/mllib/tests/test_algorithms.py b/python/pyspark/mllib/tests/test_algorithms.py
index 6a9be99ecdf..dc48c2c021d 100644
--- a/python/pyspark/mllib/tests/test_algorithms.py
+++ b/python/pyspark/mllib/tests/test_algorithms.py
@@ -28,6 +28,7 @@ from pyspark.mllib.recommendation import Rating
 from pyspark.mllib.regression import LabeledPoint
 from pyspark.serializers import CPickleSerializer
 from pyspark.testing.mllibutils import MLlibTestCase
+from pyspark.testing.utils import eventually
 
 
 class ListTests(MLlibTestCase):
@@ -99,18 +100,24 @@ class ListTests(MLlibTestCase):
     def test_gmm(self):
         from pyspark.mllib.clustering import GaussianMixture
 
-        data = self.sc.parallelize(
-            [
-                [1, 2],
-                [8, 9],
-                [-4, -3],
-                [-6, -7],
-            ]
-        )
-        clusters = GaussianMixture.train(data, 2, convergenceTol=0.001, maxIterations=10, seed=1)
-        labels = clusters.predict(data).collect()
-        self.assertEqual(labels[0], labels[1])
-        self.assertEqual(labels[2], labels[3])
+        def condition():
+            data = self.sc.parallelize(
+                [
+                    [1, 2],
+                    [8, 9],
+                    [-4, -3],
+                    [-6, -7],
+                ]
+            )
+            clusters = GaussianMixture.train(
+                data, 2, convergenceTol=0.001, maxIterations=10, seed=1
+            )
+            labels = clusters.predict(data).collect()
+            self.assertEqual(labels[0], labels[1])
+            self.assertEqual(labels[2], labels[3])
+            return True
+
+        eventually(condition, timeout=60, catch_assertions=True)
 
     def test_gmm_deterministic(self):
         from pyspark.mllib.clustering import GaussianMixture


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to