Github user WeichenXu123 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19122#discussion_r140402700
  
    --- Diff: python/pyspark/ml/tests.py ---
    @@ -836,6 +836,27 @@ def test_save_load_simple_estimator(self):
             loadedModel = CrossValidatorModel.load(cvModelPath)
             self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid)
     
    +    def test_parallel_evaluation(self):
    +        dataset = self.spark.createDataFrame(
    +            [(Vectors.dense([0.0]), 0.0),
    +             (Vectors.dense([0.4]), 1.0),
    +             (Vectors.dense([0.5]), 0.0),
    +             (Vectors.dense([0.6]), 1.0),
    +             (Vectors.dense([1.0]), 1.0)] * 10,
    +            ["features", "label"])
    +
    +        lr = LogisticRegression()
    +        grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
    +        evaluator = BinaryClassificationEvaluator()
    +
    +        # test save/load of CrossValidator
    +        cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
    +        cv.setParallelism(1)
    +        cvSerialModel = cv.fit(dataset)
    +        cv.setParallelism(2)
    +        cvParallelModel = cv.fit(dataset)
    +        self.assertEqual(sorted(cvSerialModel.avgMetrics), sorted(cvParallelModel.avgMetrics))
    --- End diff --
    
    Hmm... I tried, but how can I get the model parents?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to