GitHub user srowen commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21689#discussion_r199950647

    --- Diff: mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala ---
    @@ -76,23 +78,25 @@ class PowerIterationClusteringSuite extends SparkFunSuite
           .setMaxIter(40)
           .setWeightCol("weight")
           .assignClusters(data)
    -    val localAssignments = assignments
    -      .select('id, 'cluster)
    -      .as[(Long, Int)].collect().toSet
    -    val expectedResult = (0 until n1).map(x => (x, 1)).toSet ++
    -      (n1 until n).map(x => (x, 0)).toSet
    -    assert(localAssignments === expectedResult)
    +
    +    val predictions = Array.fill(2)(mutable.Set.empty[Long])
    +    assignments.select("id", "cluster").collect().foreach {
    +      case Row(id: Long, cluster: Integer) => predictions(cluster) += id
    +    }
    +    assert(predictions.toSet == Set((0 until n1).toSet, (n1 until n).toSet))
    --- End diff --

    I think we want `===` here?
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org