Github user srowen commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21689#discussion_r199950628

    --- Diff: mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala ---
    @@ -76,23 +78,25 @@ class PowerIterationClusteringSuite extends SparkFunSuite
           .setMaxIter(40)
           .setWeightCol("weight")
           .assignClusters(data)
    -    val localAssignments = assignments
    -      .select('id, 'cluster)
    -      .as[(Long, Int)].collect().toSet
    -    val expectedResult = (0 until n1).map(x => (x, 1)).toSet ++
    -      (n1 until n).map(x => (x, 0)).toSet
    -    assert(localAssignments === expectedResult)
    +
    +    val predictions = Array.fill(2)(mutable.Set.empty[Long])
    +    assignments.select("id", "cluster").collect().foreach {
    --- End diff --

    Nit: I think this was clearer with `.as[(Long, Int)]`, as it avoids matching on `Row`. I don't feel strongly about it; it would just mean less change.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org