Github user mengxr commented on a diff in the pull request: https://github.com/apache/spark/pull/21493#discussion_r192910421 --- Diff: mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala --- @@ -62,136 +61,82 @@ class PowerIterationClusteringSuite extends SparkFunSuite new PowerIterationClustering().setInitMode("no_such_a_mode") } intercept[IllegalArgumentException] { - new PowerIterationClustering().setIdCol("") + new PowerIterationClustering().setSrcCol("") } intercept[IllegalArgumentException] { - new PowerIterationClustering().setNeighborsCol("") - } - intercept[IllegalArgumentException] { - new PowerIterationClustering().setSimilaritiesCol("") + new PowerIterationClustering().setDstCol("") } } test("power iteration clustering") { val n = n1 + n2 - val model = new PowerIterationClustering() + val result = new PowerIterationClustering() .setK(2) .setMaxIter(40) - val result = model.transform(data) - - val predictions = Array.fill(2)(mutable.Set.empty[Long]) - result.select("id", "prediction").collect().foreach { - case Row(id: Long, cluster: Integer) => predictions(cluster) += id - } - assert(predictions.toSet == Set((1 until n1).toSet, (n1 until n).toSet)) + .setWeightCol("weight") + .assignClusters(data).as[(Long, Int)].collect().toSet --- End diff -- It is better to split a long chain of methods. ~~~scala val assignments = new ... ... .assignClusters(...) val localAssignments = assignments .select('id, 'cluster) // needed because we didn't put a contract on column order .as[(Long, Int)] .collect() .toSet ~~~
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org