[ 
https://issues.apache.org/jira/browse/SPARK-12363?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15059753#comment-15059753
 ] 

Yanbo Liang commented on SPARK-12363:
-------------------------------------

After I removed [this line|https://github.com/apache/spark/blob/master/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala#L388],
[this|https://github.com/apache/spark/blob/master/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala#L71]
test case failed.
It's very strange that the following two test cases use the same dataset, yet 
one succeeds and the other fails.
{code}
// Runs PowerIterationClustering with k = 2 on a 16-vertex similarity graph supplied as
// (vertex id, vertex id, similarity) triples: once without setting an initialization mode
// and once with the "degree" initialization mode. Both runs must split the vertices into
// the groups {0..3} and {4..15}.
test("power iteration clustering") {
    /*
     We use the following graph to test PIC. All edges are assigned similarity 1.0
     except 0.1 for edge (3, 4).
     15-14 -13 -12
     |           |
     4 . 3 - 2  11
     |   | x |   |
     5   0 - 1  10
     |           |
     6 - 7 - 8 - 9
     */

    // One triple per edge of the graph drawn above.
    val similarities = Seq[(Long, Long, Double)]((0, 1, 1.0), (0, 2, 1.0), (0, 
3, 1.0), (1, 2, 1.0),
      (1, 3, 1.0), (2, 3, 1.0), (3, 4, 0.1), // (3, 4) is a weak edge
      (4, 5, 1.0), (4, 15, 1.0), (5, 6, 1.0), (6, 7, 1.0), (7, 8, 1.0), (8, 9, 
1.0), (9, 10, 1.0),
      (10, 11, 1.0), (11, 12, 1.0), (12, 13, 1.0), (13, 14, 1.0), (14, 15, 1.0))
    // First run: only k is configured; the triples are passed through sc.parallelize(_, 2).
    val model = new PowerIterationClustering()
      .setK(2)
      .run(sc.parallelize(similarities, 2))
    // Bucket vertex ids by assigned cluster index: predictions(c) collects the ids in cluster c.
    val predictions = Array.fill(2)(mutable.Set.empty[Long])
    model.assignments.collect().foreach { a =>
      predictions(a.cluster) += a.id
    }
    // Compared as a set of sets, so it does not matter which cluster index each group received.
    assert(predictions.toSet == Set((0 to 3).toSet, (4 to 15).toSet))

    // Second run: same input and k, but with the initialization mode set to "degree".
    val model2 = new PowerIterationClustering()
      .setK(2)
      .setInitializationMode("degree")
      .run(sc.parallelize(similarities, 2))
    // Same bucketing and order-independent check as for the first run.
    val predictions2 = Array.fill(2)(mutable.Set.empty[Long])
    model2.assignments.collect().foreach { a =>
      predictions2(a.cluster) += a.id
    }
    assert(predictions2.toSet == Set((0 to 3).toSet, (4 to 15).toSet))
  }

  // Builds a Graph from the same 16-vertex similarity data (each similarity expanded into
  // an edge in each direction) and runs PowerIterationClustering with k = 2 on that graph,
  // then runs a second model with the "degree" initialization mode. Both runs must split
  // the vertices into the groups {0..3} and {4..15}.
  test("power iteration clustering on graph") {
    /*
     We use the following graph to test PIC. All edges are assigned similarity 1.0
     except 0.1 for edge (3, 4).
     15-14 -13 -12
     |           |
     4 . 3 - 2  11
     |   | x |   |
     5   0 - 1  10
     |           |
     6 - 7 - 8 - 9
     */

    // One triple per edge of the graph drawn above.
    val similarities = Seq[(Long, Long, Double)]((0, 1, 1.0), (0, 2, 1.0), (0, 
3, 1.0), (1, 2, 1.0),
      (1, 3, 1.0), (2, 3, 1.0), (3, 4, 0.1), // (3, 4) is a weak edge
      (4, 5, 1.0), (4, 15, 1.0), (5, 6, 1.0), (6, 7, 1.0), (7, 8, 1.0), (8, 9, 
1.0), (9, 10, 1.0),
      (10, 11, 1.0), (11, 12, 1.0), (12, 13, 1.0), (13, 14, 1.0), (14, 15, 1.0))

    // Expand every (i, j, s) triple into two edges, i -> j and j -> i, with the same weight.
    // Triples with i == j take the else branch, which yields None, so flatMap emits nothing
    // for them (the data above contains no such triple).
    val edges = similarities.flatMap { case (i, j, s) =>
      if (i != j) {
        Seq(Edge(i, j, s), Edge(j, i, s))
      } else {
        None
      }
    }
    // NOTE(review): 0.0 is the second argument to Graph.fromEdges — presumably the default
    // vertex attribute; confirm against the GraphX API.
    val graph = Graph.fromEdges(sc.parallelize(edges, 2), 0.0)

    // First run: only k is configured; the input is the graph built above.
    val model = new PowerIterationClustering()
      .setK(2)
      .run(graph)
    // Bucket vertex ids by assigned cluster index: predictions(c) collects the ids in cluster c.
    val predictions = Array.fill(2)(mutable.Set.empty[Long])
    model.assignments.collect().foreach { a =>
      predictions(a.cluster) += a.id
    }
    // Compared as a set of sets, so it does not matter which cluster index each group received.
    assert(predictions.toSet == Set((0 to 3).toSet, (4 to 15).toSet))

    // Second run: initialization mode set to "degree".
    // NOTE(review): this run is fed the raw similarities RDD, not `graph`, so it does not
    // exercise the graph input path — confirm this is intended.
    val model2 = new PowerIterationClustering()
      .setK(2)
      .setInitializationMode("degree")
      .run(sc.parallelize(similarities, 2))
    // Same bucketing and order-independent check as for the first run.
    val predictions2 = Array.fill(2)(mutable.Set.empty[Long])
    model2.assignments.collect().foreach { a =>
      predictions2(a.cluster) += a.id
    }
    assert(predictions2.toSet == Set((0 to 3).toSet, (4 to 15).toSet))
  }
{code}

> PowerIterationClustering test case failed if we deprecated KMeans.setRuns
> -------------------------------------------------------------------------
>
>                 Key: SPARK-12363
>                 URL: https://issues.apache.org/jira/browse/SPARK-12363
>             Project: Spark
>          Issue Type: Bug
>          Components: GraphX, MLlib
>            Reporter: Yanbo Liang
>
> We plan to deprecate `runs` of KMeans; PowerIterationClustering 
> leverages KMeans to train its model.
> I removed the `setRuns` call used in PowerIterationClustering, but one of the test 
> cases failed.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to