The Scala code below was based on https://www.sics.se/~amir/files/download/dic/answers6.pdf. I extended it by adding a HashMap called componentLists that I populated with each component's starting node as the key and then a ListBuffer of the component's members. As the output below the code shows, it seems to do all that just fine, but then the HashMap size is back down to 0 when it's done (flag3), so I assume there's some scoping issue related to the use(s) of the case keyword. (I'm new to Scala and still don't completely understand that.)

Can anyone tell me how to modify this so that I still have a populated componentLists when it's all done, i.e. when it reaches flag3?

Thanks,

Bob

///////////////
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.HashMap

object problemDemo {
    def main(args: Array[String]) {
         val sc = new SparkContext("local", "ProblemDemo", "127.0.0.1")

        val vertexArray = Array(
            (1L, "Alice"), (2L, "Bob"), (3L, "Charlie"),
            (4L, "David"), (5L, "Ed"),  (6L, "Fran")
        )
        val edgeArray = Array(
            Edge(2L, 1L, "knows"), Edge(2L, 3L, "knows"),
            Edge(3L, 1L, "knows"), Edge(4L, 5L, "knows"),
            Edge(5L, 6L, "knows")
        )
        val vertexRDD: RDD[(Long, String)] = sc.parallelize(vertexArray)
        val edgeRDD: RDD[Edge[String]] = sc.parallelize(edgeArray)
        val graph: Graph[String, String] = Graph(vertexRDD, edgeRDD)

        var componentLists = HashMap[VertexId, ListBuffer[VertexId]]()

        val cc = graph.connectedComponents
        graph.vertices.leftJoin(cc.vertices) {
        case (id, u, comp) => (id, u, comp)
        }.foreach{ case (id, u) =>
                   {
                       // Add id to the list of components with a key
                       // of u._3.get (the starting node)
                       if (!(componentLists.contains(u._3.get))) {
componentLists(u._3.get) = new ListBuffer[VertexId]
                       }
                       componentLists(u._3.get) += id
                       println(s"just added ${id} to ${u._3.get}")
println(s"flag1 length of componentLists ${componentLists.size}")
                   }
println(s"flag2 length of componentLists ${componentLists.size}")
                 }
        println(s"flag3 length of componentLists ${componentLists.size}")
    }
}

////////// output /////////////

just added 4 to 4
flag1 length of componentLists 1
flag2 length of componentLists 1
just added 2 to 1
flag1 length of componentLists 2
flag2 length of componentLists 2
just added 6 to 4
flag1 length of componentLists 2
flag2 length of componentLists 2
just added 5 to 4
flag1 length of componentLists 2
flag2 length of componentLists 2
just added 3 to 1
flag1 length of componentLists 2
flag2 length of componentLists 2
just added 1 to 1
flag1 length of componentLists 2
flag2 length of componentLists 2
flag3 length of componentLists 0


---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscr...@spark.apache.org
For additional commands, e-mail: user-h...@spark.apache.org

Reply via email to