Hi,
I was going through the SparkPageRank code and want to see the intermediate
steps, like the RDDs formed in the intermediate steps.
Here is a part of the code along with the lines that I added in order to
print the RDDs.
I want to print the "*parts*" in the code (denoted by the comment in Bold
letters). But, when I try to do the same thing there, it gives an error.
Can someone suggest what I should be doing?
Thank You

CODE:

object SparkPageRank {
  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("PageRank")
    var iters = args(1).toInt
    val ctx = new SparkContext(sparkConf)
    val lines = ctx.textFile(args(0), 1)
println("The lines RDD is")
lines.collect().foreach(println)
    val links = lines.map{ s =>
      val parts = s.split("\\s+")
      (parts(0), parts(1))  */*I want to print this "parts"*/*
    }.distinct().groupByKey().cache()
println("The links RDD is")
links.collect().foreach(println)
    var ranks = links.mapValues(v => 1.0)
println("The ranks RDD is")
ranks.collect().foreach(println)
    for (i <- 1 to iters) {
      val contribs = links.join(ranks).values.flatMap{ case (urls, rank) =>
        val size = urls.size
        urls.map(url => (url, rank / size))
      }
println("The contribs RDD is")
      contribs.collect().foreach(println)
      ranks = contribs.reduceByKey(_ + _).mapValues(0.15 + 0.85 * _)
    }
println("The second ranks RDD is")
       ranks.collect().foreach(println)

    val output = ranks.collect()
    output.foreach(tup => println(tup._1 + " has rank: " + tup._2 + "."))

    ctx.stop()
  }
}

Reply via email to