Hi,
I was going through the SparkPageRank code and would like to inspect its
intermediate steps, in particular the RDDs that are formed along the way.
Here is a part of the code along with the lines that I added in order to
print the RDDs.
I want to print the "parts" value in the code (marked by the comment on that
line). But when I try to print it there the same way, I get an error.
Can someone suggest what I should be doing?
Thank You
CODE:
/**
 * PageRank example instrumented to print each intermediate RDD.
 *
 * Every RDD that is printed is first brought back with `collect()` and then
 * printed with `println`, so this is only suitable for small inputs.
 */
object SparkPageRank {

  /**
   * Runs the instrumented PageRank job.
   *
   * @param args `args(0)` is the input path handed to `textFile`; each line is
   *             split on whitespace and its first two tokens are used as a
   *             (source, destination) pair. `args(1)` is the number of
   *             iterations.
   */
  def main(args: Array[String]): Unit = {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
      System.err.println("Usage: SparkPageRank <file> <iterations>")
      sys.exit(1)
    }
    val iters = args(1).toInt

    val sparkConf = new SparkConf().setAppName("PageRank")
    val ctx = new SparkContext(sparkConf)
    try {
      val lines = ctx.textFile(args(0), 1)
      println("The lines RDD is")
      lines.collect().foreach(println)

      // "parts" is printed here. The split result is kept as its own RDD and
      // collected before printing, the same way as every other RDD in this
      // program. A println placed inside the map closure would run as part of
      // the Spark task rather than at this point in main.
      // An Array has no readable toString, hence the mkString.
      val parsed = lines.map(_.split("\\s+"))
      println("The parts RDD is")
      parsed.collect().foreach(parts => println(parts.mkString("[", ", ", "]")))

      val links = parsed
        .map(parts => (parts(0), parts(1)))
        .distinct()
        .groupByKey()
        .cache()
      println("The links RDD is")
      links.collect().foreach(println)

      // Every key of links starts with rank 1.0; ranks is reassigned on each
      // iteration below, so it has to stay a var.
      var ranks = links.mapValues(_ => 1.0)
      println("The ranks RDD is")
      ranks.collect().foreach(println)

      for (_ <- 1 to iters) {
        // Each key's rank is divided evenly among the urls grouped under it.
        val contribs = links.join(ranks).values.flatMap { case (urls, rank) =>
          val size = urls.size
          urls.map(url => (url, rank / size))
        }
        println("The contribs RDD is")
        contribs.collect().foreach(println)
        ranks = contribs.reduceByKey(_ + _).mapValues(0.15 + 0.85 * _)
      }

      // Collect once and reuse the array for both printouts instead of running
      // collect() on ranks twice.
      val output = ranks.collect()
      println("The second ranks RDD is")
      output.foreach(println)
      output.foreach { case (url, rank) => println(s"$url has rank: $rank.") }
    } finally {
      // Stop the context even when one of the jobs above throws.
      ctx.stop()
    }
  }
}