Hi, I am facing closure issues while executing this code:
======================================================================================== package spark //this package is about understanding closures as mentioned in: http://spark.apache.org/docs/latest/rdd-programming-guide.html#understanding-closures- import org.apache.spark.sql.SparkSession object understandClosures extends App { var counter = 0 //the error thrown is removed in case we use local[*] as master val sparkSession = SparkSession .builder .master("spark://Gouravs-iMac:7077") //.master("local[*]") .appName("test") .getOrCreate() val valueRDD = sparkSession.sparkContext.parallelize(1 until 1000000) println(valueRDD.count()) valueRDD.foreach(x => counter += x) //but even if we use the master as local[*] the total appears as some random number as -1234435435 println("the value is " + counter.toString()) sparkSession.close() } ======================================================================================== Can anyone explain me why am I facing closure issue while executing this code? package spark import org.apache.spark.sql.SparkSession // Import this utility for working with URLs. Unlike Java the semicolon ';' is not required. import java.net.URL // Use {...} to provide a list of things to import, when you don't want to import everything // in a package and you don't want to write a separate line for each type. import java.io.{File, BufferedInputStream, BufferedOutputStream, FileOutputStream} object justenoughTest extends App { val sparkSession = SparkSession .builder .master("spark://Gouravs-iMac:7077") //.master("local[*]") .appName("test") .getOrCreate() println(sparkSession.version) println("Spark version: " + sparkSession.version) println("Spark master: " + sparkSession.sparkContext.master) println("Running 'locally'?: " + sparkSession.sparkContext.isLocal) val pathSeparator = File.separator // The target directory, which we'll now create, if necessary. 
val shakespeare = new File("/Users/gouravsengupta/Development/data/shakespeare") println(sparkSession.version) //val fileContents = sparkSession.read.text("file:///Users/gouravsengupta/Development/data/shakespeare/") //val fileContents = sparkSession.read.text(shakespeare.toString) val fileContents = sparkSession.sparkContext.wholeTextFiles(shakespeare.toString) println(fileContents.count()) //I am facing the closure issues below val testThis = fileContents.foreach(x => "printing value" + x._1) sparkSession.close() } Regards, Gourav Sengupta