
Any observations on what I am doing wrong?


From: Kamalanathan Venkatesan
Sent: Tuesday, July 09, 2019 7:25 PM
To: 'user@spark.apache.org' <user@spark.apache.org>
Subject: Spark structural streaming sinks output late


I have the Spark Structured Streaming code below, and I was expecting the results 
to be printed on the console every 10 seconds. However, I notice the sink to the 
console happening only every ~2 minutes or more.
May I know what I am doing wrong?

// Builds a Spark session, defines a Kafka streaming source, derives a count over
// 10-second windows of the timestamp column, and begins configuring a writeStream sink.
// NOTE(review): this snippet looks truncated as pasted. Several statements are
// incomplete (see the notes below) and the closing brace of the method is not
// visible, so it does not compile as shown.
def streaming(): Unit = {
    // Sets the hadoop.home.dir system property before any Spark object is created.
    System.setProperty("hadoop.home.dir", "/Documents/ ")
    // NOTE(review): nothing follows `new` here; presumably a SparkConf constructor
    // call (with app name / master) was dropped. Confirm against the original code.
    val conf: SparkConf = new 
    // Sets spark.eventLog.enabled to false on the configuration.
    conf.set("spark.eventLog.enabled", "false");
    // A SparkContext, an SQLContext and a SparkSession are all created from the same conf.
    val sc: SparkContext = new SparkContext(conf)
    val sqlcontext = new SQLContext(sc)
    val spark = SparkSession.builder().config(conf).getOrCreate()

    // Brings the implicits of sqlcontext (used for the $ column syntax below) and
    // the window function into scope.
    import sqlcontext.implicits._
    import org.apache.spark.sql.functions.window

    // Kafka source: broker localhost:9092, topic wonderful, starting from the latest offsets.
    // NOTE(review): no .load() call is visible after the options; presumably it was
    // lost in the paste, since selectExpr is called on inputDf below. TODO confirm.
    val inputDf = spark.readStream.format("kafka")
      .option("kafka.bootstrap.servers", "localhost:9092")
      .option("subscribe", "wonderful")
      .option("startingOffsets", "latest")
    import scala.concurrent.duration._

    // Casts key and value to strings, keeps the timestamp column, and declares a
    // 500 millisecond watermark on timestamp.
    // NOTE(review): the second CAST expression is split across two lines inside a
    // string literal, which looks like an e-mail line-wrap artifact.
    // NOTE(review): the line starting with window( has an unmatched closing
    // parenthesis; presumably an opening .groupBy( was dropped before it. As
    // written, the count is over 10-second windows of timestamp. TODO confirm.
    val personJsonDf = inputDf.selectExpr("CAST(key AS STRING)", "CAST(value AS 
STRING)", "timestamp")
      .withWatermark("timestamp", "500 milliseconds")
        window($"timestamp", "10 seconds")).count()

    // Starts configuring the sink; only the truncate option (set to false) is visible.
    // NOTE(review): the format, output mode, trigger and start calls are not
    // visible here, so the effective trigger interval cannot be determined from
    // this snippet. Verify against the original code.
    val consoleOutput = personJsonDf.writeStream
      .option("truncate", "false")

object SparkExecutor {
  val spE: SparkExecutor = new SparkExecutor();
  def main(args: Array[String]): Unit = {

