GitHub user wypoon commented on a diff in the pull request: https://github.com/apache/spark/pull/19703#discussion_r150030572 --- Diff: examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredKafkaWordCount.scala --- @@ -46,11 +51,13 @@ object StructuredKafkaWordCount { def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println("Usage: StructuredKafkaWordCount <bootstrap-servers> " + - "<subscribe-type> <topics>") + "<subscribe-type> <topics> [<checkpoint-location>]") System.exit(1) } - val Array(bootstrapServers, subscribeType, topics) = args + val Array(bootstrapServers, subscribeType, topics, _*) = args + val checkpointLocation = + if (args.length > 3) args(3) else "/tmp/temporary-" + UUID.randomUUID.toString --- End diff -- This is what the internal default would be if java.io.tmpdir were "/tmp"; in YARN cluster mode, however, java.io.tmpdir is something else (which is the underlying problem). Supplying this default here is just to ease the user experience: users would get the same result whether running in YARN cluster mode or client mode, without having to supply an explicit checkpoint location.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org