HeartSaVioR commented on code in PR #38517: URL: https://github.com/apache/spark/pull/38517#discussion_r1049119941
########## connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala: ########## @@ -195,6 +200,102 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { true } + /** + * Test async progress tracking capability with Kafka source and sink + */ + test("async progress tracking") { + val inputTopic = newTopic() + testUtils.createTopic(inputTopic, partitions = 5) + + val dataSent = new ListBuffer[String]() + testUtils.sendMessages(inputTopic, (0 until 15).map { case x => + val m = s"foo-$x" + dataSent += m + m + }.toArray, Some(0)) + + val outputTopic = newTopic() + testUtils.createTopic(outputTopic, partitions = 5) + + withTempDir { dir => + val reader = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("maxOffsetsPerTrigger", 5) + .option("subscribe", inputTopic) + .option("startingOffsets", "earliest") + .load() + + def startQuery(): StreamingQuery = { + reader.writeStream + .format("kafka") + .option("checkpointLocation", dir.getCanonicalPath) + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.max.block.ms", "5000") + .option("topic", outputTopic) + .option(ASYNC_PROGRESS_TRACKING_ENABLED, true) + .option(ASYNC_PROGRESS_TRACKING_CHECKPOINTING_INTERVAL_MS, 1000) + .queryName("kafkaStream") + .start() + } + + def readResults(): ListBuffer[String] = { Review Comment: The output would be the same, but both the code and the actual execution would be much simpler with a batch query. See the code below for what it looks like when we just go with a batch query: ``` spark.read .format("kafka") .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("startingOffsets", "earliest") .option("subscribe", outputTopic) .load() .select("CAST(value AS string)") .toDS() .collect() .map(_._1) ``` The entire code in the method can be replaced with this query. 
I haven't given it a try, but the actual code that would execute won't be much different. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org