Github user tdas commented on a diff in the pull request: https://github.com/apache/spark/pull/15307#discussion_r83108563 --- Diff: external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala --- @@ -264,6 +266,44 @@ class KafkaSourceSuite extends KafkaSourceTest { testUnsupportedConfig("kafka.auto.offset.reset", "latest") } + test("input row metrics") { + val topic = newTopic() + testUtils.createTopic(topic, partitions = 5) + testUtils.sendMessages(topic, Array("-1")) + require(testUtils.getLatestOffsets(Set(topic)).size === 5) + + val kafka = spark + .readStream + .format("kafka") + .option("subscribe", topic) + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .load() + .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] + + val mapped = kafka.map(kv => kv._2.toInt + 1) + val listener = new QueryStatusCollector + spark.streams.addListener(listener) + try { + testStream(mapped)( + makeSureGetOffsetCalled, + AddKafkaData(Set(topic), 1, 2, 3), + CheckAnswer(2, 3, 4), + AssertOnQuery { query => + eventually(timeout(streamingTimeout)) { + assert(listener.lastTriggerStatus.nonEmpty) + } + val status = listener.lastTriggerStatus.get + assert(status.triggerStatus.get("numRows.input.total").toInt > 0) --- End diff -- Because I am not sure whether all 3 inserted items will appear in a single batch or be spread across multiple batches. The latter could very well happen, as AddKafkaData runs asynchronously to the query, and there is a chance that a batch gets cut off before all 3 have been inserted. All I care about in this test is whether input rows are being measured or not.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org