[ https://issues.apache.org/jira/browse/KAFKA-7266?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16578899#comment-16578899 ]
ASF GitHub Bot commented on KAFKA-7266: --------------------------------------- rajinisivaram closed pull request #5485: KAFKA-7266: Fix MetricsTest.testMetrics flakiness by increasing batch… URL: https://github.com/apache/kafka/pull/5485 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/core/src/test/scala/integration/kafka/api/MetricsTest.scala b/core/src/test/scala/integration/kafka/api/MetricsTest.scala index fe7563971ee..494bccecfed 100644 --- a/core/src/test/scala/integration/kafka/api/MetricsTest.scala +++ b/core/src/test/scala/integration/kafka/api/MetricsTest.scala @@ -44,7 +44,8 @@ class MetricsTest extends IntegrationTestHarness with SaslSetup { this.serverConfig.setProperty(KafkaConfig.ZkEnableSecureAclsProp, "false") this.serverConfig.setProperty(KafkaConfig.AutoCreateTopicsEnableDoc, "false") this.producerConfig.setProperty(ProducerConfig.LINGER_MS_CONFIG, "10") - this.producerConfig.setProperty(ProducerConfig.BATCH_SIZE_CONFIG, "1000000") + // intentionally slow message down conversion via gzip compression to ensure we can measure the time it takes + this.producerConfig.setProperty(ProducerConfig.COMPRESSION_TYPE_CONFIG, "gzip") override protected def securityProtocol = SecurityProtocol.SASL_PLAINTEXT override protected val serverSaslProperties = Some(kafkaServerSaslProperties(kafkaServerSaslMechanisms, kafkaClientSaslMechanism)) @@ -78,7 +79,7 @@ class MetricsTest extends IntegrationTestHarness with SaslSetup { // Produce and consume some records val numRecords = 10 - val recordSize = 1000 + val recordSize = 100000 val producer = producers.head sendRecords(producer, numRecords, recordSize, tp) @@ -198,17 +199,7 @@ class MetricsTest extends IntegrationTestHarness with SaslSetup { 
verifyYammerMetricRecorded(s"kafka.server:type=BrokerTopicMetrics,name=ProduceMessageConversionsPerSec") - // Conversion time less than 1 millisecond is reported as zero, so retry with larger batches until time > 0 - var iteration = 0 - TestUtils.retry(5000) { - val conversionTimeMs = yammerMetricValue(s"$requestMetricsPrefix,name=MessageConversionsTimeMs,request=Produce").asInstanceOf[Double] - if (conversionTimeMs <= 0.0) { - iteration += 1 - sendRecords(producers.head, 1000 * iteration, 100, tp) - } - assertTrue(s"Message conversion time not recorded $conversionTimeMs", conversionTimeMs > 0.0) - } - + verifyYammerMetricRecorded(s"$requestMetricsPrefix,name=MessageConversionsTimeMs,request=Produce", value => value > 0.0) verifyYammerMetricRecorded(s"$requestMetricsPrefix,name=RequestBytes,request=Fetch") verifyYammerMetricRecorded(s"$requestMetricsPrefix,name=TemporaryMemoryBytes,request=Fetch", value => value == 0.0) ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Fix MetricsTest test flakiness > ------------------------------ > > Key: KAFKA-7266 > URL: https://issues.apache.org/jira/browse/KAFKA-7266 > Project: Kafka > Issue Type: Improvement > Reporter: Stanislav Kozlovski > Assignee: Stanislav Kozlovski > Priority: Minor > Fix For: 2.0.1, 2.1.0 > > > The test `kafka.api.MetricsTest.testMetrics` has been failing intermittently > in Kafka builds (recent proof: > https://github.com/apache/kafka/pull/5436#issuecomment-409683955). > The particular failure is in the `MessageConversionsTimeMs` metric assertion - > {code} > java.lang.AssertionError: Message conversion time not recorded 0.0 > {code} > There has been work done previously > (https://github.com/apache/kafka/pull/4681) to combat the flakiness of the > test, and while that work has improved it, the test still fails sometimes. > h3. Solution > On my machine, the test failed 5 times out of 25 runs. Increasing the record > size and using compression should slow down message conversion enough for > it to take more than 1 ms. Locally this test has not failed in 200 runs and counting > with those changes. -- This message was sent by Atlassian JIRA (v7.6.3#76005)