[ https://issues.apache.org/jira/browse/KAFKA-9963?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Evan Williams updated KAFKA-9963:
---------------------------------
    Description:

When replacing a broker with an empty data dir and the same broker ID, we are seeing very high CPU usage during replication, generally up to 100% for some time, on a 4 vCPU (EC2 R5) host. This is a 6-host cluster with approximately 1000 topics and 3000 partitions.

There is of course traffic being served as well, as the broker catches up and becomes leader of partitions, but due to the high replication CPU usage, clients start to have connection issues. Normal traffic on this host is around 40% CPU once it is fully replicated. I'm not sure whether this is 'normal' and we just need to throw more resources at the hosts, or whether replication should not really be using this much CPU.

num.replica.fetchers=2 (not high), and the Java version is OpenJDK Runtime Environment Corretto-8.252.09.1 (build 1.8.0_252-b09) (AWS's own build).

CPU profiling (2 minutes) during this 'replace' scenario shows this:

{code:java}
54730000000  19.43%  5473  java.util.TreeMap$PrivateEntryIterator.nextEntry
49750000000  17.66%  4975  scala.collection.convert.Wrappers$JIteratorWrapper.hasNext
44170000000  15.68%  4417  java.util.TreeMap.successor
17730000000   6.29%  1773  java.util.TreeMap$ValueIterator.next
17000000000   6.03%  1700  java.util.TreeMap$PrivateEntryIterator.hasNext
 6010000000   2.13%   601  scala.collection.convert.Wrappers$JIteratorWrapper.next
 5160000000   1.83%   516  writev

--- 38850000000 ns (13.79%), 3885 samples
[ 0] java.util.TreeMap$PrivateEntryIterator.nextEntry
[ 1] java.util.TreeMap$ValueIterator.next
[ 2] scala.collection.convert.Wrappers$JIteratorWrapper.next
[ 3] scala.collection.Iterator.find
[ 4] scala.collection.Iterator.find$
[ 5] scala.collection.AbstractIterator.find
[ 6] scala.collection.IterableLike.find
[ 7] scala.collection.IterableLike.find$
[ 8] scala.collection.AbstractIterable.find
[ 9] kafka.log.ProducerStateManager.lastStableOffset
[10] kafka.log.Log.$anonfun$append$12
[11] kafka.log.Log.$anonfun$append$2
[12] kafka.log.Log.append
[13] kafka.log.Log.appendAsFollower
[14] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1
[15] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica
[16] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica
[17] kafka.server.ReplicaFetcherThread.processPartitionData
[18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7
[19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6
[20] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted
[21] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply
[22] scala.collection.mutable.ResizableArray.foreach
[23] scala.collection.mutable.ResizableArray.foreach$
[24] scala.collection.mutable.ArrayBuffer.foreach
[25] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5
[26] kafka.server.AbstractFetcherThread.processFetchRequest
[27] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3
[28] kafka.server.AbstractFetcherThread.maybeFetch
[29] kafka.server.AbstractFetcherThread.doWork
[30] kafka.utils.ShutdownableThread.run
{code}
{code:java}
--- 36320000000 ns (12.89%), 3632 samples
[ 0] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext
[ 1] scala.collection.Iterator.find
[ 2] scala.collection.Iterator.find$
[ 3] scala.collection.AbstractIterator.find
[ 4] scala.collection.IterableLike.find
[ 5] scala.collection.IterableLike.find$
[ 6] scala.collection.AbstractIterable.find
[ 7] kafka.log.ProducerStateManager.lastStableOffset
[ 8] kafka.log.Log.$anonfun$append$12
[ 9] kafka.log.Log.$anonfun$append$2
[10] kafka.log.Log.append
[11] kafka.log.Log.appendAsFollower
[12] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1
[13] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica
[14] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica
[15] kafka.server.ReplicaFetcherThread.processPartitionData
[16] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7
[17] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6
[18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted
[19] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply
[20] scala.collection.mutable.ResizableArray.foreach
[21] scala.collection.mutable.ResizableArray.foreach$
[22] scala.collection.mutable.ArrayBuffer.foreach
[23] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5
[24] kafka.server.AbstractFetcherThread.processFetchRequest
[25] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3
[26] kafka.server.AbstractFetcherThread.maybeFetch
[27] kafka.server.AbstractFetcherThread.doWork
[28] kafka.utils.ShutdownableThread.run
{code}
{code:java}
--- 32360000000 ns (11.49%), 3236 samples
[ 0] java.util.TreeMap.successor
[ 1] java.util.TreeMap$PrivateEntryIterator.nextEntry
[ 2] java.util.TreeMap$ValueIterator.next
[ 3] scala.collection.convert.Wrappers$JIteratorWrapper.next
[ 4] scala.collection.Iterator.find
[ 5] scala.collection.Iterator.find$
[ 6] scala.collection.AbstractIterator.find
[ 7] scala.collection.IterableLike.find
[ 8] scala.collection.IterableLike.find$
[ 9] scala.collection.AbstractIterable.find
[10] kafka.log.ProducerStateManager.lastStableOffset
[11] kafka.log.Log.$anonfun$append$12
[12] kafka.log.Log.$anonfun$append$2
[13] kafka.log.Log.append
[14] kafka.log.Log.appendAsFollower
[15] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1
[16] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica
[17] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica
[18] kafka.server.ReplicaFetcherThread.processPartitionData
[19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7
[20] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6
[21] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted
[22] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply
[23] scala.collection.mutable.ResizableArray.foreach
[24] scala.collection.mutable.ResizableArray.foreach$
[25] scala.collection.mutable.ArrayBuffer.foreach
[26] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5
[27] kafka.server.AbstractFetcherThread.processFetchRequest
[28] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3
[29] kafka.server.AbstractFetcherThread.maybeFetch
[30] kafka.server.AbstractFetcherThread.doWork
[31] kafka.utils.ShutdownableThread.run

--- 15090000000 ns (5.36%), 1509 samples
[ 0] java.util.TreeMap$PrivateEntryIterator.nextEntry
[ 1] java.util.TreeMap$ValueIterator.next
[ 2] scala.collection.convert.Wrappers$JIteratorWrapper.next
[ 3] scala.collection.Iterator.find
[ 4] scala.collection.Iterator.find$
[ 5] scala.collection.AbstractIterator.find
[ 6] scala.collection.IterableLike.find
[ 7] scala.collection.IterableLike.find$
[ 8] scala.collection.AbstractIterable.find
[ 9] kafka.log.ProducerStateManager.lastStableOffset
[10] kafka.log.Log.$anonfun$append$12
[11] kafka.log.Log.$anonfun$append$2
[12] kafka.log.Log.append
[13] kafka.log.Log.appendAsFollower
[14] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1
[15] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica
[16] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica

--- 13200000000 ns (4.69%), 1320 samples
[ 0] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext
[ 1] scala.collection.Iterator.find
[ 2] scala.collection.Iterator.find$
[ 3] scala.collection.AbstractIterator.find
[ 4] scala.collection.IterableLike.find
[ 5] scala.collection.IterableLike.find$
[ 6] scala.collection.AbstractIterable.find
[ 7] kafka.log.ProducerStateManager.lastStableOffset
[ 8] kafka.log.Log.$anonfun$append$12
[ 9] kafka.log.Log.$anonfun$append$2
[10] kafka.log.Log.append
[11] kafka.log.Log.appendAsFollower
[12] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1
[13] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica
[14] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica

--- 12390000000 ns (4.40%), 1239 samples
[ 0] java.util.TreeMap$ValueIterator.next
[ 1] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext
[ 2] scala.collection.Iterator.find
[ 3] scala.collection.Iterator.find$
[ 4] scala.collection.AbstractIterator.find
[ 5] scala.collection.IterableLike.find
[ 6] scala.collection.IterableLike.find$
[ 7] scala.collection.AbstractIterable.find
[ 8] kafka.log.ProducerStateManager.lastStableOffset
[ 9] kafka.log.Log.$anonfun$append$12
[10] kafka.log.Log.$anonfun$append$2
[11] kafka.log.Log.append
[12] kafka.log.Log.appendAsFollower
[13] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1
[14] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica
[15] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica
[16] kafka.server.ReplicaFetcherThread.processPartitionData
[17] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7
[18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6
[19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted
[20] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply
[21] scala.collection.mutable.ResizableArray.foreach
[22] scala.collection.mutable.ResizableArray.foreach$
[23] scala.collection.mutable.ArrayBuffer.foreach
[24] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5
[25] kafka.server.AbstractFetcherThread.processFetchRequest
[26] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3
[27] kafka.server.AbstractFetcherThread.maybeFetch
[28] kafka.server.AbstractFetcherThread.doWork
[29] kafka.utils.ShutdownableThread.run
{code}
{code:java}
--- 11810000000 ns (4.19%), 1181 samples
[ 0] java.util.TreeMap.successor
[ 1] java.util.TreeMap$PrivateEntryIterator.nextEntry
[ 2] java.util.TreeMap$ValueIterator.next
[ 3] scala.collection.convert.Wrappers$JIteratorWrapper.next
[ 4] scala.collection.Iterator.find
[ 5] scala.collection.Iterator.find$
[ 6] scala.collection.AbstractIterator.find
[ 7] scala.collection.IterableLike.find
[ 8] scala.collection.IterableLike.find$
[ 9] scala.collection.AbstractIterable.find
[10] kafka.log.ProducerStateManager.lastStableOffset
[11] kafka.log.Log.$anonfun$append$12
[12] kafka.log.Log.$anonfun$append$2
[13] kafka.log.Log.append
[14] kafka.log.Log.appendAsFollower
[15] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1
[16] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica
[17] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica
{code}
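The hot frames all sit under kafka.log.ProducerStateManager.lastStableOffset doing a Scala Iterable.find over the values of a java.util.TreeMap (hence the JIteratorWrapper.hasNext/next and TreeMap.successor samples), i.e. a linear walk of that map for every batch the replica fetcher appends. The snippet below is a minimal standalone sketch of just that access pattern, not Kafka code; the type, map size, and append count are made up for illustration. It only shows why such a scan costs O(entries) per append, compared with reading the first entry of an offset-ordered map.

{code:scala}
// Standalone sketch (hypothetical names and sizes, NOT Kafka source). It reproduces the
// frame pattern from the profile above: Scala Iterable.find over java.util.TreeMap values
// via the Java converters, which walks TreeMap.successor / JIteratorWrapper.hasNext.
import java.util.TreeMap
import scala.collection.JavaConverters._

object TreeMapScanSketch {
  // Stand-in for per-transaction metadata keyed by the transaction's first offset.
  final case class TxnMetadata(producerId: Long, firstOffset: Long)

  def main(args: Array[String]): Unit = {
    val ongoingTxns = new TreeMap[Long, TxnMetadata]()
    (0L until 20000L).foreach(o => ongoingTxns.put(o, TxnMetadata(producerId = o % 100, firstOffset = o)))

    val appends = 5000 // pretend each iteration is one batch appended by the replica fetcher

    // Pattern from the stack traces: a linear find over the wrapped TreeMap values.
    // With a predicate that never matches, every call walks all 20,000 entries.
    var t0 = System.nanoTime()
    var hits = 0
    for (_ <- 0 until appends)
      if (ongoingTxns.values.asScala.find(_.producerId == -1L).isDefined) hits += 1
    println(f"linear find per append: ${(System.nanoTime() - t0) / 1e6}%.1f ms ($hits hits)")

    // Cheap comparison point: for a map ordered by first offset, the earliest entry is
    // available directly from firstEntry(), with no iterator and no scan.
    t0 = System.nanoTime()
    var acc = 0L
    for (_ <- 0 until appends)
      acc += Option(ongoingTxns.firstEntry()).map(_.getValue.firstOffset).getOrElse(-1L)
    println(f"firstEntry per append:  ${(System.nanoTime() - t0) / 1e6}%.1f ms (checksum $acc)")
  }
}
{code}

If the scan rarely terminates early while the follower is appending thousands of batches per second during catch-up, a per-append walk like this could plausibly account for the roughly two thirds of samples landing in TreeMap and iterator methods in the summary above.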
> High CPU during replication of replaced (empty) broker
> ------------------------------------------------------
>
>                 Key: KAFKA-9963
>                 URL: https://issues.apache.org/jira/browse/KAFKA-9963
>             Project: Kafka
>          Issue Type: Bug
>          Components: core
>    Affects Versions: 2.4.1
>            Reporter: Evan Williams
>            Priority: Major
>

--
This message was sent by Atlassian Jira
(v8.3.4#803005)