[ https://issues.apache.org/jira/browse/KAFKA-9963?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Evan Williams updated KAFKA-9963: --------------------------------- Description: When replacing a broker, with an empty data dir, and the same broker ID - we are seeing very high CPU usage during replication, generally up to 100% for some time, on a 4 VCPU (EC2 R5) host. This is a 6 host cluster, with approx 1000 topics and 3000 partitions. There is of course traffic being served as well, as it catches up and becomes leader of partitions, however due to the high replication CPU usage - client's start to have connection issue. Normal traffic on this host is is around 40% CPU, when it's completely replicated. CPU profiling (2 minutes ) during this 'replace' scenario, shows this: {code:java} 54730000000 19.43% 5473 java.util.TreeMap$PrivateEntryIterator.nextEntry 49750000000 17.66% 4975 scala.collection.convert.Wrappers$JIteratorWrapper.hasNext 44170000000 15.68% 4417 java.util.TreeMap.successor 17730000000 6.29% 1773 java.util.TreeMap$ValueIterator.next 17000000000 6.03% 1700 java.util.TreeMap$PrivateEntryIterator.hasNext 6010000000 2.13% 601 scala.collection.convert.Wrappers$JIteratorWrapper.next 5160000000 1.83% 516 writev --- 38850000000 ns (13.79%), 3885 samples [ 0] java.util.TreeMap$PrivateEntryIterator.nextEntry [ 1] java.util.TreeMap$ValueIterator.next [ 2] scala.collection.convert.Wrappers$JIteratorWrapper.next [ 3] scala.collection.Iterator.find [ 4] scala.collection.Iterator.find$ [ 5] scala.collection.AbstractIterator.find [ 6] scala.collection.IterableLike.find [ 7] scala.collection.IterableLike.find$ [ 8] scala.collection.AbstractIterable.find [ 9] kafka.log.ProducerStateManager.lastStableOffset [10] kafka.log.Log.$anonfun$append$12 [11] kafka.log.Log.$anonfun$append$2 [12] kafka.log.Log.append [13] kafka.log.Log.appendAsFollower [14] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [15] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [16] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica [17] kafka.server.ReplicaFetcherThread.processPartitionData [18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 [19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 [20] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted [21] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply [22] scala.collection.mutable.ResizableArray.foreach [23] scala.collection.mutable.ResizableArray.foreach$ [24] scala.collection.mutable.ArrayBuffer.foreach [25] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 [26] kafka.server.AbstractFetcherThread.processFetchRequest [27] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 [28] kafka.server.AbstractFetcherThread.maybeFetch [29] kafka.server.AbstractFetcherThread.doWork [30] kafka.utils.ShutdownableThread.run--- 36320000000 ns (12.89%), 3632 samples [ 0] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext [ 1] scala.collection.Iterator.find [ 2] scala.collection.Iterator.find$ [ 3] scala.collection.AbstractIterator.find [ 4] scala.collection.IterableLike.find [ 5] scala.collection.IterableLike.find$ [ 6] scala.collection.AbstractIterable.find [ 7] kafka.log.ProducerStateManager.lastStableOffset [ 8] kafka.log.Log.$anonfun$append$12 [ 9] kafka.log.Log.$anonfun$append$2 [10] kafka.log.Log.append [11] kafka.log.Log.appendAsFollower [12] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [13] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [14] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica [15] kafka.server.ReplicaFetcherThread.processPartitionData [16] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 [17] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 [18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted [19] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply [20] scala.collection.mutable.ResizableArray.foreach [21] scala.collection.mutable.ResizableArray.foreach$ [22] scala.collection.mutable.ArrayBuffer.foreach [23] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 [24] kafka.server.AbstractFetcherThread.processFetchRequest [25] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 [26] kafka.server.AbstractFetcherThread.maybeFetch [27] kafka.server.AbstractFetcherThread.doWork [28] kafka.utils.ShutdownableThread.run--- 32360000000 ns (11.49%), 3236 samples [ 0] java.util.TreeMap.successor [ 1] java.util.TreeMap$PrivateEntryIterator.nextEntry [ 2] java.util.TreeMap$ValueIterator.next [ 3] scala.collection.convert.Wrappers$JIteratorWrapper.next [ 4] scala.collection.Iterator.find [ 5] scala.collection.Iterator.find$ [ 6] scala.collection.AbstractIterator.find [ 7] scala.collection.IterableLike.find [ 8] scala.collection.IterableLike.find$ [ 9] scala.collection.AbstractIterable.find [10] kafka.log.ProducerStateManager.lastStableOffset [11] kafka.log.Log.$anonfun$append$12 [12] kafka.log.Log.$anonfun$append$2 [13] kafka.log.Log.append [14] kafka.log.Log.appendAsFollower [15] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [16] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [17] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica [18] kafka.server.ReplicaFetcherThread.processPartitionData [19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 [20] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 [21] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted [22] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply [23] scala.collection.mutable.ResizableArray.foreach [24] scala.collection.mutable.ResizableArray.foreach$ [25] scala.collection.mutable.ArrayBuffer.foreach [26] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 [27] kafka.server.AbstractFetcherThread.processFetchRequest [28] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 [29] kafka.server.AbstractFetcherThread.maybeFetch [30] kafka.server.AbstractFetcherThread.doWork [31] kafka.utils.ShutdownableThread.run--- 15090000000 ns (5.36%), 1509 samples [ 0] java.util.TreeMap$PrivateEntryIterator.nextEntry [ 1] java.util.TreeMap$ValueIterator.next [ 2] scala.collection.convert.Wrappers$JIteratorWrapper.next [ 3] scala.collection.Iterator.find [ 4] scala.collection.Iterator.find$ [ 5] scala.collection.AbstractIterator.find [ 6] scala.collection.IterableLike.find [ 7] scala.collection.IterableLike.find$ [ 8] scala.collection.AbstractIterable.find [ 9] kafka.log.ProducerStateManager.lastStableOffset [10] kafka.log.Log.$anonfun$append$12 [11] kafka.log.Log.$anonfun$append$2 [12] kafka.log.Log.append [13] kafka.log.Log.appendAsFollower [14] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [15] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [16] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica--- 13200000000 ns (4.69%), 1320 samples [ 0] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext [ 1] scala.collection.Iterator.find [ 2] scala.collection.Iterator.find$ [ 3] scala.collection.AbstractIterator.find [ 4] scala.collection.IterableLike.find [ 5] scala.collection.IterableLike.find$ [ 6] scala.collection.AbstractIterable.find [ 7] kafka.log.ProducerStateManager.lastStableOffset [ 8] kafka.log.Log.$anonfun$append$12 [ 9] kafka.log.Log.$anonfun$append$2 [10] kafka.log.Log.append [11] kafka.log.Log.appendAsFollower [12] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [13] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [14] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica--- 12390000000 ns (4.40%), 1239 samples [ 0] java.util.TreeMap$ValueIterator.next [ 1] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext [ 2] scala.collection.Iterator.find [ 3] scala.collection.Iterator.find$ [ 4] scala.collection.AbstractIterator.find [ 5] scala.collection.IterableLike.find [ 6] scala.collection.IterableLike.find$ [ 7] scala.collection.AbstractIterable.find [ 8] kafka.log.ProducerStateManager.lastStableOffset [ 9] kafka.log.Log.$anonfun$append$12 [10] kafka.log.Log.$anonfun$append$2 [11] kafka.log.Log.append [12] kafka.log.Log.appendAsFollower [13] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [14] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [15] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica [16] kafka.server.ReplicaFetcherThread.processPartitionData [17] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 [18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 [19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted [20] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply [21] scala.collection.mutable.ResizableArray.foreach [22] scala.collection.mutable.ResizableArray.foreach$ [23] scala.collection.mutable.ArrayBuffer.foreach [24] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 [25] kafka.server.AbstractFetcherThread.processFetchRequest [26] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 [27] kafka.server.AbstractFetcherThread.maybeFetch [28] kafka.server.AbstractFetcherThread.doWork [29] kafka.utils.ShutdownableThread.run--- 11810000000 ns (4.19%), 1181 samples [ 0] java.util.TreeMap.successor [ 1] java.util.TreeMap$PrivateEntryIterator.nextEntry [ 2] java.util.TreeMap$ValueIterator.next [ 3] scala.collection.convert.Wrappers$JIteratorWrapper.next [ 4] scala.collection.Iterator.find [ 5] scala.collection.Iterator.find$ [ 6] scala.collection.AbstractIterator.find [ 7] scala.collection.IterableLike.find [ 8] scala.collection.IterableLike.find$ [ 9] scala.collection.AbstractIterable.find [10] kafka.log.ProducerStateManager.lastStableOffset [11] kafka.log.Log.$anonfun$append$12 [12] kafka.log.Log.$anonfun$append$2 [13] kafka.log.Log.append [14] kafka.log.Log.appendAsFollower [15] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [16] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [17] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica {code} was: When replacing a broker, with an empty data dir, and the same broker ID - we are seeing very high CPU usage during replication, generally up to 100% for some time, on a 4 VCPU (EC2 R5) host. This is a 6 host cluster, with approx 1000 topics and 3000 partitions. There is of course traffic being served as well, as it catches up and becomes leader of partitions, however due to the high replication CPU usage - client's start to have connection issue. Normal traffic on this host is is around 40% CPU, when it's completely replicated. CPU profiling during this 'replace' scenario, shows this: {code:java} 54730000000 19.43% 5473 java.util.TreeMap$PrivateEntryIterator.nextEntry 49750000000 17.66% 4975 scala.collection.convert.Wrappers$JIteratorWrapper.hasNext 44170000000 15.68% 4417 java.util.TreeMap.successor 17730000000 6.29% 1773 java.util.TreeMap$ValueIterator.next 17000000000 6.03% 1700 java.util.TreeMap$PrivateEntryIterator.hasNext 6010000000 2.13% 601 scala.collection.convert.Wrappers$JIteratorWrapper.next 5160000000 1.83% 516 writev --- 38850000000 ns (13.79%), 3885 samples [ 0] java.util.TreeMap$PrivateEntryIterator.nextEntry [ 1] java.util.TreeMap$ValueIterator.next [ 2] scala.collection.convert.Wrappers$JIteratorWrapper.next [ 3] scala.collection.Iterator.find [ 4] scala.collection.Iterator.find$ [ 5] scala.collection.AbstractIterator.find [ 6] scala.collection.IterableLike.find [ 7] scala.collection.IterableLike.find$ [ 8] scala.collection.AbstractIterable.find [ 9] kafka.log.ProducerStateManager.lastStableOffset [10] kafka.log.Log.$anonfun$append$12 [11] kafka.log.Log.$anonfun$append$2 [12] kafka.log.Log.append [13] kafka.log.Log.appendAsFollower [14] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [15] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [16] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica [17] kafka.server.ReplicaFetcherThread.processPartitionData [18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 [19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 [20] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted [21] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply [22] scala.collection.mutable.ResizableArray.foreach [23] scala.collection.mutable.ResizableArray.foreach$ [24] scala.collection.mutable.ArrayBuffer.foreach [25] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 [26] kafka.server.AbstractFetcherThread.processFetchRequest [27] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 [28] kafka.server.AbstractFetcherThread.maybeFetch [29] kafka.server.AbstractFetcherThread.doWork [30] kafka.utils.ShutdownableThread.run--- 36320000000 ns (12.89%), 3632 samples [ 0] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext [ 1] scala.collection.Iterator.find [ 2] scala.collection.Iterator.find$ [ 3] scala.collection.AbstractIterator.find [ 4] scala.collection.IterableLike.find [ 5] scala.collection.IterableLike.find$ [ 6] scala.collection.AbstractIterable.find [ 7] kafka.log.ProducerStateManager.lastStableOffset [ 8] kafka.log.Log.$anonfun$append$12 [ 9] kafka.log.Log.$anonfun$append$2 [10] kafka.log.Log.append [11] kafka.log.Log.appendAsFollower [12] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [13] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [14] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica [15] kafka.server.ReplicaFetcherThread.processPartitionData [16] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 [17] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 [18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted [19] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply [20] scala.collection.mutable.ResizableArray.foreach [21] scala.collection.mutable.ResizableArray.foreach$ [22] scala.collection.mutable.ArrayBuffer.foreach [23] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 [24] kafka.server.AbstractFetcherThread.processFetchRequest [25] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 [26] kafka.server.AbstractFetcherThread.maybeFetch [27] kafka.server.AbstractFetcherThread.doWork [28] kafka.utils.ShutdownableThread.run--- 32360000000 ns (11.49%), 3236 samples [ 0] java.util.TreeMap.successor [ 1] java.util.TreeMap$PrivateEntryIterator.nextEntry [ 2] java.util.TreeMap$ValueIterator.next [ 3] scala.collection.convert.Wrappers$JIteratorWrapper.next [ 4] scala.collection.Iterator.find [ 5] scala.collection.Iterator.find$ [ 6] scala.collection.AbstractIterator.find [ 7] scala.collection.IterableLike.find [ 8] scala.collection.IterableLike.find$ [ 9] scala.collection.AbstractIterable.find [10] kafka.log.ProducerStateManager.lastStableOffset [11] kafka.log.Log.$anonfun$append$12 [12] kafka.log.Log.$anonfun$append$2 [13] kafka.log.Log.append [14] kafka.log.Log.appendAsFollower [15] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [16] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [17] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica [18] kafka.server.ReplicaFetcherThread.processPartitionData [19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 [20] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 [21] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted [22] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply [23] scala.collection.mutable.ResizableArray.foreach [24] scala.collection.mutable.ResizableArray.foreach$ [25] scala.collection.mutable.ArrayBuffer.foreach [26] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 [27] kafka.server.AbstractFetcherThread.processFetchRequest [28] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 [29] kafka.server.AbstractFetcherThread.maybeFetch [30] kafka.server.AbstractFetcherThread.doWork [31] kafka.utils.ShutdownableThread.run--- 15090000000 ns (5.36%), 1509 samples [ 0] java.util.TreeMap$PrivateEntryIterator.nextEntry [ 1] java.util.TreeMap$ValueIterator.next [ 2] scala.collection.convert.Wrappers$JIteratorWrapper.next [ 3] scala.collection.Iterator.find [ 4] scala.collection.Iterator.find$ [ 5] scala.collection.AbstractIterator.find [ 6] scala.collection.IterableLike.find [ 7] scala.collection.IterableLike.find$ [ 8] scala.collection.AbstractIterable.find [ 9] kafka.log.ProducerStateManager.lastStableOffset [10] kafka.log.Log.$anonfun$append$12 [11] kafka.log.Log.$anonfun$append$2 [12] kafka.log.Log.append [13] kafka.log.Log.appendAsFollower [14] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [15] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [16] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica--- 13200000000 ns (4.69%), 1320 samples [ 0] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext [ 1] scala.collection.Iterator.find [ 2] scala.collection.Iterator.find$ [ 3] scala.collection.AbstractIterator.find [ 4] scala.collection.IterableLike.find [ 5] scala.collection.IterableLike.find$ [ 6] scala.collection.AbstractIterable.find [ 7] kafka.log.ProducerStateManager.lastStableOffset [ 8] kafka.log.Log.$anonfun$append$12 [ 9] kafka.log.Log.$anonfun$append$2 [10] kafka.log.Log.append [11] kafka.log.Log.appendAsFollower [12] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [13] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [14] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica--- 12390000000 ns (4.40%), 1239 samples [ 0] java.util.TreeMap$ValueIterator.next [ 1] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext [ 2] scala.collection.Iterator.find [ 3] scala.collection.Iterator.find$ [ 4] scala.collection.AbstractIterator.find [ 5] scala.collection.IterableLike.find [ 6] scala.collection.IterableLike.find$ [ 7] scala.collection.AbstractIterable.find [ 8] kafka.log.ProducerStateManager.lastStableOffset [ 9] kafka.log.Log.$anonfun$append$12 [10] kafka.log.Log.$anonfun$append$2 [11] kafka.log.Log.append [12] kafka.log.Log.appendAsFollower [13] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [14] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [15] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica [16] kafka.server.ReplicaFetcherThread.processPartitionData [17] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 [18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 [19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted [20] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply [21] scala.collection.mutable.ResizableArray.foreach [22] scala.collection.mutable.ResizableArray.foreach$ [23] scala.collection.mutable.ArrayBuffer.foreach [24] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 [25] kafka.server.AbstractFetcherThread.processFetchRequest [26] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 [27] kafka.server.AbstractFetcherThread.maybeFetch [28] kafka.server.AbstractFetcherThread.doWork [29] kafka.utils.ShutdownableThread.run--- 11810000000 ns (4.19%), 1181 samples [ 0] java.util.TreeMap.successor [ 1] java.util.TreeMap$PrivateEntryIterator.nextEntry [ 2] java.util.TreeMap$ValueIterator.next [ 3] scala.collection.convert.Wrappers$JIteratorWrapper.next [ 4] scala.collection.Iterator.find [ 5] scala.collection.Iterator.find$ [ 6] scala.collection.AbstractIterator.find [ 7] scala.collection.IterableLike.find [ 8] scala.collection.IterableLike.find$ [ 9] scala.collection.AbstractIterable.find [10] kafka.log.ProducerStateManager.lastStableOffset [11] kafka.log.Log.$anonfun$append$12 [12] kafka.log.Log.$anonfun$append$2 [13] kafka.log.Log.append [14] kafka.log.Log.appendAsFollower [15] kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 [16] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica [17] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica {code} > High CPU > -------- > > Key: KAFKA-9963 > URL: https://issues.apache.org/jira/browse/KAFKA-9963 > Project: Kafka > Issue Type: Bug > Components: core > Affects Versions: 2.4.1 > Reporter: Evan Williams > Priority: Major > > When replacing a broker, with an empty data dir, and the same broker ID - we > are seeing very high CPU usage during replication, generally up to 100% for > some time, on a 4 VCPU (EC2 R5) host. This is a 6 host cluster, with approx > 1000 topics and 3000 partitions. > > There is of course traffic being served as well, as it catches up and becomes > leader of partitions, however due to the high replication CPU usage - > client's start to have connection issue. Normal traffic on this host is is > around 40% CPU, when it's completely replicated. > CPU profiling (2 minutes ) during this 'replace' scenario, shows this: > > {code:java} > 54730000000 19.43% 5473 > java.util.TreeMap$PrivateEntryIterator.nextEntry > 49750000000 17.66% 4975 > scala.collection.convert.Wrappers$JIteratorWrapper.hasNext > 44170000000 15.68% 4417 java.util.TreeMap.successor > 17730000000 6.29% 1773 java.util.TreeMap$ValueIterator.next > 17000000000 6.03% 1700 java.util.TreeMap$PrivateEntryIterator.hasNext > 6010000000 2.13% 601 > scala.collection.convert.Wrappers$JIteratorWrapper.next > 5160000000 1.83% 516 writev > --- 38850000000 ns (13.79%), 3885 samples > [ 0] java.util.TreeMap$PrivateEntryIterator.nextEntry > [ 1] java.util.TreeMap$ValueIterator.next > [ 2] scala.collection.convert.Wrappers$JIteratorWrapper.next > [ 3] scala.collection.Iterator.find > [ 4] scala.collection.Iterator.find$ > [ 5] scala.collection.AbstractIterator.find > [ 6] scala.collection.IterableLike.find > [ 7] scala.collection.IterableLike.find$ > [ 8] scala.collection.AbstractIterable.find > [ 9] kafka.log.ProducerStateManager.lastStableOffset > [10] kafka.log.Log.$anonfun$append$12 > [11] kafka.log.Log.$anonfun$append$2 > [12] kafka.log.Log.append > [13] kafka.log.Log.appendAsFollower > [14] > kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 > [15] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica > [16] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica > [17] kafka.server.ReplicaFetcherThread.processPartitionData > [18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 > [19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 > [20] > kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted > [21] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply > [22] scala.collection.mutable.ResizableArray.foreach > [23] scala.collection.mutable.ResizableArray.foreach$ > [24] scala.collection.mutable.ArrayBuffer.foreach > [25] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 > [26] kafka.server.AbstractFetcherThread.processFetchRequest > [27] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 > [28] kafka.server.AbstractFetcherThread.maybeFetch > [29] kafka.server.AbstractFetcherThread.doWork > [30] kafka.utils.ShutdownableThread.run--- 36320000000 ns (12.89%), 3632 > samples > [ 0] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext > [ 1] scala.collection.Iterator.find > [ 2] scala.collection.Iterator.find$ > [ 3] scala.collection.AbstractIterator.find > [ 4] scala.collection.IterableLike.find > [ 5] scala.collection.IterableLike.find$ > [ 6] scala.collection.AbstractIterable.find > [ 7] kafka.log.ProducerStateManager.lastStableOffset > [ 8] kafka.log.Log.$anonfun$append$12 > [ 9] kafka.log.Log.$anonfun$append$2 > [10] kafka.log.Log.append > [11] kafka.log.Log.appendAsFollower > [12] > kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 > [13] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica > [14] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica > [15] kafka.server.ReplicaFetcherThread.processPartitionData > [16] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 > [17] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 > [18] > kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted > [19] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply > [20] scala.collection.mutable.ResizableArray.foreach > [21] scala.collection.mutable.ResizableArray.foreach$ > [22] scala.collection.mutable.ArrayBuffer.foreach > [23] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 > [24] kafka.server.AbstractFetcherThread.processFetchRequest > [25] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 > [26] kafka.server.AbstractFetcherThread.maybeFetch > [27] kafka.server.AbstractFetcherThread.doWork > [28] kafka.utils.ShutdownableThread.run--- 32360000000 ns (11.49%), 3236 > samples > [ 0] java.util.TreeMap.successor > [ 1] java.util.TreeMap$PrivateEntryIterator.nextEntry > [ 2] java.util.TreeMap$ValueIterator.next > [ 3] scala.collection.convert.Wrappers$JIteratorWrapper.next > [ 4] scala.collection.Iterator.find > [ 5] scala.collection.Iterator.find$ > [ 6] scala.collection.AbstractIterator.find > [ 7] scala.collection.IterableLike.find > [ 8] scala.collection.IterableLike.find$ > [ 9] scala.collection.AbstractIterable.find > [10] kafka.log.ProducerStateManager.lastStableOffset > [11] kafka.log.Log.$anonfun$append$12 > [12] kafka.log.Log.$anonfun$append$2 > [13] kafka.log.Log.append > [14] kafka.log.Log.appendAsFollower > [15] > kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 > [16] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica > [17] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica > [18] kafka.server.ReplicaFetcherThread.processPartitionData > [19] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 > [20] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 > [21] > kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted > [22] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply > [23] scala.collection.mutable.ResizableArray.foreach > [24] scala.collection.mutable.ResizableArray.foreach$ > [25] scala.collection.mutable.ArrayBuffer.foreach > [26] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 > [27] kafka.server.AbstractFetcherThread.processFetchRequest > [28] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 > [29] kafka.server.AbstractFetcherThread.maybeFetch > [30] kafka.server.AbstractFetcherThread.doWork > [31] kafka.utils.ShutdownableThread.run--- 15090000000 ns (5.36%), 1509 > samples > [ 0] java.util.TreeMap$PrivateEntryIterator.nextEntry > [ 1] java.util.TreeMap$ValueIterator.next > [ 2] scala.collection.convert.Wrappers$JIteratorWrapper.next > [ 3] scala.collection.Iterator.find > [ 4] scala.collection.Iterator.find$ > [ 5] scala.collection.AbstractIterator.find > [ 6] scala.collection.IterableLike.find > [ 7] scala.collection.IterableLike.find$ > [ 8] scala.collection.AbstractIterable.find > [ 9] kafka.log.ProducerStateManager.lastStableOffset > [10] kafka.log.Log.$anonfun$append$12 > [11] kafka.log.Log.$anonfun$append$2 > [12] kafka.log.Log.append > [13] kafka.log.Log.appendAsFollower > [14] > kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 > [15] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica > [16] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica--- > 13200000000 ns (4.69%), 1320 samples > [ 0] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext > [ 1] scala.collection.Iterator.find > [ 2] scala.collection.Iterator.find$ > [ 3] scala.collection.AbstractIterator.find > [ 4] scala.collection.IterableLike.find > [ 5] scala.collection.IterableLike.find$ > [ 6] scala.collection.AbstractIterable.find > [ 7] kafka.log.ProducerStateManager.lastStableOffset > [ 8] kafka.log.Log.$anonfun$append$12 > [ 9] kafka.log.Log.$anonfun$append$2 > [10] kafka.log.Log.append > [11] kafka.log.Log.appendAsFollower > [12] > kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 > [13] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica > [14] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica--- > 12390000000 ns (4.40%), 1239 samples > [ 0] java.util.TreeMap$ValueIterator.next > [ 1] scala.collection.convert.Wrappers$JIteratorWrapper.hasNext > [ 2] scala.collection.Iterator.find > [ 3] scala.collection.Iterator.find$ > [ 4] scala.collection.AbstractIterator.find > [ 5] scala.collection.IterableLike.find > [ 6] scala.collection.IterableLike.find$ > [ 7] scala.collection.AbstractIterable.find > [ 8] kafka.log.ProducerStateManager.lastStableOffset > [ 9] kafka.log.Log.$anonfun$append$12 > [10] kafka.log.Log.$anonfun$append$2 > [11] kafka.log.Log.append > [12] kafka.log.Log.appendAsFollower > [13] > kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 > [14] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica > [15] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica > [16] kafka.server.ReplicaFetcherThread.processPartitionData > [17] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$7 > [18] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6 > [19] > kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$6$adapted > [20] kafka.server.AbstractFetcherThread$$Lambda$552.191789933.apply > [21] scala.collection.mutable.ResizableArray.foreach > [22] scala.collection.mutable.ResizableArray.foreach$ > [23] scala.collection.mutable.ArrayBuffer.foreach > [24] kafka.server.AbstractFetcherThread.$anonfun$processFetchRequest$5 > [25] kafka.server.AbstractFetcherThread.processFetchRequest > [26] kafka.server.AbstractFetcherThread.$anonfun$maybeFetch$3 > [27] kafka.server.AbstractFetcherThread.maybeFetch > [28] kafka.server.AbstractFetcherThread.doWork > [29] kafka.utils.ShutdownableThread.run--- 11810000000 ns (4.19%), 1181 > samples > [ 0] java.util.TreeMap.successor > [ 1] java.util.TreeMap$PrivateEntryIterator.nextEntry > [ 2] java.util.TreeMap$ValueIterator.next > [ 3] scala.collection.convert.Wrappers$JIteratorWrapper.next > [ 4] scala.collection.Iterator.find > [ 5] scala.collection.Iterator.find$ > [ 6] scala.collection.AbstractIterator.find > [ 7] scala.collection.IterableLike.find > [ 8] scala.collection.IterableLike.find$ > [ 9] scala.collection.AbstractIterable.find > [10] kafka.log.ProducerStateManager.lastStableOffset > [11] kafka.log.Log.$anonfun$append$12 > [12] kafka.log.Log.$anonfun$append$2 > [13] kafka.log.Log.append > [14] kafka.log.Log.appendAsFollower > [15] > kafka.cluster.Partition.$anonfun$doAppendRecordsToFollowerOrFutureReplica$1 > [16] kafka.cluster.Partition.doAppendRecordsToFollowerOrFutureReplica > [17] kafka.cluster.Partition.appendRecordsToFollowerOrFutureReplica > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)