mumrah commented on code in PR #13438: URL: https://github.com/apache/kafka/pull/13438#discussion_r1146431114
########## core/src/main/scala/kafka/server/ControllerServer.scala: ########## @@ -366,6 +376,8 @@ class ControllerServer( // Ensure that we're not the Raft leader prior to shutting down our socket server, for a // smoother transition. sharedServer.ensureNotRaftLeader() + metadataPublishers.forEach(p => sharedServer.loader.removeAndClosePublisher(p).get()) Review Comment: Should we provide a timeout to `get()` to avoid indefinite blocking? ########## core/src/main/scala/kafka/server/ControllerServer.scala: ########## @@ -104,13 +106,15 @@ class ControllerServer( val socketServerFirstBoundPortFuture = new CompletableFuture[Integer]() var createTopicPolicy: Option[CreateTopicPolicy] = None var alterConfigPolicy: Option[AlterConfigPolicy] = None + @volatile var quorumControllerMetrics: QuorumControllerMetrics = _ var controller: Controller = _ var quotaManagers: QuotaManagers = _ var clientQuotaMetadataManager: ClientQuotaMetadataManager = _ var controllerApis: ControllerApis = _ var controllerApisHandlerPool: KafkaRequestHandlerPool = _ var migrationSupport: Option[ControllerMigrationSupport] = None def kafkaYammerMetrics: KafkaYammerMetrics = KafkaYammerMetrics.INSTANCE + val metadataPublishers: util.List[MetadataPublisher] = new CopyOnWriteArrayList[MetadataPublisher]() Review Comment: Do we need a thread-safe collection here? It looks like we're just populating this list here and then passing it to the MetadataLoader. ########## metadata/src/main/java/org/apache/kafka/controller/metrics/ControllerMetricsChanges.java: ########## @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.controller.metrics; + +import org.apache.kafka.image.TopicDelta; +import org.apache.kafka.image.TopicImage; +import org.apache.kafka.metadata.BrokerRegistration; +import org.apache.kafka.metadata.PartitionRegistration; + +import java.util.Map.Entry; + + +class ControllerMetricsChanges { + static int delta(boolean prev, boolean next) { Review Comment: Maybe add a Javadoc for this method? Also, "delta" is kind of vague. ########## metadata/src/main/java/org/apache/kafka/controller/metrics/ControllerMetricsChanges.java: ########## @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.controller.metrics; + +import org.apache.kafka.image.TopicDelta; +import org.apache.kafka.image.TopicImage; +import org.apache.kafka.metadata.BrokerRegistration; +import org.apache.kafka.metadata.PartitionRegistration; + +import java.util.Map.Entry; + + +class ControllerMetricsChanges { + static int delta(boolean prev, boolean next) { + if (prev) { + return next ? 0 : -1; + } else { + return next ? 1 : 0; + } + } + + private int fencedBrokersChange = 0; + private int activeBrokersChange = 0; + private int globalTopicsChange = 0; + private int globalPartitionsChange = 0; + private int offlinePartitionsChange = 0; + private int partitionsWithoutPreferredLeaderChange = 0; + + public int fencedBrokersChange() { + return fencedBrokersChange; + } + + public int activeBrokersChange() { + return activeBrokersChange; + } + + public int globalTopicsChange() { + return globalTopicsChange; + } + + public int globalPartitionsChange() { + return globalPartitionsChange; + } + + public int offlinePartitionsChange() { + return offlinePartitionsChange; + } + + public int partitionsWithoutPreferredLeaderChange() { + return partitionsWithoutPreferredLeaderChange; + } + + void handleBrokerChange(BrokerRegistration prev, BrokerRegistration next) { + boolean wasFenced = false; + boolean wasActive = false; + if (prev != null) { + wasFenced = prev.fenced(); + wasActive = !prev.fenced(); + } + boolean isFenced = false; + boolean isActive = false; + if (next != null) { + isFenced = next.fenced(); + isActive = !next.fenced(); + } + fencedBrokersChange += delta(wasFenced, isFenced); + activeBrokersChange += delta(wasActive, isActive); + } + + void handleDeletedTopic(TopicImage deletedTopic) { + deletedTopic.partitions().values().forEach(prev -> handlePartitionChange(prev, null)); + globalTopicsChange--; + } + + void handleTopicChange(TopicImage prev, TopicDelta topicDelta) { + if (prev == null) { + globalTopicsChange++; + for (PartitionRegistration nextPartition : topicDelta.partitionChanges().values()) { + handlePartitionChange(null, nextPartition); + } + } else { + for (Entry<Integer, PartitionRegistration> entry : topicDelta.partitionChanges().entrySet()) { + int partitionId = entry.getKey(); + PartitionRegistration nextPartition = entry.getValue(); + handlePartitionChange(prev.partitions().get(partitionId), nextPartition); + } + } + } + + void handlePartitionChange(PartitionRegistration prev, PartitionRegistration next) { + boolean wasPresent = false; + boolean wasOffline = false; + boolean wasWithoutPreferredLeader = false; + if (prev != null) { + wasPresent = true; + wasOffline = !prev.hasLeader(); + wasWithoutPreferredLeader = !prev.hasPreferredLeader(); + } + boolean isPresent = false; + boolean isOffline = false; + boolean isWithoutPreferredLeader = false; + if (next != null) { + isPresent = true; + isOffline = !next.hasLeader(); + isWithoutPreferredLeader = !next.hasPreferredLeader(); + } + globalPartitionsChange += delta(wasPresent, isPresent); + offlinePartitionsChange += delta(wasOffline, isOffline); + partitionsWithoutPreferredLeaderChange += delta(wasWithoutPreferredLeader, isWithoutPreferredLeader); + } + + void apply(ControllerServerMetrics metrics) { + if (fencedBrokersChange != 0) { + metrics.addToFencedBrokerCount(fencedBrokersChange); + } + if (activeBrokersChange != 0) { + metrics.addToActiveBrokerCount(activeBrokersChange); + } + if (globalTopicsChange != 0) { + metrics.addToGlobalTopicCount(globalTopicsChange); + } + if (globalPartitionsChange != 0) { + metrics.addToGlobalPartitionCount(globalPartitionsChange); + } + if (offlinePartitionsChange != 0) { + metrics.addToOfflinePartitionCount(offlinePartitionsChange); + } + if (partitionsWithoutPreferredLeaderChange != 0) { + metrics.addToPreferredReplicaImbalanceCount(partitionsWithoutPreferredLeaderChange); + } + } +} Review Comment: nit: newline ########## metadata/src/main/java/org/apache/kafka/controller/metrics/ControllerServerMetricsPublisher.java: ########## @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.controller.metrics; + +import org.apache.kafka.common.Uuid; +import org.apache.kafka.image.MetadataDelta; +import org.apache.kafka.image.MetadataImage; +import org.apache.kafka.image.TopicDelta; +import org.apache.kafka.image.TopicImage; +import org.apache.kafka.image.loader.LoaderManifest; +import org.apache.kafka.image.publisher.MetadataPublisher; +import org.apache.kafka.metadata.BrokerRegistration; +import org.apache.kafka.metadata.PartitionRegistration; +import org.apache.kafka.server.fault.FaultHandler; + +import java.util.Map.Entry; +import java.util.Optional; + + +/** + * This publisher translates metadata updates sent by MetadataLoader into changes to controller + * metrics. Like all MetadataPublisher objects, it only receives notifications about events that + * have been persisted to the metadata log. So on the active controller, it will run slightly + * behind the latest in-memory state which has not yet been fully persisted to the log. This is + * reasonable for metrics, which don't need up-to-the-millisecond update latency. + * + * NOTE: the ZK controller has some special rules for calculating preferredReplicaImbalanceCount + * which we haven't implemented here. Specifically, the ZK controller considers reassigning + * partitions to always have their preferred leader, even if they don't. + * All other metrics should be the same, as far as is possible. + */ +public class ControllerServerMetricsPublisher implements MetadataPublisher { Review Comment: I wonder if having Metadata in the class name would make it more clear that this is a metrics publisher for metadata-derived things ########## metadata/src/main/java/org/apache/kafka/controller/metrics/ControllerMetricsChanges.java: ########## @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.controller.metrics; + +import org.apache.kafka.image.TopicDelta; +import org.apache.kafka.image.TopicImage; +import org.apache.kafka.metadata.BrokerRegistration; +import org.apache.kafka.metadata.PartitionRegistration; + +import java.util.Map.Entry; + + +class ControllerMetricsChanges { Review Comment: Short Javadoc would be helpful here ########## metadata/src/main/java/org/apache/kafka/controller/metrics/ControllerServerMetrics.java: ########## @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.controller.metrics; + +import com.yammer.metrics.core.Gauge; +import com.yammer.metrics.core.MetricName; +import com.yammer.metrics.core.MetricsRegistry; +import org.apache.kafka.server.metrics.KafkaYammerMetrics; + +import java.util.Arrays; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * These are the metrics which are managed by the ControllerServer class. They generally pertain to + * aspects of the metadata, like how many topics or partitions we have. + * All of these except MetadataErrorCount are managed by ControllerServerMetricsPublisher. + * + * IMPORTANT: Metrics which are managed by the QuorumController class itself should go in + * @link{org.apache.kafka.controller.metrics.QuorumControllerMetrics}, not here. + */ +public final class ControllerServerMetrics implements AutoCloseable { + private final static MetricName FENCED_BROKER_COUNT = getMetricName( + "KafkaController", "FencedBrokerCount"); + private final static MetricName ACTIVE_BROKER_COUNT = getMetricName( + "KafkaController", "ActiveBrokerCount"); + private final static MetricName GLOBAL_TOPIC_COUNT = getMetricName( + "KafkaController", "GlobalTopicCount"); + private final static MetricName GLOBAL_PARTITION_COUNT = getMetricName( + "KafkaController", "GlobalPartitionCount"); + private final static MetricName OFFLINE_PARTITION_COUNT = getMetricName( + "KafkaController", "OfflinePartitionsCount"); + private final static MetricName PREFERRED_REPLICA_IMBALANCE_COUNT = getMetricName( + "KafkaController", "PreferredReplicaImbalanceCount"); + private final static MetricName METADATA_ERROR_COUNT = getMetricName( + "KafkaController", "MetadataErrorCount"); + + private final Optional<MetricsRegistry> registry; + private final AtomicInteger fencedBrokerCount = new AtomicInteger(0); + private final AtomicInteger activeBrokerCount = new AtomicInteger(0); + private final AtomicInteger globalTopicCount = new AtomicInteger(0); + private final AtomicInteger globalPartitionCount = new AtomicInteger(); + private final AtomicInteger offlinePartitionCount = new AtomicInteger(); + private final AtomicInteger preferredReplicaImbalanceCount = new AtomicInteger(); + private final AtomicInteger metadataErrorCount = new AtomicInteger(0); + + /** + * Create a new ControllerServerMetrics object. + * + * @param registry The metrics registry, or Optional.empty if this is a test and we don't have one. + */ + public ControllerServerMetrics(Optional<MetricsRegistry> registry) { Review Comment: I was sort of confused by this, but I think I get it. Since MetricsRegistry is not an interface, we can't easily stub it out for tests. So it's either something like this, or using a mocking library. ########## metadata/src/main/java/org/apache/kafka/controller/metrics/ControllerServerMetrics.java: ########## @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.controller.metrics; + +import com.yammer.metrics.core.Gauge; +import com.yammer.metrics.core.MetricName; +import com.yammer.metrics.core.MetricsRegistry; +import org.apache.kafka.server.metrics.KafkaYammerMetrics; + +import java.util.Arrays; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * These are the metrics which are managed by the ControllerServer class. They generally pertain to + * aspects of the metadata, like how many topics or partitions we have. + * All of these except MetadataErrorCount are managed by ControllerServerMetricsPublisher. + * + * IMPORTANT: Metrics which are managed by the QuorumController class itself should go in + * @link{org.apache.kafka.controller.metrics.QuorumControllerMetrics}, not here. + */ +public final class ControllerServerMetrics implements AutoCloseable { + private final static MetricName FENCED_BROKER_COUNT = getMetricName( + "KafkaController", "FencedBrokerCount"); + private final static MetricName ACTIVE_BROKER_COUNT = getMetricName( + "KafkaController", "ActiveBrokerCount"); + private final static MetricName GLOBAL_TOPIC_COUNT = getMetricName( + "KafkaController", "GlobalTopicCount"); + private final static MetricName GLOBAL_PARTITION_COUNT = getMetricName( + "KafkaController", "GlobalPartitionCount"); + private final static MetricName OFFLINE_PARTITION_COUNT = getMetricName( + "KafkaController", "OfflinePartitionsCount"); + private final static MetricName PREFERRED_REPLICA_IMBALANCE_COUNT = getMetricName( + "KafkaController", "PreferredReplicaImbalanceCount"); + private final static MetricName METADATA_ERROR_COUNT = getMetricName( + "KafkaController", "MetadataErrorCount"); + + private final Optional<MetricsRegistry> registry; + private final AtomicInteger fencedBrokerCount = new AtomicInteger(0); + private final AtomicInteger activeBrokerCount = new AtomicInteger(0); + private final AtomicInteger globalTopicCount = new AtomicInteger(0); + private final AtomicInteger globalPartitionCount = new AtomicInteger(); Review Comment: Some are initialized with 0, others aren't. We should probably pick one and be consistent -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org