tkalkirill commented on code in PR #3835: URL: https://github.com/apache/ignite-3/pull/3835#discussion_r1624632167
########## modules/table/src/main/java/org/apache/ignite/internal/table/distributed/disaster/DisasterRecoveryManager.java: ########## @@ -575,10 +595,8 @@ private static <T> boolean containsOrEmpty(T item, Collection<T> collection) { return collection.isEmpty() || collection.contains(item); } - /** - * Converts internal raft node state into public local partition state. - */ - private static LocalPartitionStateEnum convertState(State nodeState) { + /** Converts internal raft node state into public local partition state. */ Review Comment: I ask you to be more polite, and to be less emotional and more professional. I said that I would stop formatting the text and making changes that did not affect my changes. I changed this particular method a little for the task and therefore formatted the documentation, purely for beauty. I’ll try to answer your question again in advance, no, I won’t make any rules or big changes for the entire project. ########## modules/table/src/main/java/org/apache/ignite/internal/table/distributed/disaster/PartitionStatesMetricSource.java: ########## @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.internal.table.distributed.disaster; + +import static org.apache.ignite.internal.table.distributed.disaster.DisasterRecoveryManager.convertState; +import static org.apache.ignite.internal.table.distributed.disaster.LocalPartitionStateEnum.BROKEN; +import static org.apache.ignite.internal.table.distributed.disaster.LocalPartitionStateEnum.HEALTHY; +import static org.apache.ignite.internal.table.distributed.disaster.LocalPartitionStateEnum.INITIALIZING; +import static org.apache.ignite.internal.table.distributed.disaster.LocalPartitionStateEnum.INSTALLING_SNAPSHOT; +import static org.apache.ignite.internal.table.distributed.disaster.LocalPartitionStateEnum.UNAVAILABLE; + +import org.apache.ignite.internal.catalog.descriptors.CatalogTableDescriptor; +import org.apache.ignite.internal.metrics.MetricSet; +import org.apache.ignite.internal.metrics.MetricSetBuilder; +import org.apache.ignite.internal.metrics.MetricSource; +import org.apache.ignite.internal.replicator.TablePartitionId; +import org.apache.ignite.raft.jraft.Node; +import org.apache.ignite.raft.jraft.RaftGroupService; +import org.jetbrains.annotations.Nullable; + +/** Source of metrics for table partition statuses. */ +class PartitionStatesMetricSource implements MetricSource { + private final String metricSourceName; + + private final int tableId; + + private final DisasterRecoveryManager disasterRecoveryManager; + + /** Enablement status. Accessed from different threads under synchronization on this object. 
*/ + private boolean enabled; + + PartitionStatesMetricSource( + CatalogTableDescriptor tableDescriptor, + DisasterRecoveryManager disasterRecoveryManager + ) { + this.tableId = tableDescriptor.id(); + this.disasterRecoveryManager = disasterRecoveryManager; + + metricSourceName = String.format("partition.states.zone.%s.table.%s", tableDescriptor.zoneId(), tableDescriptor.id()); + } + + @Override + public String name() { + return metricSourceName; + } + + @Override + public synchronized @Nullable MetricSet enable() { + if (enabled) { + return null; + } + + var builder = new MetricSetBuilder(metricSourceName); + + builder.longGauge( + "UnavailablePartitionCount", + "Count of partitions not yet started.", + () -> calculatePartitionCountByLocalState(UNAVAILABLE) + ); + + builder.longGauge( + "HealthyPartitionCount", + "Count of living partitions with a healthy state machine.", + () -> calculatePartitionCountByLocalState(HEALTHY) + ); + + builder.longGauge( + "InitializingPartitionCount", + "Count of partitions that are starting right now.", + () -> calculatePartitionCountByLocalState(INITIALIZING) + ); + + builder.longGauge( + "InstallingSnapshotPartitionCount", + "Count of partitions that installing Raft snapshots from the leader.", + () -> calculatePartitionCountByLocalState(INSTALLING_SNAPSHOT) + ); + + builder.longGauge( + "BrokenPartitionCount", + "Count of broken partitions.", + () -> calculatePartitionCountByLocalState(BROKEN) + ); + + return builder.build(); + } + + @Override + public synchronized void disable() { + enabled = false; + } + + @Override + public synchronized boolean enabled() { + return enabled; + } + + private long calculatePartitionCountByLocalState(LocalPartitionStateEnum state) { + long[] count = {0}; + + disasterRecoveryManager.raftManager.forEach((raftNodeId, raftGroupService) -> { Review Comment: I have not found any simple and suitable mechanisms for obtaining node statuses, there are no listeners or something like that. Maybe you know? 
I thought that, to begin with, this approach would be sufficient and cover our needs; if there is a significant performance degradation, we will do it differently. I don’t understand where the O(n^2) comes from; can you point it out explicitly? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@ignite.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org