This is an automated email from the ASF dual-hosted git repository. ddanielr pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push: new 99af30f283 Add Balancer Metric for Migrations Needed (#4699) 99af30f283 is described below commit 99af30f283132fbba451a3808f579ce6961082c3 Author: Daniel Roberts <ddani...@gmail.com> AuthorDate: Sun Jul 28 15:31:51 2024 -0400 Add Balancer Metric for Migrations Needed (#4699) Refactored the metric names for readability and added javadoc entries for each of the new balancer metrics Produces a single metric for balancing that emits the current number of migrations needed for the system to be balanced. Switched metric to use a LongSupplier and handled the initial null condition. --------- Co-authored-by: Ed Coleman <edcole...@apache.org> --- .../accumulo/core/metrics/MetricsProducer.java | 10 +++++ .../java/org/apache/accumulo/manager/Manager.java | 8 ++++ .../accumulo/manager/metrics/BalancerMetrics.java | 51 ++++++++++++++++++++++ .../java/org/apache/accumulo/test/BalanceIT.java | 18 ++++++++ .../BalanceInPresenceOfOfflineTableIT.java | 2 + .../apache/accumulo/test/metrics/MetricsIT.java | 5 ++- 6 files changed, 92 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java index 8cf2ffc956..84bf20b16c 100644 --- a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java +++ b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java @@ -596,6 +596,14 @@ import io.micrometer.core.instrument.MeterRegistry; * <td>Distribution Summary</td> * <td></td> * </tr> + * <!-- Balancing --> + * <tr> + * <td>N/A</td> + * <td>N/A</td> + * <td>{@value METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED}</td> + * <td>Gauge</td> + * <td>The number of migrations that need to complete before the system is balanced</td> + * </tr> * </table> * * @since 2.1.0 @@ -708,6 +716,8 @@ public interface MetricsProducer { String METRICS_BLOCKCACHE_SUMMARY_REQUESTCOUNT = 
METRICS_BLOCKCACHE_PREFIX + "summary.requestcount"; + String METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED = "accumulo.manager.balancer.migrations.needed"; + /** * Build Micrometer Meter objects and register them with the registry */ diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java index 16548ea6d0..a72485af01 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java @@ -109,6 +109,7 @@ import org.apache.accumulo.core.util.Halt; import org.apache.accumulo.core.util.Retry; import org.apache.accumulo.core.util.threads.ThreadPools; import org.apache.accumulo.core.util.threads.Threads; +import org.apache.accumulo.manager.metrics.BalancerMetrics; import org.apache.accumulo.manager.metrics.ManagerMetrics; import org.apache.accumulo.manager.recovery.RecoveryManager; import org.apache.accumulo.manager.state.TableCounts; @@ -205,6 +206,7 @@ public class Manager extends AbstractServer private TServer clientService = null; private volatile TabletBalancer tabletBalancer; private final BalancerEnvironment balancerEnvironment; + private final BalancerMetrics balancerMetrics = new BalancerMetrics(); private ManagerState state = ManagerState.INITIAL; @@ -561,6 +563,10 @@ public class Manager extends AbstractServer } } + public MetricsProducer getBalancerMetrics() { + return balancerMetrics; + } + enum TabletGoalState { HOSTED(TUnloadTabletGoal.UNKNOWN), UNASSIGNED(TUnloadTabletGoal.UNASSIGNED), @@ -1068,6 +1074,7 @@ public class Manager extends AbstractServer } while (migrationsOutForLevel > 0 && (dl == DataLevel.ROOT || dl == DataLevel.METADATA)); totalMigrationsOut += migrationsOutForLevel; } + balancerMetrics.assignMigratingCount(migrations::size); if (totalMigrationsOut == 0) { synchronized (balancedNotifier) { @@ -1244,6 +1251,7 @@ public class Manager extends AbstractServer 
metricsInfo.addServiceTags(getApplicationName(), sa.getAddress()); var producers = ManagerMetrics.getProducers(getConfiguration(), this); + producers.add(balancerMetrics); metricsInfo.addMetricsProducers(producers.toArray(new MetricsProducer[0])); metricsInfo.init(); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/metrics/BalancerMetrics.java b/server/manager/src/main/java/org/apache/accumulo/manager/metrics/BalancerMetrics.java new file mode 100644 index 0000000000..01aa9e3052 --- /dev/null +++ b/server/manager/src/main/java/org/apache/accumulo/manager/metrics/BalancerMetrics.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.accumulo.manager.metrics; + +import java.util.function.LongSupplier; + +import org.apache.accumulo.core.metrics.MetricsProducer; + +import io.micrometer.core.instrument.Gauge; +import io.micrometer.core.instrument.MeterRegistry; + +public class BalancerMetrics implements MetricsProducer { + + LongSupplier migratingCount; + + public void assignMigratingCount(LongSupplier f) { + migratingCount = f; + } + + public long getMigratingCount() { + // Handle initial state when balance has never been called: no supplier has been assigned yet, so report 0 + if (migratingCount == null) { + return 0; + } + return migratingCount.getAsLong(); + } + + @Override + public void registerMetrics(MeterRegistry registry) { + Gauge + .builder(METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED, this, + BalancerMetrics::getMigratingCount) + .description("Overall total migrations that need to complete").register(registry); + } +} diff --git a/test/src/main/java/org/apache/accumulo/test/BalanceIT.java b/test/src/main/java/org/apache/accumulo/test/BalanceIT.java index 27ae709699..0164463903 100644 --- a/test/src/main/java/org/apache/accumulo/test/BalanceIT.java +++ b/test/src/main/java/org/apache/accumulo/test/BalanceIT.java @@ -19,12 +19,16 @@ package org.apache.accumulo.test; import java.time.Duration; +import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; import org.apache.accumulo.core.client.Accumulo; import org.apache.accumulo.core.client.AccumuloClient; +import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.harness.AccumuloClusterHarness; +import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -33,6 +37,20 @@ import org.slf4j.LoggerFactory; public class BalanceIT extends AccumuloClusterHarness { private static final Logger log = LoggerFactory.getLogger(BalanceIT.class); + @Override + public void 
configureMiniCluster(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) { + Map<String,String> siteConfig = cfg.getSiteConfig(); + siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K"); + siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms"); + siteConfig.put(Property.GENERAL_MICROMETER_ENABLED.getKey(), "true"); + siteConfig.put("general.custom.metrics.opts.logging.step", "0.5s"); + cfg.setSiteConfig(siteConfig); + // ensure we have two tservers + if (cfg.getNumTservers() < 2) { + cfg.setNumTservers(2); + } + } + @Override protected Duration defaultTimeout() { return Duration.ofMinutes(1); diff --git a/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java b/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java index 72889e0a2d..fe32572022 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java @@ -69,6 +69,8 @@ public class BalanceInPresenceOfOfflineTableIT extends AccumuloClusterHarness { Map<String,String> siteConfig = cfg.getSiteConfig(); siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K"); siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms"); + siteConfig.put(Property.GENERAL_MICROMETER_ENABLED.getKey(), "true"); + siteConfig.put("general.custom.metrics.opts.logging.step", "0.5s"); cfg.setSiteConfig(siteConfig); // ensure we have two tservers if (cfg.getNumTservers() < 2) { diff --git a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java index fc0ecdb881..77c76f41ad 100644 --- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java +++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java @@ -108,12 +108,13 @@ public class MetricsIT extends ConfigurableMacBase implements MetricsProducer { // add sserver 
as flaky until scan server included in mini tests. Set<String> flakyMetrics = Set.of(METRICS_FATE_TYPE_IN_PROGRESS, - METRICS_SERVER_IDLE, + METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED, METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_RESERVATION_CONFLICT_COUNTER, METRICS_SCAN_RESERVATION_TOTAL_TIMER, METRICS_SCAN_RESERVATION_WRITEOUT_TIMER, - METRICS_SCAN_TABLET_METADATA_CACHE); + METRICS_SCAN_TABLET_METADATA_CACHE, + METRICS_SERVER_IDLE); // @formatter:on Map<String,String> expectedMetricNames = this.getMetricFields();