This is an automated email from the ASF dual-hosted git repository.

ddanielr pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new 99af30f283 Add Balancer Metric for Migrations Needed  (#4699)
99af30f283 is described below

commit 99af30f283132fbba451a3808f579ce6961082c3
Author: Daniel Roberts <ddani...@gmail.com>
AuthorDate: Sun Jul 28 15:31:51 2024 -0400

    Add Balancer Metric for Migrations Needed  (#4699)
    
    Refactored the metric names for readability and added javadoc entries
    for each of the new balancer metrics
    
    Produces a single metric for balancing that emits the current number of
    migrations needed for the system to be balanced.
    
    Switched metric to use a LongSupplier and handled the initial null condition.
    
    ---------
    
    Co-authored-by: Ed Coleman <edcole...@apache.org>
---
 .../accumulo/core/metrics/MetricsProducer.java     | 10 +++++
 .../java/org/apache/accumulo/manager/Manager.java  |  8 ++++
 .../accumulo/manager/metrics/BalancerMetrics.java  | 51 ++++++++++++++++++++++
 .../java/org/apache/accumulo/test/BalanceIT.java   | 18 ++++++++
 .../BalanceInPresenceOfOfflineTableIT.java         |  2 +
 .../apache/accumulo/test/metrics/MetricsIT.java    |  5 ++-
 6 files changed, 92 insertions(+), 2 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java 
b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
index 8cf2ffc956..84bf20b16c 100644
--- a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
@@ -596,6 +596,14 @@ import io.micrometer.core.instrument.MeterRegistry;
  * <td>Distribution Summary</td>
  * <td></td>
  * </tr>
+ * <!-- Balancing -->
+ * <tr>
+ * <td>N/A</td>
+ * <td>N/A</td>
+ * <td>{@value METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED}</td>
+ * <td>Gauge</td>
+ * <td>The number of migrations that need to complete before the system is balanced</td>
+ * </tr>
  * </table>
  *
  * @since 2.1.0
@@ -708,6 +716,8 @@ public interface MetricsProducer {
   String METRICS_BLOCKCACHE_SUMMARY_REQUESTCOUNT =
       METRICS_BLOCKCACHE_PREFIX + "summary.requestcount";
 
+  String METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED = 
"accumulo.manager.balancer.migrations.needed";
+
   /**
    * Build Micrometer Meter objects and register them with the registry
    */
diff --git 
a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java 
b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
index 16548ea6d0..a72485af01 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
@@ -109,6 +109,7 @@ import org.apache.accumulo.core.util.Halt;
 import org.apache.accumulo.core.util.Retry;
 import org.apache.accumulo.core.util.threads.ThreadPools;
 import org.apache.accumulo.core.util.threads.Threads;
+import org.apache.accumulo.manager.metrics.BalancerMetrics;
 import org.apache.accumulo.manager.metrics.ManagerMetrics;
 import org.apache.accumulo.manager.recovery.RecoveryManager;
 import org.apache.accumulo.manager.state.TableCounts;
@@ -205,6 +206,7 @@ public class Manager extends AbstractServer
   private TServer clientService = null;
   private volatile TabletBalancer tabletBalancer;
   private final BalancerEnvironment balancerEnvironment;
+  private final BalancerMetrics balancerMetrics = new BalancerMetrics();
 
   private ManagerState state = ManagerState.INITIAL;
 
@@ -561,6 +563,10 @@ public class Manager extends AbstractServer
     }
   }
 
+  /**
+   * Returns the balancer metrics producer held by this manager.
+   *
+   * @return the {@code balancerMetrics} instance created along with this manager, exposed as a
+   *         {@link MetricsProducer}
+   */
+  public MetricsProducer getBalancerMetrics() {
+    return balancerMetrics;
+  }
+
   enum TabletGoalState {
     HOSTED(TUnloadTabletGoal.UNKNOWN),
     UNASSIGNED(TUnloadTabletGoal.UNASSIGNED),
@@ -1068,6 +1074,7 @@ public class Manager extends AbstractServer
         } while (migrationsOutForLevel > 0 && (dl == DataLevel.ROOT || dl == 
DataLevel.METADATA));
         totalMigrationsOut += migrationsOutForLevel;
       }
+      balancerMetrics.assignMigratingCount(migrations::size);
 
       if (totalMigrationsOut == 0) {
         synchronized (balancedNotifier) {
@@ -1244,6 +1251,7 @@ public class Manager extends AbstractServer
     metricsInfo.addServiceTags(getApplicationName(), sa.getAddress());
 
     var producers = ManagerMetrics.getProducers(getConfiguration(), this);
+    producers.add(balancerMetrics);
     metricsInfo.addMetricsProducers(producers.toArray(new MetricsProducer[0]));
     metricsInfo.init();
 
diff --git 
a/server/manager/src/main/java/org/apache/accumulo/manager/metrics/BalancerMetrics.java
 
b/server/manager/src/main/java/org/apache/accumulo/manager/metrics/BalancerMetrics.java
new file mode 100644
index 0000000000..01aa9e3052
--- /dev/null
+++ 
b/server/manager/src/main/java/org/apache/accumulo/manager/metrics/BalancerMetrics.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.manager.metrics;
+
+import java.util.function.LongSupplier;
+
+import org.apache.accumulo.core.metrics.MetricsProducer;
+
+import io.micrometer.core.instrument.Gauge;
+import io.micrometer.core.instrument.MeterRegistry;
+
+public class BalancerMetrics implements MetricsProducer {
+
+  LongSupplier migratingCount;
+
+  public void assignMigratingCount(LongSupplier f) {
+    migratingCount = f;
+  }
+
+  public long getMigratingCount() {
+    // Handle inital NaN value state when balance has never been called
+    if (migratingCount == null) {
+      return 0;
+    }
+    return migratingCount.getAsLong();
+  }
+
+  @Override
+  public void registerMetrics(MeterRegistry registry) {
+    Gauge
+        .builder(METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED, this,
+            BalancerMetrics::getMigratingCount)
+        .description("Overall total migrations that need to 
complete").register(registry);
+  }
+}
diff --git a/test/src/main/java/org/apache/accumulo/test/BalanceIT.java 
b/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
index 27ae709699..0164463903 100644
--- a/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
@@ -19,12 +19,16 @@
 package org.apache.accumulo.test;
 
 import java.time.Duration;
+import java.util.Map;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
 import org.apache.accumulo.core.client.Accumulo;
 import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.harness.AccumuloClusterHarness;
+import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
@@ -33,6 +37,20 @@ import org.slf4j.LoggerFactory;
 public class BalanceIT extends AccumuloClusterHarness {
   private static final Logger log = LoggerFactory.getLogger(BalanceIT.class);
 
+  /**
+   * Adjusts the mini cluster configuration for this test: sets small tserver memory and major
+   * compaction delay values, turns on Micrometer metrics with a 0.5s logging step option, and
+   * makes sure at least two tablet servers are configured.
+   *
+   * @param cfg the mini cluster configuration to modify
+   * @param hadoopCoreSite Hadoop configuration; not used by this override
+   */
+  @Override
+  public void configureMiniCluster(MiniAccumuloConfigImpl cfg, Configuration 
hadoopCoreSite) {
+    Map<String,String> siteConfig = cfg.getSiteConfig();
+    siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K");
+    siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms");
+    // enable metrics; the short logging step is presumably so the balancer metric is emitted
+    // frequently while the test runs - TODO confirm
+    siteConfig.put(Property.GENERAL_MICROMETER_ENABLED.getKey(), "true");
+    siteConfig.put("general.custom.metrics.opts.logging.step", "0.5s");
+    cfg.setSiteConfig(siteConfig);
+    // ensure we have two tservers
+    if (cfg.getNumTservers() < 2) {
+      cfg.setNumTservers(2);
+    }
+  }
+
   @Override
   protected Duration defaultTimeout() {
     return Duration.ofMinutes(1);
diff --git 
a/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java
 
b/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java
index 72889e0a2d..fe32572022 100644
--- 
a/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java
+++ 
b/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java
@@ -69,6 +69,8 @@ public class BalanceInPresenceOfOfflineTableIT extends 
AccumuloClusterHarness {
     Map<String,String> siteConfig = cfg.getSiteConfig();
     siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K");
     siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms");
+    siteConfig.put(Property.GENERAL_MICROMETER_ENABLED.getKey(), "true");
+    siteConfig.put("general.custom.metrics.opts.logging.step", "0.5s");
     cfg.setSiteConfig(siteConfig);
     // ensure we have two tservers
     if (cfg.getNumTservers() < 2) {
diff --git a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java 
b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
index fc0ecdb881..77c76f41ad 100644
--- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
@@ -108,12 +108,13 @@ public class MetricsIT extends ConfigurableMacBase 
implements MetricsProducer {
 
     // add sserver as flaky until scan server included in mini tests.
     Set<String> flakyMetrics = Set.of(METRICS_FATE_TYPE_IN_PROGRESS,
-            METRICS_SERVER_IDLE,
+            METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED,
             METRICS_SCAN_BUSY_TIMEOUT_COUNTER,
             METRICS_SCAN_RESERVATION_CONFLICT_COUNTER,
             METRICS_SCAN_RESERVATION_TOTAL_TIMER,
             METRICS_SCAN_RESERVATION_WRITEOUT_TIMER,
-            METRICS_SCAN_TABLET_METADATA_CACHE);
+            METRICS_SCAN_TABLET_METADATA_CACHE,
+            METRICS_SERVER_IDLE);
     // @formatter:on
 
     Map<String,String> expectedMetricNames = this.getMetricFields();

Reply via email to