This is an automated email from the ASF dual-hosted git repository.

mlbiscoc pushed a commit to branch feature/SOLR-17458-rebased
in repository https://gitbox.apache.org/repos/asf/solr.git

commit 2c2183d3239e805c93afe315e5cd131c7b2a1f7b
Author: Matthew Biscocho <[email protected]>
AuthorDate: Wed Aug 6 10:33:08 2025 -0400

    SOLR-17806: Migrate UpdateLog metrics to OTEL (#3438)
    
    * Init migration
    
    * Better test cleanup
    
    * Change to OPERATION_ATTR
    
    * Clean up switch
---
 .../src/java/org/apache/solr/update/UpdateLog.java | 118 ++++++++++++++-------
 .../org/apache/solr/cloud/TestTlogReplica.java     |  13 ++-
 .../test/org/apache/solr/search/TestRecovery.java  |  82 +++++++++-----
 3 files changed, 140 insertions(+), 73 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/update/UpdateLog.java 
b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
index 3d379c5d808..bcb4cde925f 100644
--- a/solr/core/src/java/org/apache/solr/update/UpdateLog.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
@@ -21,8 +21,7 @@ import static 
org.apache.solr.update.processor.DistributingUpdateProcessorFactor
 
 import com.carrotsearch.hppc.LongHashSet;
 import com.carrotsearch.hppc.LongSet;
-import com.codahale.metrics.Gauge;
-import com.codahale.metrics.Meter;
+import io.opentelemetry.api.common.AttributeKey;
 import io.opentelemetry.api.common.Attributes;
 import java.io.Closeable;
 import java.io.IOException;
@@ -80,6 +79,7 @@ import org.apache.solr.core.SolrInfoBean;
 import org.apache.solr.core.SolrPaths;
 import org.apache.solr.metrics.SolrMetricProducer;
 import org.apache.solr.metrics.SolrMetricsContext;
+import org.apache.solr.metrics.otel.instruments.AttributedLongCounter;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrRequestInfo;
@@ -264,10 +264,9 @@ public class UpdateLog implements PluginInfoInitialized, 
SolrMetricProducer {
   protected List<Long> startingVersions;
 
   // metrics
-  protected Gauge<Integer> bufferedOpsGauge;
-  protected Meter applyingBufferedOpsMeter;
-  protected Meter replayOpsMeter;
-  protected Meter copyOverOldUpdatesMeter;
+  protected AttributedLongCounter applyingBufferedOpsCounter;
+  protected AttributedLongCounter replayOpsCounter;
+  protected AttributedLongCounter copyOverOldUpdatesCounter;
   protected SolrMetricsContext solrMetricsContext;
 
   public static class LogPtr {
@@ -627,40 +626,65 @@ public class UpdateLog implements PluginInfoInitialized, 
SolrMetricProducer {
     }
   }
 
-  // TODO SOLR-17458: Migrate to Otel
   @Override
   public void initializeMetrics(
       SolrMetricsContext parentContext, Attributes attributes, String scope) {
     solrMetricsContext = parentContext.getChildContext(this);
-    bufferedOpsGauge =
-        () -> {
-          if (state == State.BUFFERING) {
-            if (bufferTlog == null) return 0;
-            // numRecords counts header as a record
-            return bufferTlog.numRecords() - 1;
-          }
-          if (tlog == null) {
-            return 0;
-          } else if (state == State.APPLYING_BUFFERED) {
-            // numRecords counts header as a record
-            return tlog.numRecords()
-                - 1
-                - recoveryInfo.adds
-                - recoveryInfo.deleteByQuery
-                - recoveryInfo.deletes
-                - recoveryInfo.errors.get();
-          } else {
-            return 0;
-          }
-        };
 
-    solrMetricsContext.gauge(bufferedOpsGauge, true, "ops", scope, "buffered");
-    solrMetricsContext.gauge(() -> logs.size(), true, "logs", scope, "replay", 
"remaining");
-    solrMetricsContext.gauge(() -> getTotalLogsSize(), true, "bytes", scope, 
"replay", "remaining");
-    applyingBufferedOpsMeter = solrMetricsContext.meter("ops", scope, 
"applyingBuffered");
-    replayOpsMeter = solrMetricsContext.meter("ops", scope, "replay");
-    copyOverOldUpdatesMeter = solrMetricsContext.meter("ops", scope, 
"copyOverOldUpdates");
-    solrMetricsContext.gauge(() -> state.getValue(), true, "state", scope);
+    // NOCOMMIT: We do not need a scope attribute
+    // Will need to fix up SolrCoreMetricManager instead of directly removing 
it from builder.
+    var baseAttributes =
+        attributes.toBuilder()
+            .remove(AttributeKey.stringKey("scope"))
+            .put(CATEGORY_ATTR, SolrInfoBean.Category.TLOG.toString())
+            .build();
+
+    solrMetricsContext.observableLongGauge(
+        "solr_core_update_log_buffered_ops",
+        "The current number of buffered operations",
+        (observableLongMeasurement ->
+            observableLongMeasurement.record(computeBufferedOps(), 
baseAttributes)));
+
+    solrMetricsContext.observableLongGauge(
+        "solr_core_update_log_replay_logs_remaining",
+        "The current number of tlogs remaining to be replayed",
+        (observableLongMeasurement -> {
+          observableLongMeasurement.record(logs.size(), baseAttributes);
+        }));
+
+    solrMetricsContext.observableLongGauge(
+        "solr_core_update_log_size_remaining",
+        "The total size in bytes of all tlogs remaining to be replayed",
+        (observableLongMeasurement -> {
+          observableLongMeasurement.record(getTotalLogsSize(), baseAttributes);
+        }),
+        "By");
+
+    // NOCOMMIT: Do we want to keep this? Metric was just state with the 
numeric enum value.
+    // Without context this doesn't mean anything and can be very confusing. 
Maybe keep the numeric
+    // value and put the value meaning in the
+    // description?
+    //    solrMetricsContext.gauge(() -> state.getValue(), true, "state", 
scope);
+
+    applyingBufferedOpsCounter =
+        new AttributedLongCounter(
+            solrMetricsContext.longCounter(
+                "solr_core_update_log_applied_buffered_ops",
+                "Total number of buffered operations applied"),
+            baseAttributes);
+
+    replayOpsCounter =
+        new AttributedLongCounter(
+            solrMetricsContext.longCounter(
+                "solr_core_update_log_replay_ops", "Total number of log replay 
operations"),
+            baseAttributes);
+
+    copyOverOldUpdatesCounter =
+        new AttributedLongCounter(
+            solrMetricsContext.longCounter(
+                "solr_core_update_log_old_updates_copied",
+                "Total number of updates copied from previous tlog or last 
tlog to a new tlog"),
+            baseAttributes);
   }
 
   @Override
@@ -668,6 +692,24 @@ public class UpdateLog implements PluginInfoInitialized, 
SolrMetricProducer {
     return solrMetricsContext;
   }
 
+  private long computeBufferedOps() {
+    return switch (state) {
+        // numRecords counts header as a record
+      case BUFFERING -> (bufferTlog == null ? 0 : bufferTlog.numRecords() - 1);
+      case APPLYING_BUFFERED -> {
+        if (tlog == null) yield 0;
+        // numRecords counts header as a record
+        yield tlog.numRecords()
+            - 1
+            - recoveryInfo.adds
+            - recoveryInfo.deleteByQuery
+            - recoveryInfo.deletes
+            - recoveryInfo.errors.get();
+      }
+      default -> 0;
+    };
+  }
+
   /**
    * Returns a new {@link org.apache.solr.update.TransactionLog}. Sub-classes 
can override this
    * method to change the implementation of the transaction log.
@@ -1509,7 +1551,7 @@ public class UpdateLog implements PluginInfoInitialized, 
SolrMetricProducer {
    *     over
    */
   public void copyOverOldUpdates(long commitVersion, TransactionLog oldTlog) {
-    copyOverOldUpdatesMeter.mark();
+    copyOverOldUpdatesCounter.inc();
 
     SolrQueryRequest req = new LocalSolrQueryRequest(uhandler.core, new 
ModifiableSolrParams());
     TransactionLog.LogReader logReader = null;
@@ -2290,9 +2332,9 @@ public class UpdateLog implements PluginInfoInitialized, 
SolrMetricProducer {
               throw rsp.getException();
             }
             if (state == State.REPLAYING) {
-              replayOpsMeter.mark();
+              replayOpsCounter.inc();
             } else if (state == State.APPLYING_BUFFERED) {
-              applyingBufferedOpsMeter.mark();
+              applyingBufferedOpsCounter.inc();
             } else {
               // XXX should not happen?
             }
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java 
b/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
index 7c375f9e0ac..11308e89b5d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
@@ -19,7 +19,6 @@ package org.apache.solr.cloud;
 import static org.apache.solr.cloud.TestPullReplica.getHypotheticalTlogDir;
 
 import com.carrotsearch.randomizedtesting.annotations.Repeat;
-import com.codahale.metrics.Meter;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.nio.file.Files;
@@ -1239,11 +1238,11 @@ public class TestTlogReplica extends SolrCloudTestCase {
   }
 
   private long getTimesCopyOverOldUpdates(SolrCore core) {
-    return ((Meter)
-            core.getSolrMetricsContext()
-                .getMetricRegistry()
-                .getMetrics()
-                .get("TLOG.copyOverOldUpdates.ops"))
-        .getCount();
+    var metric =
+        SolrMetricTestUtils.getCounterDatapoint(
+            core,
+            "solr_core_update_log_old_updates_copied",
+            SolrMetricTestUtils.newCloudLabelsBuilder(core).label("category", 
"TLOG").build());
+    return (metric != null) ? (long) metric.getValue() : 0L;
   }
 }
diff --git a/solr/core/src/test/org/apache/solr/search/TestRecovery.java 
b/solr/core/src/test/org/apache/solr/search/TestRecovery.java
index ec2dead4da4..b4ad7161251 100644
--- a/solr/core/src/test/org/apache/solr/search/TestRecovery.java
+++ b/solr/core/src/test/org/apache/solr/search/TestRecovery.java
@@ -20,8 +20,6 @@ import static 
org.apache.solr.search.TestRecovery.VersionProvider.getNextVersion
 import static 
org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;
 import static org.apache.solr.util.SolrMatchers.subListMatches;
 
-import com.codahale.metrics.Gauge;
-import com.codahale.metrics.Meter;
 import com.codahale.metrics.Metric;
 import com.codahale.metrics.MetricRegistry;
 import java.io.RandomAccessFile;
@@ -46,6 +44,7 @@ import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.metrics.SolrMetricManager;
+import org.apache.solr.metrics.SolrMetricTestUtils;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.update.UpdateHandler;
@@ -230,8 +229,6 @@ public class TestRecovery extends SolrTestCaseJ4 {
       h.close();
       createCore();
 
-      Map<String, Metric> metrics = getMetrics(); // live map view
-
       // Solr should kick this off now
       // h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();
 
@@ -244,18 +241,27 @@ public class TestRecovery extends SolrTestCaseJ4 {
 
       assertEquals(
           UpdateLog.State.REPLAYING, 
h.getCore().getUpdateHandler().getUpdateLog().getState());
+
+      var attributes =
+          SolrMetricTestUtils.newStandaloneLabelsBuilder(h.getCore())
+              .label("category", "TLOG")
+              .build();
+
       // check metrics
-      @SuppressWarnings({"unchecked"})
-      Gauge<Integer> state = (Gauge<Integer>) metrics.get("TLOG.state");
-      assertEquals(UpdateLog.State.REPLAYING.ordinal(), 
state.getValue().intValue());
-      @SuppressWarnings({"unchecked"})
-      Gauge<Integer> replayingLogs = (Gauge<Integer>) 
metrics.get("TLOG.replay.remaining.logs");
-      assertTrue(replayingLogs.getValue() > 0);
-      @SuppressWarnings({"unchecked"})
-      Gauge<Long> replayingDocs = (Gauge<Long>) 
metrics.get("TLOG.replay.remaining.bytes");
-      assertTrue(replayingDocs.getValue() > 0);
-      Meter replayDocs = (Meter) metrics.get("TLOG.replay.ops");
-      long initialOps = replayDocs.getCount();
+      // NOCOMMIT: Need to decide if we are keeping this state metric or not
+      //      @SuppressWarnings({"unchecked"})
+      //      Gauge<Integer> state = (Gauge<Integer>) 
metrics.get("TLOG.state");
+      //      assertEquals(UpdateLog.State.REPLAYING.ordinal(), 
state.getValue().intValue());
+
+      var actualReplayingLogs =
+          SolrMetricTestUtils.getGaugeDatapoint(
+              h.getCore(), "solr_core_update_log_replay_logs_remaining", 
attributes);
+      assertTrue(actualReplayingLogs.getValue() > 0);
+
+      var actualReplayingDocs =
+          SolrMetricTestUtils.getGaugeDatapoint(
+              h.getCore(), "solr_core_update_log_size_remaining_bytes", 
attributes);
+      assertTrue(actualReplayingDocs.getValue() > 0);
 
       // unblock recovery
       logReplay.release(1000);
@@ -271,8 +277,14 @@ public class TestRecovery extends SolrTestCaseJ4 {
 
       assertJQ(req("q", "*:*"), "/response/numFound==3");
 
-      assertEquals(7L, replayDocs.getCount() - initialOps);
-      assertEquals(UpdateLog.State.ACTIVE.ordinal(), 
state.getValue().intValue());
+      var actualReplayOps =
+          SolrMetricTestUtils.getCounterDatapoint(
+                  h.getCore(), "solr_core_update_log_replay_ops", attributes)
+              .getValue();
+      assertEquals(7.0, actualReplayOps, 0.0);
+
+      // NOCOMMIT: Need to decide if we are keeping this state metric or not
+      //      assertEquals(UpdateLog.State.ACTIVE.ordinal(), 
state.getValue().intValue());
 
       // make sure we can still access versions after recovery
       assertJQ(req("qt", "/get", "getVersions", "" + versions.size()), 
"/versions==" + versions);
@@ -662,14 +674,16 @@ public class TestRecovery extends SolrTestCaseJ4 {
 
       ulog.bufferUpdates();
       assertEquals(UpdateLog.State.BUFFERING, ulog.getState());
-      @SuppressWarnings({"unchecked"})
-      Gauge<Integer> state = (Gauge<Integer>) metrics.get("TLOG.state");
-      assertEquals(UpdateLog.State.BUFFERING.ordinal(), 
state.getValue().intValue());
-      @SuppressWarnings({"unchecked"})
-      Gauge<Integer> bufferedOps = (Gauge<Integer>) 
metrics.get("TLOG.buffered.ops");
-      int initialOps = bufferedOps.getValue();
-      Meter applyingBuffered = (Meter) 
metrics.get("TLOG.applyingBuffered.ops");
-      long initialApplyingOps = applyingBuffered.getCount();
+
+      var attributes =
+          SolrMetricTestUtils.newStandaloneLabelsBuilder(h.getCore())
+              .label("category", "TLOG")
+              .build();
+
+      // NOCOMMIT: Need to decide if we are keeping this state metric or not
+      //      @SuppressWarnings({"unchecked"})
+      //      Gauge<Integer> state = (Gauge<Integer>) 
metrics.get("TLOG.state");
+      //      assertEquals(UpdateLog.State.BUFFERING.ordinal(), 
state.getValue().intValue());
 
       String v3 = getNextVersion();
       String v940_del = "-" + getNextVersion();
@@ -722,7 +736,11 @@ public class TestRecovery extends SolrTestCaseJ4 {
       // be bad for updates to be visible if we're just buffering.
       assertJQ(req("qt", "/get", "id", "B3"), "=={'doc':null}");
 
-      assertEquals(6, bufferedOps.getValue() - initialOps);
+      var actualBufferedOpsValue =
+          SolrMetricTestUtils.getGaugeDatapoint(
+                  h.getCore(), "solr_core_update_log_buffered_ops", attributes)
+              .getValue();
+      assertEquals(6, actualBufferedOpsValue, 0.0);
 
       rinfoFuture = ulog.applyBufferedUpdates();
       assertNotNull(rinfoFuture);
@@ -734,7 +752,11 @@ public class TestRecovery extends SolrTestCaseJ4 {
       UpdateLog.RecoveryInfo rinfo = rinfoFuture.get();
       assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
 
-      assertEquals(6L, applyingBuffered.getCount() - initialApplyingOps);
+      var actualAppliedBufferedOpsValue =
+          SolrMetricTestUtils.getCounterDatapoint(
+                  h.getCore(), "solr_core_update_log_applied_buffered_ops", 
attributes)
+              .getValue();
+      assertEquals(6, actualAppliedBufferedOpsValue, 0.0);
 
       assertThatJQ(
           req("qt", "/get", "getVersions", "6"),
@@ -868,7 +890,11 @@ public class TestRecovery extends SolrTestCaseJ4 {
       assertEquals(
           UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method 
in a good state
 
-      assertEquals(0, bufferedOps.getValue().intValue());
+      actualBufferedOpsValue =
+          SolrMetricTestUtils.getGaugeDatapoint(
+                  h.getCore(), "solr_core_update_log_buffered_ops", attributes)
+              .getValue();
+      assertEquals(0, actualBufferedOpsValue, 0.0);
     } finally {
       UpdateLog.testing_logReplayHook = null;
       UpdateLog.testing_logReplayFinishHook = null;

Reply via email to