This is an automated email from the ASF dual-hosted git repository. mlbiscoc pushed a commit to branch feature/SOLR-17458-rebased in repository https://gitbox.apache.org/repos/asf/solr.git
commit 2c2183d3239e805c93afe315e5cd131c7b2a1f7b Author: Matthew Biscocho <[email protected]> AuthorDate: Wed Aug 6 10:33:08 2025 -0400 SOLR-17806: Migrate UpdateLog metrics to OTEL (#3438) * Init migration * Better test cleanup * Change to OPERATION_ATTR * Clean up switch --- .../src/java/org/apache/solr/update/UpdateLog.java | 118 ++++++++++++++------- .../org/apache/solr/cloud/TestTlogReplica.java | 13 ++- .../test/org/apache/solr/search/TestRecovery.java | 82 +++++++++----- 3 files changed, 140 insertions(+), 73 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/update/UpdateLog.java b/solr/core/src/java/org/apache/solr/update/UpdateLog.java index 3d379c5d808..bcb4cde925f 100644 --- a/solr/core/src/java/org/apache/solr/update/UpdateLog.java +++ b/solr/core/src/java/org/apache/solr/update/UpdateLog.java @@ -21,8 +21,7 @@ import static org.apache.solr.update.processor.DistributingUpdateProcessorFactor import com.carrotsearch.hppc.LongHashSet; import com.carrotsearch.hppc.LongSet; -import com.codahale.metrics.Gauge; -import com.codahale.metrics.Meter; +import io.opentelemetry.api.common.AttributeKey; import io.opentelemetry.api.common.Attributes; import java.io.Closeable; import java.io.IOException; @@ -80,6 +79,7 @@ import org.apache.solr.core.SolrInfoBean; import org.apache.solr.core.SolrPaths; import org.apache.solr.metrics.SolrMetricProducer; import org.apache.solr.metrics.SolrMetricsContext; +import org.apache.solr.metrics.otel.instruments.AttributedLongCounter; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestInfo; @@ -264,10 +264,9 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer { protected List<Long> startingVersions; // metrics - protected Gauge<Integer> bufferedOpsGauge; - protected Meter applyingBufferedOpsMeter; - protected Meter replayOpsMeter; - protected Meter copyOverOldUpdatesMeter; + protected AttributedLongCounter applyingBufferedOpsCounter; + protected AttributedLongCounter replayOpsCounter; + protected AttributedLongCounter copyOverOldUpdatesCounter; protected SolrMetricsContext solrMetricsContext; public static class LogPtr { @@ -627,40 +626,65 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer { } } - // TODO SOLR-17458: Migrate to Otel @Override public void initializeMetrics( SolrMetricsContext parentContext, Attributes attributes, String scope) { solrMetricsContext = parentContext.getChildContext(this); - bufferedOpsGauge = - () -> { - if (state == State.BUFFERING) { - if (bufferTlog == null) return 0; - // numRecords counts header as a record - return bufferTlog.numRecords() - 1; - } - if (tlog == null) { - return 0; - } else if (state == State.APPLYING_BUFFERED) { - // numRecords counts header as a record - return tlog.numRecords() - - 1 - - recoveryInfo.adds - - recoveryInfo.deleteByQuery - - recoveryInfo.deletes - - recoveryInfo.errors.get(); - } else { - return 0; - } - }; - solrMetricsContext.gauge(bufferedOpsGauge, true, "ops", scope, "buffered"); - solrMetricsContext.gauge(() -> logs.size(), true, "logs", scope, "replay", "remaining"); - solrMetricsContext.gauge(() -> getTotalLogsSize(), true, "bytes", scope, "replay", "remaining"); - applyingBufferedOpsMeter = solrMetricsContext.meter("ops", scope, "applyingBuffered"); - replayOpsMeter = solrMetricsContext.meter("ops", scope, "replay"); - copyOverOldUpdatesMeter = solrMetricsContext.meter("ops", scope, "copyOverOldUpdates"); - solrMetricsContext.gauge(() -> state.getValue(), true, "state", scope); + // NOCOMMIT: We do not need a scope attribute + // Will need to fix up SolrCoreMetricManager instead of directly removing it from builder. + var baseAttributes = + attributes.toBuilder() + .remove(AttributeKey.stringKey("scope")) + .put(CATEGORY_ATTR, SolrInfoBean.Category.TLOG.toString()) + .build(); + + solrMetricsContext.observableLongGauge( + "solr_core_update_log_buffered_ops", + "The current number of buffered operations", + (observableLongMeasurement -> + observableLongMeasurement.record(computeBufferedOps(), baseAttributes))); + + solrMetricsContext.observableLongGauge( + "solr_core_update_log_replay_logs_remaining", + "The current number of tlogs remaining to be replayed", + (observableLongMeasurement -> { + observableLongMeasurement.record(logs.size(), baseAttributes); + })); + + solrMetricsContext.observableLongGauge( + "solr_core_update_log_size_remaining", + "The total size in bytes of all tlogs remaining to be replayed", + (observableLongMeasurement -> { + observableLongMeasurement.record(getTotalLogsSize(), baseAttributes); + }), + "By"); + + // NOCOMMIT: Do we want to keep this? Metric was just state with the numeric enum value. + // Without context this doesn't mean anything and can be very confusing. Maybe keep the numeric + // value and put the value meaning in the + // description? + // solrMetricsContext.gauge(() -> state.getValue(), true, "state", scope); + + applyingBufferedOpsCounter = + new AttributedLongCounter( + solrMetricsContext.longCounter( + "solr_core_update_log_applied_buffered_ops", + "Total number of buffered operations applied"), + baseAttributes); + + replayOpsCounter = + new AttributedLongCounter( + solrMetricsContext.longCounter( + "solr_core_update_log_replay_ops", "Total number of log replay operations"), + baseAttributes); + + copyOverOldUpdatesCounter = + new AttributedLongCounter( + solrMetricsContext.longCounter( + "solr_core_update_log_old_updates_copied", + "Total number of updates copied from previous tlog or last tlog to a new tlog"), + baseAttributes); } @Override @@ -668,6 +692,24 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer { return solrMetricsContext; } + private long computeBufferedOps() { + return switch (state) { + // numRecords counts header as a record + case BUFFERING -> (bufferTlog == null ? 0 : bufferTlog.numRecords() - 1); + case APPLYING_BUFFERED -> { + if (tlog == null) yield 0; + // numRecords counts header as a record + yield tlog.numRecords() + - 1 + - recoveryInfo.adds + - recoveryInfo.deleteByQuery + - recoveryInfo.deletes + - recoveryInfo.errors.get(); + } + default -> 0; + }; + } + /** * Returns a new {@link org.apache.solr.update.TransactionLog}. Sub-classes can override this * method to change the implementation of the transaction log. @@ -1509,7 +1551,7 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer { * over */ public void copyOverOldUpdates(long commitVersion, TransactionLog oldTlog) { - copyOverOldUpdatesMeter.mark(); + copyOverOldUpdatesCounter.inc(); SolrQueryRequest req = new LocalSolrQueryRequest(uhandler.core, new ModifiableSolrParams()); TransactionLog.LogReader logReader = null; @@ -2290,9 +2332,9 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer { throw rsp.getException(); } if (state == State.REPLAYING) { - replayOpsMeter.mark(); + replayOpsCounter.inc(); } else if (state == State.APPLYING_BUFFERED) { - applyingBufferedOpsMeter.mark(); + applyingBufferedOpsCounter.inc(); } else { // XXX should not happen? } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java index 7c375f9e0ac..11308e89b5d 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java @@ -19,7 +19,6 @@ package org.apache.solr.cloud; import static org.apache.solr.cloud.TestPullReplica.getHypotheticalTlogDir; import com.carrotsearch.randomizedtesting.annotations.Repeat; -import com.codahale.metrics.Meter; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.nio.file.Files; @@ -1239,11 +1238,11 @@ public class TestTlogReplica extends SolrCloudTestCase { } private long getTimesCopyOverOldUpdates(SolrCore core) { - return ((Meter) - core.getSolrMetricsContext() - .getMetricRegistry() - .getMetrics() - .get("TLOG.copyOverOldUpdates.ops")) - .getCount(); + var metric = + SolrMetricTestUtils.getCounterDatapoint( + core, + "solr_core_update_log_old_updates_copied", + SolrMetricTestUtils.newCloudLabelsBuilder(core).label("category", "TLOG").build()); + return (metric != null) ? (long) metric.getValue() : 0L; } } diff --git a/solr/core/src/test/org/apache/solr/search/TestRecovery.java b/solr/core/src/test/org/apache/solr/search/TestRecovery.java index ec2dead4da4..b4ad7161251 100644 --- a/solr/core/src/test/org/apache/solr/search/TestRecovery.java +++ b/solr/core/src/test/org/apache/solr/search/TestRecovery.java @@ -20,8 +20,6 @@ import static org.apache.solr.search.TestRecovery.VersionProvider.getNextVersion import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM; import static org.apache.solr.util.SolrMatchers.subListMatches; -import com.codahale.metrics.Gauge; -import com.codahale.metrics.Meter; import com.codahale.metrics.Metric; import com.codahale.metrics.MetricRegistry; import java.io.RandomAccessFile; @@ -46,6 +44,7 @@ import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.util.TimeSource; import org.apache.solr.common.util.Utils; import org.apache.solr.metrics.SolrMetricManager; +import org.apache.solr.metrics.SolrMetricTestUtils; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.IndexSchema; import org.apache.solr.update.UpdateHandler; @@ -230,8 +229,6 @@ public class TestRecovery extends SolrTestCaseJ4 { h.close(); createCore(); - Map<String, Metric> metrics = getMetrics(); // live map view - // Solr should kick this off now // h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog(); @@ -244,18 +241,27 @@ public class TestRecovery extends SolrTestCaseJ4 { assertEquals( UpdateLog.State.REPLAYING, h.getCore().getUpdateHandler().getUpdateLog().getState()); + + var attributes = + SolrMetricTestUtils.newStandaloneLabelsBuilder(h.getCore()) + .label("category", "TLOG") + .build(); + // check metrics - @SuppressWarnings({"unchecked"}) - Gauge<Integer> state = (Gauge<Integer>) metrics.get("TLOG.state"); - assertEquals(UpdateLog.State.REPLAYING.ordinal(), state.getValue().intValue()); - @SuppressWarnings({"unchecked"}) - Gauge<Integer> replayingLogs = (Gauge<Integer>) metrics.get("TLOG.replay.remaining.logs"); - assertTrue(replayingLogs.getValue() > 0); - @SuppressWarnings({"unchecked"}) - Gauge<Long> replayingDocs = (Gauge<Long>) metrics.get("TLOG.replay.remaining.bytes"); - assertTrue(replayingDocs.getValue() > 0); - Meter replayDocs = (Meter) metrics.get("TLOG.replay.ops"); - long initialOps = replayDocs.getCount(); + // NOCOMMIT: Need to decide if we are keeping this state metric or not + // @SuppressWarnings({"unchecked"}) + // Gauge<Integer> state = (Gauge<Integer>) metrics.get("TLOG.state"); + // assertEquals(UpdateLog.State.REPLAYING.ordinal(), state.getValue().intValue()); + + var actualReplayingLogs = + SolrMetricTestUtils.getGaugeDatapoint( + h.getCore(), "solr_core_update_log_replay_logs_remaining", attributes); + assertTrue(actualReplayingLogs.getValue() > 0); + + var actualReplayingDocs = + SolrMetricTestUtils.getGaugeDatapoint( + h.getCore(), "solr_core_update_log_size_remaining_bytes", attributes); + assertTrue(actualReplayingDocs.getValue() > 0); // unblock recovery logReplay.release(1000); @@ -271,8 +277,14 @@ public class TestRecovery extends SolrTestCaseJ4 { assertJQ(req("q", "*:*"), "/response/numFound==3"); - assertEquals(7L, replayDocs.getCount() - initialOps); - assertEquals(UpdateLog.State.ACTIVE.ordinal(), state.getValue().intValue()); + var actualReplayOps = + SolrMetricTestUtils.getCounterDatapoint( + h.getCore(), "solr_core_update_log_replay_ops", attributes) + .getValue(); + assertEquals(7.0, actualReplayOps, 0.0); + + // NOCOMMIT: Need to decide if we are keeping this state metric or not + // assertEquals(UpdateLog.State.ACTIVE.ordinal(), state.getValue().intValue()); // make sure we can still access versions after recovery assertJQ(req("qt", "/get", "getVersions", "" + versions.size()), "/versions==" + versions); @@ -662,14 +674,16 @@ public class TestRecovery extends SolrTestCaseJ4 { ulog.bufferUpdates(); assertEquals(UpdateLog.State.BUFFERING, ulog.getState()); - @SuppressWarnings({"unchecked"}) - Gauge<Integer> state = (Gauge<Integer>) metrics.get("TLOG.state"); - assertEquals(UpdateLog.State.BUFFERING.ordinal(), state.getValue().intValue()); - @SuppressWarnings({"unchecked"}) - Gauge<Integer> bufferedOps = (Gauge<Integer>) metrics.get("TLOG.buffered.ops"); - int initialOps = bufferedOps.getValue(); - Meter applyingBuffered = (Meter) metrics.get("TLOG.applyingBuffered.ops"); - long initialApplyingOps = applyingBuffered.getCount(); + + var attributes = + SolrMetricTestUtils.newStandaloneLabelsBuilder(h.getCore()) + .label("category", "TLOG") + .build(); + + // NOCOMMIT: Need to decide if we are keeping this state metric or not + // @SuppressWarnings({"unchecked"}) + // Gauge<Integer> state = (Gauge<Integer>) metrics.get("TLOG.state"); + // assertEquals(UpdateLog.State.BUFFERING.ordinal(), state.getValue().intValue()); String v3 = getNextVersion(); String v940_del = "-" + getNextVersion(); @@ -722,7 +736,11 @@ public class TestRecovery extends SolrTestCaseJ4 { // be bad for updates to be visible if we're just buffering. assertJQ(req("qt", "/get", "id", "B3"), "=={'doc':null}"); - assertEquals(6, bufferedOps.getValue() - initialOps); + var actualBufferedOpsValue = + SolrMetricTestUtils.getGaugeDatapoint( + h.getCore(), "solr_core_update_log_buffered_ops", attributes) + .getValue(); + assertEquals(6, actualBufferedOpsValue, 0.0); rinfoFuture = ulog.applyBufferedUpdates(); assertNotNull(rinfoFuture); @@ -734,7 +752,11 @@ public class TestRecovery extends SolrTestCaseJ4 { UpdateLog.RecoveryInfo rinfo = rinfoFuture.get(); assertEquals(UpdateLog.State.ACTIVE, ulog.getState()); - assertEquals(6L, applyingBuffered.getCount() - initialApplyingOps); + var actualAppliedBufferedOpsValue = + SolrMetricTestUtils.getCounterDatapoint( + h.getCore(), "solr_core_update_log_applied_buffered_ops", attributes) + .getValue(); + assertEquals(6, actualAppliedBufferedOpsValue, 0.0); assertThatJQ( req("qt", "/get", "getVersions", "6"), @@ -868,7 +890,11 @@ public class TestRecovery extends SolrTestCaseJ4 { assertEquals( UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method in a good state - assertEquals(0, bufferedOps.getValue().intValue()); + actualBufferedOpsValue = + SolrMetricTestUtils.getGaugeDatapoint( + h.getCore(), "solr_core_update_log_buffered_ops", attributes) + .getValue(); + assertEquals(0, actualBufferedOpsValue, 0.0); } finally { UpdateLog.testing_logReplayHook = null; UpdateLog.testing_logReplayFinishHook = null;
