This is an automated email from the ASF dual-hosted git repository. mlbiscoc pushed a commit to branch feature/SOLR-17458-rebased in repository https://gitbox.apache.org/repos/asf/solr.git
commit 42c9b415738ea8092d9e4f3690d875055d0e940c Author: Matthew Biscocho <[email protected]> AuthorDate: Mon Aug 11 14:36:33 2025 -0400 SOLR-17806: Migrate ZkContainer metrics to OTEL (#3453) * Migrate ZkContainer metrics * Fix naming --- .../java/org/apache/solr/core/CoreContainer.java | 7 +- .../src/java/org/apache/solr/core/ZkContainer.java | 73 +++++++++++++++-- .../apache/solr/handler/RequestHandlerBase.java | 5 -- .../java/org/apache/solr/metrics/MetricsMap.java | 3 + .../apache/solr/cloud/PeerSyncReplicationTest.java | 4 +- .../solr/cloud/TestRandomRequestDistribution.java | 2 +- .../solr/opentelemetry/TestMetricExemplars.java | 6 +- .../solr/common/cloud/SolrZKMetricsListener.java | 93 +++++++++++++--------- .../org/apache/solr/common/cloud/SolrZkClient.java | 3 +- 9 files changed, 136 insertions(+), 60 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index 59e5fe8c561..cd6bf740b36 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -26,6 +26,7 @@ import static org.apache.solr.common.params.CommonParams.INFO_HANDLER_PATH; import static org.apache.solr.common.params.CommonParams.METRICS_PATH; import static org.apache.solr.common.params.CommonParams.ZK_PATH; import static org.apache.solr.common.params.CommonParams.ZK_STATUS_PATH; +import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR; import static org.apache.solr.metrics.SolrMetricProducer.HANDLER_ATTR; import static org.apache.solr.search.SolrIndexSearcher.EXECUTOR_MAX_CPU_THREADS; import static org.apache.solr.security.AuthenticationPlugin.AUTHENTICATION_PLUGIN_PROP; @@ -817,10 +818,12 @@ public class CoreContainer { if (isZooKeeperAware()) { solrClientCache.setDefaultZKHost(getZkController().getZkServerAddress()); // initialize ZkClient metrics - // NOCOMMIT SOLR-17458: Add Otel zkSys .getZkMetricsProducer() - .initializeMetrics(solrMetricsContext, Attributes.empty(), "zkClient"); + .initializeMetrics( + solrMetricsContext, + Attributes.of(CATEGORY_ATTR, SolrInfoBean.Category.CONTAINER.toString()), + "zkClient"); pkiAuthenticationSecurityBuilder = new PKIAuthenticationPlugin( this, diff --git a/solr/core/src/java/org/apache/solr/core/ZkContainer.java b/solr/core/src/java/org/apache/solr/core/ZkContainer.java index c6a1b79d283..bd675378a1a 100644 --- a/solr/core/src/java/org/apache/solr/core/ZkContainer.java +++ b/solr/core/src/java/org/apache/solr/core/ZkContainer.java @@ -39,7 +39,6 @@ import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.SolrNamedThreadFactory; import org.apache.solr.common.util.StrUtils; import org.apache.solr.logging.MDCLoggingContext; -import org.apache.solr.metrics.MetricsMap; import org.apache.solr.metrics.SolrMetricProducer; import org.apache.solr.metrics.SolrMetricsContext; import org.apache.zookeeper.KeeperException; @@ -141,18 +140,82 @@ public class ZkContainer { } this.zkController = zkController; - MetricsMap metricsMap = new MetricsMap(zkController.getZkClient().getMetrics()); + metricProducer = new SolrMetricProducer() { SolrMetricsContext ctx; - // TODO SOLR-17458: Migrate to Otel @Override public void initializeMetrics( SolrMetricsContext parentContext, Attributes attributes, String scope) { ctx = parentContext.getChildContext(this); - ctx.gauge( - metricsMap, true, scope, null, SolrInfoBean.Category.CONTAINER.toString()); + + var metricsListener = zkController.getZkClient().getMetrics(); + + ctx.observableLongCounter( + "solr_zk_ops", + "Total number of ZooKeeper operations", + measurement -> { + measurement.record( + metricsListener.getReads(), + attributes.toBuilder().put(OPERATION_ATTR, "read").build()); + measurement.record( + metricsListener.getDeletes(), + attributes.toBuilder().put(OPERATION_ATTR, "delete").build()); + measurement.record( + metricsListener.getWrites(), + attributes.toBuilder().put(OPERATION_ATTR, "write").build()); + measurement.record( + metricsListener.getMultiOps(), + attributes.toBuilder().put(OPERATION_ATTR, "multi").build()); + measurement.record( + metricsListener.getExistsChecks(), + attributes.toBuilder().put(OPERATION_ATTR, "exists").build()); + }); + + ctx.observableLongCounter( + "solr_zk_read", + "Total bytes read from ZooKeeper", + measurement -> { + measurement.record(metricsListener.getBytesRead(), attributes); + }, + "By"); + + ctx.observableLongCounter( + "solr_zk_watches_fired", + "Total number of ZooKeeper watches fired", + measurement -> { + measurement.record(metricsListener.getWatchesFired(), attributes); + }); + + ctx.observableLongCounter( + "solr_zk_written", + "Total bytes written to ZooKeeper", + measurement -> { + measurement.record(metricsListener.getBytesWritten()); + }, + "By"); + + ctx.observableLongCounter( + "solr_zk_cumulative_multi_ops_total", + "Total cumulative multi-operations count", + measurement -> { + measurement.record(metricsListener.getCumulativeMultiOps()); + }); + + ctx.observableLongCounter( + "solr_zk_child_fetches", + "Total number of ZooKeeper child node fetches", + measurement -> { + measurement.record(metricsListener.getChildFetches()); + }); + + ctx.observableLongCounter( + "solr_zk_cumulative_children_fetched", + "Total cumulative children fetched count", + measurement -> { + measurement.record(metricsListener.getCumulativeChildrenFetched()); + }); } @Override diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java index 6a70abe9441..610bf338a6f 100644 --- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java +++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java @@ -253,11 +253,6 @@ public abstract class RequestHandlerBase numTimeouts = new AttributedLongCounter(timeoutRequestMetric, attributes); requestTimes = new AttributedLongTimer(requestTimeMetric, attributes); - // NOCOMMIT: Temporary to see metrics - requests.add(0L); - numTimeouts.add(0L); - numClientErrors.add(0L); - numServerErrors.add(0L); } } diff --git a/solr/core/src/java/org/apache/solr/metrics/MetricsMap.java b/solr/core/src/java/org/apache/solr/metrics/MetricsMap.java index aec6f3e352e..1d667cf556b 100644 --- a/solr/core/src/java/org/apache/solr/metrics/MetricsMap.java +++ b/solr/core/src/java/org/apache/solr/metrics/MetricsMap.java @@ -55,6 +55,9 @@ import org.slf4j.LoggerFactory; * {@link javax.management.openmbean.OpenType#ALLOWED_CLASSNAMES_LIST}, otherwise only their * toString() representation will be shown in JConsole. */ +// NOCOMMIT: This MetricMap appears to be a shim for MapWriter for responses and a way to shim +// gauges into Dropwizard and JXM which is being removed. This should be removed completely as it is +// no longer needed with OTEL public class MetricsMap implements Gauge<Map<String, Object>>, MapWriter, DynamicMBean { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java b/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java index 469f3f2cd31..931551b8757 100644 --- a/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java @@ -198,7 +198,7 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase { assertTrue( SolrMetricTestUtils.getHistogramDatapoint( core, - "solr_core_leader_sync_time_milliseconds", + "solr_core_sync_with_leader_time_milliseconds", SolrMetricTestUtils.newCloudLabelsBuilder(core) .label("category", "REPLICATION") .build()) @@ -206,7 +206,7 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase { assertNull( SolrMetricTestUtils.getCounterDatapoint( core, - "solr_core_leader_sync_errors", + "solr_core_sync_with_leader_sync_errors", SolrMetricTestUtils.newCloudLabelsBuilder(core) .label("category", "REPLICATION") .build())); diff --git a/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java b/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java index a83da0d2e7b..cf4b41eb150 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java @@ -279,7 +279,7 @@ public class TestRandomRequestDistribution extends AbstractFullDistribZkTestBase private Double getSelectRequestCount(SolrCore core) { var labels = - SolrMetricTestUtils.getCloudLabelsBase(core) + SolrMetricTestUtils.newCloudLabelsBuilder(core) .label("category", "QUERY") .label("handler", "/select") .label("internal", "false") diff --git a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestMetricExemplars.java b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestMetricExemplars.java index 687b86b20cb..e0660f8559a 100644 --- a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestMetricExemplars.java +++ b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestMetricExemplars.java @@ -79,14 +79,12 @@ public class TestMetricExemplars extends SolrCloudTestCase { .lines() .filter( l -> - l.startsWith("solr_metrics_core_requests_total") + l.startsWith("solr_core_requests_total") && l.contains("handler=\"/update\"") && l.contains("collection=\"collection1\"")) .findFirst() .orElseThrow( - () -> - new AssertionError( - "Could not find /update solr_metrics_core_requests_total")); + () -> new AssertionError("Could not find /update solr_core_requests_total")); String actualExemplarTrace = line.split("trace_id=\"")[1].split("\"")[0]; assertEquals(actualExemplarTrace, expectedTrace); diff --git a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZKMetricsListener.java b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZKMetricsListener.java index e6375b9348a..d8cc8f350b7 100644 --- a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZKMetricsListener.java +++ b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZKMetricsListener.java @@ -16,7 +16,6 @@ */ package org.apache.solr.common.cloud; -import java.io.IOException; import java.util.concurrent.atomic.LongAdder; import org.apache.curator.drivers.AdvancedTracerDriver; import org.apache.curator.drivers.EventTrace; @@ -24,32 +23,20 @@ import org.apache.curator.drivers.OperationTrace; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.api.CuratorEvent; import org.apache.curator.framework.api.CuratorListener; -import org.apache.solr.common.MapWriter; -import org.apache.solr.common.annotation.JsonProperty; -import org.apache.solr.common.util.ReflectMapWriter; -public class SolrZKMetricsListener extends AdvancedTracerDriver - implements ReflectMapWriter, CuratorListener { - // all fields of this class are public because ReflectMapWriter requires them to be. - // however the object itself is private and only this class can modify it +public class SolrZKMetricsListener extends AdvancedTracerDriver implements CuratorListener { - @JsonProperty public final LongAdder watchesFired = new LongAdder(); - @JsonProperty public final LongAdder reads = new LongAdder(); - @JsonProperty public final LongAdder writes = new LongAdder(); - @JsonProperty public final LongAdder bytesRead = new LongAdder(); - @JsonProperty public final LongAdder bytesWritten = new LongAdder(); - - @JsonProperty public final LongAdder multiOps = new LongAdder(); - - @JsonProperty public final LongAdder cumulativeMultiOps = new LongAdder(); - - @JsonProperty public final LongAdder childFetches = new LongAdder(); - - @JsonProperty public final LongAdder cumulativeChildrenFetched = new LongAdder(); - - @JsonProperty public final LongAdder existsChecks = new LongAdder(); - - @JsonProperty public final LongAdder deletes = new LongAdder(); + final LongAdder watchesFired = new LongAdder(); + final LongAdder reads = new LongAdder(); + final LongAdder writes = new LongAdder(); + final LongAdder bytesRead = new LongAdder(); + final LongAdder bytesWritten = new LongAdder(); + final LongAdder multiOps = new LongAdder(); + final LongAdder cumulativeMultiOps = new LongAdder(); + final LongAdder childFetches = new LongAdder(); + final LongAdder cumulativeChildrenFetched = new LongAdder(); + final LongAdder existsChecks = new LongAdder(); + final LongAdder deletes = new LongAdder(); /* This is used by curator for all operations, but we will only use it for Foreground operations. @@ -128,19 +115,47 @@ public class SolrZKMetricsListener extends AdvancedTracerDriver } } - @Override - public void writeMap(MapWriter.EntryWriter ew) throws IOException { - ReflectMapWriter.super.writeMap( - new MapWriter.EntryWriter() { - @Override - public MapWriter.EntryWriter put(CharSequence k, Object v) throws IOException { - if (v instanceof LongAdder) { - ew.put(k, ((LongAdder) v).longValue()); - } else { - ew.put(k, v); - } - return this; - } - }); + public long getWatchesFired() { + return watchesFired.longValue(); + } + + public long getReads() { + return reads.longValue(); + } + + public long getWrites() { + return writes.longValue(); + } + + public long getBytesRead() { + return bytesRead.longValue(); + } + + public long getBytesWritten() { + return bytesWritten.longValue(); + } + + public long getMultiOps() { + return multiOps.longValue(); + } + + public long getCumulativeMultiOps() { + return cumulativeMultiOps.longValue(); + } + + public long getChildFetches() { + return childFetches.longValue(); + } + + public long getCumulativeChildrenFetched() { + return cumulativeChildrenFetched.longValue(); + } + + public long getExistsChecks() { + return existsChecks.longValue(); + } + + public long getDeletes() { + return deletes.longValue(); } } diff --git a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java index 14ec233981d..eaefc7589fc 100644 --- a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java +++ b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java @@ -47,7 +47,6 @@ import org.apache.curator.retry.ExponentialBackoffRetry; import org.apache.curator.utils.ZKPaths; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.SolrZkClientTimeout; -import org.apache.solr.common.MapWriter; import org.apache.solr.common.SolrException; import org.apache.solr.common.util.Compressor; import org.apache.solr.common.util.ExecutorUtil; @@ -86,7 +85,7 @@ public class SolrZkClient implements Closeable { private int minStateByteLenForCompression; // The metrics collector is shared across all SolrZkClient objects - public MapWriter getMetrics() { + public SolrZKMetricsListener getMetrics() { return metricsListener; }
