This is an automated email from the ASF dual-hosted git repository.
mlbiscoc pushed a commit to branch feature/SOLR-17458
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/feature/SOLR-17458 by this
push:
new c43b99c6545 SOLR-17806: Migrate ZkContainer metrics to OTEL (#3453)
c43b99c6545 is described below
commit c43b99c6545ba0d79ad2c18baf16e31c76a69c7f
Author: Matthew Biscocho <[email protected]>
AuthorDate: Mon Aug 11 14:36:33 2025 -0400
SOLR-17806: Migrate ZkContainer metrics to OTEL (#3453)
* Migrate ZkContainer metrics
* Fix naming
---
.../java/org/apache/solr/core/CoreContainer.java | 7 +-
.../src/java/org/apache/solr/core/ZkContainer.java | 73 +++++++++++++++--
.../apache/solr/handler/RequestHandlerBase.java | 5 --
.../java/org/apache/solr/metrics/MetricsMap.java | 3 +
.../apache/solr/cloud/PeerSyncReplicationTest.java | 4 +-
.../solr/cloud/TestRandomRequestDistribution.java | 2 +-
.../solr/opentelemetry/TestMetricExemplars.java | 6 +-
.../solr/common/cloud/SolrZKMetricsListener.java | 93 +++++++++++++---------
.../org/apache/solr/common/cloud/SolrZkClient.java | 3 +-
9 files changed, 136 insertions(+), 60 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index c2040429d5a..829e9875983 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -26,6 +26,7 @@ import static
org.apache.solr.common.params.CommonParams.INFO_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.METRICS_PATH;
import static org.apache.solr.common.params.CommonParams.ZK_PATH;
import static org.apache.solr.common.params.CommonParams.ZK_STATUS_PATH;
+import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR;
import static org.apache.solr.metrics.SolrMetricProducer.HANDLER_ATTR;
import static
org.apache.solr.search.SolrIndexSearcher.EXECUTOR_MAX_CPU_THREADS;
import static
org.apache.solr.security.AuthenticationPlugin.AUTHENTICATION_PLUGIN_PROP;
@@ -899,10 +900,12 @@ public class CoreContainer {
if (isZooKeeperAware()) {
solrClientCache.setDefaultZKHost(getZkController().getZkServerAddress());
// initialize ZkClient metrics
- // NOCOMMIT SOLR-17458: Add Otel
zkSys
.getZkMetricsProducer()
- .initializeMetrics(solrMetricsContext, Attributes.empty(),
"zkClient");
+ .initializeMetrics(
+ solrMetricsContext,
+ Attributes.of(CATEGORY_ATTR,
SolrInfoBean.Category.CONTAINER.toString()),
+ "zkClient");
pkiAuthenticationSecurityBuilder =
new PKIAuthenticationPlugin(
this,
diff --git a/solr/core/src/java/org/apache/solr/core/ZkContainer.java
b/solr/core/src/java/org/apache/solr/core/ZkContainer.java
index 6cd48e10120..849f15b0458 100644
--- a/solr/core/src/java/org/apache/solr/core/ZkContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/ZkContainer.java
@@ -38,7 +38,6 @@ import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.logging.MDCLoggingContext;
-import org.apache.solr.metrics.MetricsMap;
import org.apache.solr.metrics.SolrMetricProducer;
import org.apache.solr.metrics.SolrMetricsContext;
import org.apache.zookeeper.KeeperException;
@@ -141,18 +140,82 @@ public class ZkContainer {
}
this.zkController = zkController;
- MetricsMap metricsMap = new
MetricsMap(zkController.getZkClient().getMetrics());
+
metricProducer =
new SolrMetricProducer() {
SolrMetricsContext ctx;
- // TODO SOLR-17458: Migrate to Otel
@Override
public void initializeMetrics(
SolrMetricsContext parentContext, Attributes attributes,
String scope) {
ctx = parentContext.getChildContext(this);
- ctx.gauge(
- metricsMap, true, scope, null,
SolrInfoBean.Category.CONTAINER.toString());
+
+ var metricsListener = zkController.getZkClient().getMetrics();
+
+ ctx.observableLongCounter(
+ "solr_zk_ops",
+ "Total number of ZooKeeper operations",
+ measurement -> {
+ measurement.record(
+ metricsListener.getReads(),
+ attributes.toBuilder().put(OPERATION_ATTR,
"read").build());
+ measurement.record(
+ metricsListener.getDeletes(),
+ attributes.toBuilder().put(OPERATION_ATTR,
"delete").build());
+ measurement.record(
+ metricsListener.getWrites(),
+ attributes.toBuilder().put(OPERATION_ATTR,
"write").build());
+ measurement.record(
+ metricsListener.getMultiOps(),
+ attributes.toBuilder().put(OPERATION_ATTR,
"multi").build());
+ measurement.record(
+ metricsListener.getExistsChecks(),
+ attributes.toBuilder().put(OPERATION_ATTR,
"exists").build());
+ });
+
+ ctx.observableLongCounter(
+ "solr_zk_read",
+ "Total bytes read from ZooKeeper",
+ measurement -> {
+ measurement.record(metricsListener.getBytesRead(),
attributes);
+ },
+ "By");
+
+ ctx.observableLongCounter(
+ "solr_zk_watches_fired",
+ "Total number of ZooKeeper watches fired",
+ measurement -> {
+ measurement.record(metricsListener.getWatchesFired(),
attributes);
+ });
+
+ ctx.observableLongCounter(
+ "solr_zk_written",
+ "Total bytes written to ZooKeeper",
+ measurement -> {
+ measurement.record(metricsListener.getBytesWritten());
+ },
+ "By");
+
+ ctx.observableLongCounter(
+ "solr_zk_cumulative_multi_ops_total",
+ "Total cumulative multi-operations count",
+ measurement -> {
+
measurement.record(metricsListener.getCumulativeMultiOps());
+ });
+
+ ctx.observableLongCounter(
+ "solr_zk_child_fetches",
+ "Total number of ZooKeeper child node fetches",
+ measurement -> {
+ measurement.record(metricsListener.getChildFetches());
+ });
+
+ ctx.observableLongCounter(
+ "solr_zk_cumulative_children_fetched",
+ "Total cumulative children fetched count",
+ measurement -> {
+
measurement.record(metricsListener.getCumulativeChildrenFetched());
+ });
}
@Override
diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
index d33921b2e11..4ebc874e741 100644
--- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
+++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
@@ -250,11 +250,6 @@ public abstract class RequestHandlerBase
numTimeouts = new AttributedLongCounter(timeoutRequestMetric,
attributes);
requestTimes = new AttributedLongTimer(requestTimeMetric, attributes);
- // NOCOMMIT: Temporary to see metrics
- requests.add(0L);
- numTimeouts.add(0L);
- numClientErrors.add(0L);
- numServerErrors.add(0L);
}
}
diff --git a/solr/core/src/java/org/apache/solr/metrics/MetricsMap.java
b/solr/core/src/java/org/apache/solr/metrics/MetricsMap.java
index aec6f3e352e..1d667cf556b 100644
--- a/solr/core/src/java/org/apache/solr/metrics/MetricsMap.java
+++ b/solr/core/src/java/org/apache/solr/metrics/MetricsMap.java
@@ -55,6 +55,9 @@ import org.slf4j.LoggerFactory;
* {@link javax.management.openmbean.OpenType#ALLOWED_CLASSNAMES_LIST},
otherwise only their
* toString() representation will be shown in JConsole.
*/
+// NOCOMMIT: This MetricMap appears to be a shim for MapWriter for responses
and a way to shim
+// gauges into Dropwizard and JXM which is being removed. This should be
removed completely as it is
+// no longer needed with OTEL
public class MetricsMap implements Gauge<Map<String, Object>>, MapWriter,
DynamicMBean {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git
a/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
b/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
index cb0d85fabc6..caec21813ed 100644
--- a/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
@@ -198,7 +198,7 @@ public class PeerSyncReplicationTest extends
AbstractFullDistribZkTestBase {
assertTrue(
SolrMetricTestUtils.getHistogramDatapoint(
core,
- "solr_core_leader_sync_time_milliseconds",
+ "solr_core_sync_with_leader_time_milliseconds",
SolrMetricTestUtils.newCloudLabelsBuilder(core)
.label("category", "REPLICATION")
.build())
@@ -206,7 +206,7 @@ public class PeerSyncReplicationTest extends
AbstractFullDistribZkTestBase {
assertNull(
SolrMetricTestUtils.getCounterDatapoint(
core,
- "solr_core_leader_sync_errors",
+ "solr_core_sync_with_leader_sync_errors",
SolrMetricTestUtils.newCloudLabelsBuilder(core)
.label("category", "REPLICATION")
.build()));
diff --git
a/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java
b/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java
index 58b725bbc44..5d099f1057e 100644
---
a/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java
+++
b/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java
@@ -281,7 +281,7 @@ public class TestRandomRequestDistribution extends
AbstractFullDistribZkTestBase
private Double getSelectRequestCount(SolrCore core) {
var labels =
- SolrMetricTestUtils.getCloudLabelsBase(core)
+ SolrMetricTestUtils.newCloudLabelsBuilder(core)
.label("category", "QUERY")
.label("handler", "/select")
.label("internal", "false")
diff --git
a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestMetricExemplars.java
b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestMetricExemplars.java
index 687b86b20cb..e0660f8559a 100644
---
a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestMetricExemplars.java
+++
b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestMetricExemplars.java
@@ -79,14 +79,12 @@ public class TestMetricExemplars extends SolrCloudTestCase {
.lines()
.filter(
l ->
- l.startsWith("solr_metrics_core_requests_total")
+ l.startsWith("solr_core_requests_total")
&& l.contains("handler=\"/update\"")
&& l.contains("collection=\"collection1\""))
.findFirst()
.orElseThrow(
- () ->
- new AssertionError(
- "Could not find /update
solr_metrics_core_requests_total"));
+ () -> new AssertionError("Could not find /update
solr_core_requests_total"));
String actualExemplarTrace = line.split("trace_id=\"")[1].split("\"")[0];
assertEquals(actualExemplarTrace, expectedTrace);
diff --git
a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZKMetricsListener.java
b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZKMetricsListener.java
index e6375b9348a..d8cc8f350b7 100644
---
a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZKMetricsListener.java
+++
b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZKMetricsListener.java
@@ -16,7 +16,6 @@
*/
package org.apache.solr.common.cloud;
-import java.io.IOException;
import java.util.concurrent.atomic.LongAdder;
import org.apache.curator.drivers.AdvancedTracerDriver;
import org.apache.curator.drivers.EventTrace;
@@ -24,32 +23,20 @@ import org.apache.curator.drivers.OperationTrace;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.api.CuratorEvent;
import org.apache.curator.framework.api.CuratorListener;
-import org.apache.solr.common.MapWriter;
-import org.apache.solr.common.annotation.JsonProperty;
-import org.apache.solr.common.util.ReflectMapWriter;
-public class SolrZKMetricsListener extends AdvancedTracerDriver
- implements ReflectMapWriter, CuratorListener {
- // all fields of this class are public because ReflectMapWriter requires
them to be.
- // however the object itself is private and only this class can modify it
+public class SolrZKMetricsListener extends AdvancedTracerDriver implements
CuratorListener {
- @JsonProperty public final LongAdder watchesFired = new LongAdder();
- @JsonProperty public final LongAdder reads = new LongAdder();
- @JsonProperty public final LongAdder writes = new LongAdder();
- @JsonProperty public final LongAdder bytesRead = new LongAdder();
- @JsonProperty public final LongAdder bytesWritten = new LongAdder();
-
- @JsonProperty public final LongAdder multiOps = new LongAdder();
-
- @JsonProperty public final LongAdder cumulativeMultiOps = new LongAdder();
-
- @JsonProperty public final LongAdder childFetches = new LongAdder();
-
- @JsonProperty public final LongAdder cumulativeChildrenFetched = new
LongAdder();
-
- @JsonProperty public final LongAdder existsChecks = new LongAdder();
-
- @JsonProperty public final LongAdder deletes = new LongAdder();
+ final LongAdder watchesFired = new LongAdder();
+ final LongAdder reads = new LongAdder();
+ final LongAdder writes = new LongAdder();
+ final LongAdder bytesRead = new LongAdder();
+ final LongAdder bytesWritten = new LongAdder();
+ final LongAdder multiOps = new LongAdder();
+ final LongAdder cumulativeMultiOps = new LongAdder();
+ final LongAdder childFetches = new LongAdder();
+ final LongAdder cumulativeChildrenFetched = new LongAdder();
+ final LongAdder existsChecks = new LongAdder();
+ final LongAdder deletes = new LongAdder();
/*
This is used by curator for all operations, but we will only use it for
Foreground operations.
@@ -128,19 +115,47 @@ public class SolrZKMetricsListener extends
AdvancedTracerDriver
}
}
- @Override
- public void writeMap(MapWriter.EntryWriter ew) throws IOException {
- ReflectMapWriter.super.writeMap(
- new MapWriter.EntryWriter() {
- @Override
- public MapWriter.EntryWriter put(CharSequence k, Object v) throws
IOException {
- if (v instanceof LongAdder) {
- ew.put(k, ((LongAdder) v).longValue());
- } else {
- ew.put(k, v);
- }
- return this;
- }
- });
+ public long getWatchesFired() {
+ return watchesFired.longValue();
+ }
+
+ public long getReads() {
+ return reads.longValue();
+ }
+
+ public long getWrites() {
+ return writes.longValue();
+ }
+
+ public long getBytesRead() {
+ return bytesRead.longValue();
+ }
+
+ public long getBytesWritten() {
+ return bytesWritten.longValue();
+ }
+
+ public long getMultiOps() {
+ return multiOps.longValue();
+ }
+
+ public long getCumulativeMultiOps() {
+ return cumulativeMultiOps.longValue();
+ }
+
+ public long getChildFetches() {
+ return childFetches.longValue();
+ }
+
+ public long getCumulativeChildrenFetched() {
+ return cumulativeChildrenFetched.longValue();
+ }
+
+ public long getExistsChecks() {
+ return existsChecks.longValue();
+ }
+
+ public long getDeletes() {
+ return deletes.longValue();
}
}
diff --git
a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java
b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java
index 14ec233981d..eaefc7589fc 100644
---
a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java
+++
b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/SolrZkClient.java
@@ -47,7 +47,6 @@ import org.apache.curator.retry.ExponentialBackoffRetry;
import org.apache.curator.utils.ZKPaths;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.SolrZkClientTimeout;
-import org.apache.solr.common.MapWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.Compressor;
import org.apache.solr.common.util.ExecutorUtil;
@@ -86,7 +85,7 @@ public class SolrZkClient implements Closeable {
private int minStateByteLenForCompression;
// The metrics collector is shared across all SolrZkClient objects
- public MapWriter getMetrics() {
+ public SolrZKMetricsListener getMetrics() {
return metricsListener;
}