This is an automated email from the ASF dual-hosted git repository.
mlbiscoc pushed a commit to branch feature/SOLR-17458
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/feature/SOLR-17458 by this
push:
new 4b5a23cb5ec SOLR-17806: Convert CoreContainer metrics to OTEL (#3509)
4b5a23cb5ec is described below
commit 4b5a23cb5eceba1876a14ca8beaa4385c4012346
Author: Jude Muriithi <[email protected]>
AuthorDate: Fri Sep 26 18:27:21 2025 -0400
SOLR-17806: Convert CoreContainer metrics to OTEL (#3509)
* initial commit
* Replace CoreContainer gauges
* Add nocommits for node state reporting
* Fix metric names
* Convert CoreContainer metrics test
* Remove duplicate method after merge
* PR Feedback
* Update solr_disk_space description
---------
Co-authored-by: jmuriithi3 <[email protected]>
---
.../java/org/apache/solr/core/CoreContainer.java | 34 +++++++++++-
.../src/java/org/apache/solr/core/SolrCores.java | 44 +++++++++++++++-
.../org/apache/solr/core/TransientSolrCores.java | 2 +-
.../solr/metrics/SolrMetricsIntegrationTest.java | 61 ++++++++++++++--------
.../solr/client/solrj/impl/NodeValueFetcher.java | 2 +
.../solrj/impl/SolrClientNodeStateProvider.java | 2 +
6 files changed, 118 insertions(+), 27 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 7c64b94115e..2578ab601ba 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -29,6 +29,7 @@ import static
org.apache.solr.common.params.CommonParams.ZK_STATUS_PATH;
import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR;
import static org.apache.solr.metrics.SolrMetricProducer.HANDLER_ATTR;
import static org.apache.solr.metrics.SolrMetricProducer.NAME_ATTR;
+import static org.apache.solr.metrics.SolrMetricProducer.TYPE_ATTR;
import static
org.apache.solr.search.SolrIndexSearcher.EXECUTOR_MAX_CPU_THREADS;
import static
org.apache.solr.security.AuthenticationPlugin.AUTHENTICATION_PLUGIN_PROP;
@@ -141,6 +142,7 @@ import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.SolrMetricProducer;
import org.apache.solr.metrics.SolrMetricsContext;
+import org.apache.solr.metrics.otel.OtelUnit;
import org.apache.solr.pkg.SolrPackageLoader;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
@@ -1004,8 +1006,14 @@ public class CoreContainer {
containerProperties.putAll(cfg.getSolrProperties());
+ Attributes containerAttrs =
+ Attributes.builder().put(CATEGORY_ATTR,
SolrInfoBean.Category.CONTAINER.toString()).build();
+
// initialize gauges for reporting the number of cores and disk total/free
+ solrCores.initializeMetrics(solrMetricsContext, containerAttrs, "");
+ // NOCOMMIT: Can't remove these without impacting node state reporting
+ // until NodeValueFetcher and SolrClientNodeStateProvider are patched
solrMetricsContext.gauge(
solrCores::getNumLoadedPermanentCores,
true,
@@ -1024,8 +1032,33 @@ public class CoreContainer {
"unloaded",
SolrInfoBean.Category.CONTAINER.toString(),
"cores");
+
Path dataHome =
cfg.getSolrDataHome() != null ? cfg.getSolrDataHome() :
cfg.getCoreRootDirectory();
+
+ solrMetricsContext.observableLongGauge(
+ "solr_disk_space",
+ String.format("Disk metrics for Solr's data home directory (%s)",
dataHome.toString()),
+ measurement -> {
+ try {
+ var fileStore = Files.getFileStore(dataHome);
+ measurement.record(
+ fileStore.getTotalSpace(),
+ containerAttrs.toBuilder().put(TYPE_ATTR,
"total_space").build());
+ measurement.record(
+ fileStore.getUsableSpace(),
+ containerAttrs.toBuilder().put(TYPE_ATTR,
"usable_space").build());
+ } catch (IOException e) {
+ throw new SolrException(
+ ErrorCode.SERVER_ERROR,
+ "Error retrieving disk space information for data home
directory" + dataHome,
+ e);
+ }
+ },
+ OtelUnit.BYTES);
+
+ // NOCOMMIT: Can't remove these without impacting node state reporting
+ // until NodeValueFetcher and SolrClientNodeStateProvider are patched
solrMetricsContext.gauge(
() -> {
try {
@@ -1113,7 +1146,6 @@ public class CoreContainer {
"implementation",
SolrInfoBean.Category.CONTAINER.toString(),
"version");
-
SolrFieldCacheBean fieldCacheBean = new SolrFieldCacheBean();
fieldCacheBean.initializeMetrics(
solrMetricsContext,
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCores.java
b/solr/core/src/java/org/apache/solr/core/SolrCores.java
index 8aef23b0555..104ca9b34c1 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCores.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCores.java
@@ -16,6 +16,7 @@
*/
package org.apache.solr.core;
+import io.opentelemetry.api.common.Attributes;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
@@ -33,11 +34,12 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.logging.MDCLoggingContext;
+import org.apache.solr.metrics.SolrMetricsContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** AKA CoreManager: Holds/manages {@link SolrCore}s within {@link
CoreContainer}. */
-public class SolrCores {
+public class SolrCores implements SolrInfoBean {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -90,7 +92,7 @@ public class SolrCores {
// We are shutting down. You can't hold the lock on the various lists of
cores while they shut
// down, so we need to make a temporary copy of the names and shut them down
outside the lock.
- protected void close() {
+ public void close() {
waitForLoadingCoresToFinish(30 * 1000);
// It might be possible for one of the cores to move from one list to
another while we're
@@ -488,4 +490,42 @@ public class SolrCores {
modifyLock.notifyAll(); // Wakes up closer thread too
}
}
+
+ @Override
+ public void initializeMetrics(
+ SolrMetricsContext parentContext, Attributes attributes, String scope) {
+ parentContext.observableLongGauge(
+ "solr_cores_loaded",
+ "Number of Solr cores loaded by CoreContainer",
+ measurement -> {
+ measurement.record(
+ getNumLoadedPermanentCores(),
+ attributes.toBuilder().put(TYPE_ATTR, "permanent").build());
+ measurement.record(
+ getNumLoadedTransientCores(),
+ attributes.toBuilder().put(TYPE_ATTR, "transient").build());
+ measurement.record(
+ getNumUnloadedCores(), attributes.toBuilder().put(TYPE_ATTR,
"unloaded").build());
+ });
+ }
+
+ @Override
+ public SolrMetricsContext getSolrMetricsContext() {
+ return this.container.solrMetricsContext;
+ }
+
+ @Override
+ public String getName() {
+ return this.getClass().getName();
+ }
+
+ @Override
+ public String getDescription() {
+ return "Manager for Solr cores within a CoreContainer";
+ }
+
+ @Override
+ public Category getCategory() {
+ return Category.CONTAINER;
+ }
}
diff --git a/solr/core/src/java/org/apache/solr/core/TransientSolrCores.java
b/solr/core/src/java/org/apache/solr/core/TransientSolrCores.java
index 2b50b2d81e3..7cba1e2edbc 100644
--- a/solr/core/src/java/org/apache/solr/core/TransientSolrCores.java
+++ b/solr/core/src/java/org/apache/solr/core/TransientSolrCores.java
@@ -34,7 +34,7 @@ public class TransientSolrCores extends SolrCores {
}
@Override
- protected void close() {
+ public void close() {
super.close();
transientSolrCoreCache.close();
}
diff --git
a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
index 7b03ea79492..8c994f6d9f8 100644
--- a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
@@ -17,14 +17,17 @@
package org.apache.solr.metrics;
-import com.codahale.metrics.Gauge;
-import com.codahale.metrics.Metric;
-import com.codahale.metrics.MetricRegistry;
+import static org.apache.solr.metrics.SolrMetricProducer.TYPE_ATTR;
+
+import io.prometheus.metrics.model.snapshots.GaugeSnapshot;
+import
io.prometheus.metrics.model.snapshots.GaugeSnapshot.GaugeDataPointSnapshot;
+import io.prometheus.metrics.model.snapshots.MetricSnapshots;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import org.apache.http.client.HttpClient;
import org.apache.solr.SolrTestCaseJ4;
@@ -35,7 +38,6 @@ import org.apache.solr.common.util.Utils;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.NodeConfig;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.core.SolrXmlConfig;
import org.apache.solr.embedded.JettySolrRunner;
import org.apache.solr.util.SolrMetricTestUtils;
@@ -81,25 +83,38 @@ public class SolrMetricsIntegrationTest extends
SolrTestCaseJ4 {
@Test
public void testCoreContainerMetrics() {
- String registryName =
SolrMetricManager.getRegistryName(SolrInfoBean.Group.node);
- assertTrue(
- cc.getMetricManager().registryNames().toString(),
- cc.getMetricManager().registryNames().contains(registryName));
- MetricRegistry registry = cc.getMetricManager().registry(registryName);
- Map<String, Metric> metrics = registry.getMetrics();
- assertTrue(metrics.containsKey("CONTAINER.cores.loaded"));
- assertTrue(metrics.containsKey("CONTAINER.cores.lazy"));
- assertTrue(metrics.containsKey("CONTAINER.cores.unloaded"));
- assertTrue(metrics.containsKey("CONTAINER.fs.totalSpace"));
- assertTrue(metrics.containsKey("CONTAINER.fs.usableSpace"));
- assertTrue(metrics.containsKey("CONTAINER.fs.path"));
- assertTrue(metrics.containsKey("CONTAINER.fs.coreRoot.totalSpace"));
- assertTrue(metrics.containsKey("CONTAINER.fs.coreRoot.usableSpace"));
- assertTrue(metrics.containsKey("CONTAINER.fs.coreRoot.path"));
- assertTrue(metrics.containsKey("CONTAINER.version.specification"));
- assertTrue(metrics.containsKey("CONTAINER.version.implementation"));
- Gauge<?> g = (Gauge<?>) metrics.get("CONTAINER.fs.path");
- assertEquals(g.getValue(), cc.getSolrHome().toString());
+ MetricSnapshots metrics =
+ new MetricSnapshots(
+ metricManager.getPrometheusMetricReaders().entrySet().stream()
+ .flatMap(
+ entry ->
+ entry.getValue().collect().stream()
+ .filter(m ->
!m.getMetadata().getPrometheusName().startsWith("target")))
+ .toList());
+
+ GaugeSnapshot coresLoaded =
+ SolrMetricTestUtils.getMetricSnapshot(GaugeSnapshot.class, metrics,
"solr_cores_loaded");
+ assertTrue(getGaugeOpt(coresLoaded, "permanent").isPresent());
+ assertTrue(getGaugeOpt(coresLoaded, "transient").isPresent());
+ assertTrue(getGaugeOpt(coresLoaded, "unloaded").isPresent());
+
+ GaugeSnapshot fsDiskSpace =
+ SolrMetricTestUtils.getMetricSnapshot(
+ GaugeSnapshot.class, metrics,
"solr_cores_filesystem_disk_space_bytes");
+ assertTrue(getGaugeOpt(fsDiskSpace, "total_space").isPresent());
+ assertTrue(getGaugeOpt(fsDiskSpace, "usable_space").isPresent());
+
+ GaugeSnapshot rootDiskSpace =
+ SolrMetricTestUtils.getMetricSnapshot(
+ GaugeSnapshot.class, metrics, "solr_cores_root_disk_space_bytes");
+ assertTrue(getGaugeOpt(rootDiskSpace, "total_space").isPresent());
+ assertTrue(getGaugeOpt(rootDiskSpace, "usable_space").isPresent());
+ }
+
+ private static Optional<GaugeDataPointSnapshot> getGaugeOpt(GaugeSnapshot
gauges, String type) {
+ return gauges.getDataPoints().stream()
+ .filter(g -> g.getLabels().get(TYPE_ATTR.toString()).equals(type))
+ .findFirst();
}
// NOCOMMIT: Come back and fix this test after merging the SolrZKClient
metrics migration
diff --git
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
index b2843c6dec5..7f787c1cb6f 100644
---
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
+++
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
@@ -38,6 +38,8 @@ import org.apache.solr.common.util.Utils;
* This class is responsible for fetching metrics and other attributes from a
given node in Solr
* cluster. This is a helper class that is used by {@link
SolrClientNodeStateProvider}
*/
+// NOCOMMIT: Need to remove hardcoded references to Dropwizard metrics for
OTEL conversion, and
+// probably change enum structure to be more compatible with OTEL naming
public class NodeValueFetcher {
// well known tags
public static final String NODE = "node";
diff --git
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
index e3b3e9cbcf5..b88ae6a177f 100644
---
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
+++
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
@@ -181,6 +181,8 @@ public class SolrClientNodeStateProvider implements
NodeStateProvider, MapWriter
return ctx.tags;
}
+ // NOCOMMIT: We need to change the /admin/metrics call here to work with
+ // Prometheus/OTEL telemetry
static void fetchReplicaMetrics(
String solrNode, RemoteCallCtx ctx, Map<String, Set<Object>>
metricsKeyVsTag) {
if (!ctx.isNodeAlive(solrNode)) return;