This is an automated email from the ASF dual-hosted git repository.

mlbiscoc pushed a commit to branch feature/SOLR-17458
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/feature/SOLR-17458 by this 
push:
     new 4b5a23cb5ec SOLR-17806: Convert CoreContainer metrics to OTEL (#3509)
4b5a23cb5ec is described below

commit 4b5a23cb5eceba1876a14ca8beaa4385c4012346
Author: Jude Muriithi <[email protected]>
AuthorDate: Fri Sep 26 18:27:21 2025 -0400

    SOLR-17806: Convert CoreContainer metrics to OTEL (#3509)
    
    * initial commit
    
    * Replace CoreContainer gauges
    
    * Add nocommits for node state reporting
    
    * Fix metric names
    
    * Convert CoreContainer metrics test
    
    * Remove duplicate method after merge
    
    * PR Feedback
    
    * Update solr_disk_space description
    
    ---------
    
    Co-authored-by: jmuriithi3 <[email protected]>
---
 .../java/org/apache/solr/core/CoreContainer.java   | 34 +++++++++++-
 .../src/java/org/apache/solr/core/SolrCores.java   | 44 +++++++++++++++-
 .../org/apache/solr/core/TransientSolrCores.java   |  2 +-
 .../solr/metrics/SolrMetricsIntegrationTest.java   | 61 ++++++++++++++--------
 .../solr/client/solrj/impl/NodeValueFetcher.java   |  2 +
 .../solrj/impl/SolrClientNodeStateProvider.java    |  2 +
 6 files changed, 118 insertions(+), 27 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java 
b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 7c64b94115e..2578ab601ba 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -29,6 +29,7 @@ import static 
org.apache.solr.common.params.CommonParams.ZK_STATUS_PATH;
 import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR;
 import static org.apache.solr.metrics.SolrMetricProducer.HANDLER_ATTR;
 import static org.apache.solr.metrics.SolrMetricProducer.NAME_ATTR;
+import static org.apache.solr.metrics.SolrMetricProducer.TYPE_ATTR;
 import static 
org.apache.solr.search.SolrIndexSearcher.EXECUTOR_MAX_CPU_THREADS;
 import static 
org.apache.solr.security.AuthenticationPlugin.AUTHENTICATION_PLUGIN_PROP;
 
@@ -141,6 +142,7 @@ import org.apache.solr.logging.MDCLoggingContext;
 import org.apache.solr.metrics.SolrMetricManager;
 import org.apache.solr.metrics.SolrMetricProducer;
 import org.apache.solr.metrics.SolrMetricsContext;
+import org.apache.solr.metrics.otel.OtelUnit;
 import org.apache.solr.pkg.SolrPackageLoader;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
@@ -1004,8 +1006,14 @@ public class CoreContainer {
 
     containerProperties.putAll(cfg.getSolrProperties());
 
+    Attributes containerAttrs =
+        Attributes.builder().put(CATEGORY_ATTR, 
SolrInfoBean.Category.CONTAINER.toString()).build();
+
     // initialize gauges for reporting the number of cores and disk total/free
+    solrCores.initializeMetrics(solrMetricsContext, containerAttrs, "");
 
+    // NOCOMMIT: Can't remove these without impacting node state reporting
+    // until NodeValueFetcher and SolrClientNodeStateProvider are patched
     solrMetricsContext.gauge(
         solrCores::getNumLoadedPermanentCores,
         true,
@@ -1024,8 +1032,33 @@ public class CoreContainer {
         "unloaded",
         SolrInfoBean.Category.CONTAINER.toString(),
         "cores");
+
     Path dataHome =
         cfg.getSolrDataHome() != null ? cfg.getSolrDataHome() : 
cfg.getCoreRootDirectory();
+
+    solrMetricsContext.observableLongGauge(
+        "solr_disk_space",
+        String.format("Disk metrics for Solr's data home directory (%s)", 
dataHome.toString()),
+        measurement -> {
+          try {
+            var fileStore = Files.getFileStore(dataHome);
+            measurement.record(
+                fileStore.getTotalSpace(),
+                containerAttrs.toBuilder().put(TYPE_ATTR, 
"total_space").build());
+            measurement.record(
+                fileStore.getUsableSpace(),
+                containerAttrs.toBuilder().put(TYPE_ATTR, 
"usable_space").build());
+          } catch (IOException e) {
+            throw new SolrException(
+                ErrorCode.SERVER_ERROR,
+                "Error retrieving disk space information for data home 
directory" + dataHome,
+                e);
+          }
+        },
+        OtelUnit.BYTES);
+
+    // NOCOMMIT: Can't remove these without impacting node state reporting
+    // until NodeValueFetcher and SolrClientNodeStateProvider are patched
     solrMetricsContext.gauge(
         () -> {
           try {
@@ -1113,7 +1146,6 @@ public class CoreContainer {
         "implementation",
         SolrInfoBean.Category.CONTAINER.toString(),
         "version");
-
     SolrFieldCacheBean fieldCacheBean = new SolrFieldCacheBean();
     fieldCacheBean.initializeMetrics(
         solrMetricsContext,
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCores.java 
b/solr/core/src/java/org/apache/solr/core/SolrCores.java
index 8aef23b0555..104ca9b34c1 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCores.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCores.java
@@ -16,6 +16,7 @@
  */
 package org.apache.solr.core;
 
+import io.opentelemetry.api.common.Attributes;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -33,11 +34,12 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.SolrNamedThreadFactory;
 import org.apache.solr.logging.MDCLoggingContext;
+import org.apache.solr.metrics.SolrMetricsContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /** AKA CoreManager: Holds/manages {@link SolrCore}s within {@link 
CoreContainer}. */
-public class SolrCores {
+public class SolrCores implements SolrInfoBean {
 
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -90,7 +92,7 @@ public class SolrCores {
 
   // We are shutting down. You can't hold the lock on the various lists of 
cores while they shut
   // down, so we need to make a temporary copy of the names and shut them down 
outside the lock.
-  protected void close() {
+  public void close() {
     waitForLoadingCoresToFinish(30 * 1000);
 
     // It might be possible for one of the cores to move from one list to 
another while we're
@@ -488,4 +490,42 @@ public class SolrCores {
       modifyLock.notifyAll(); // Wakes up closer thread too
     }
   }
+
+  @Override
+  public void initializeMetrics(
+      SolrMetricsContext parentContext, Attributes attributes, String scope) {
+    parentContext.observableLongGauge(
+        "solr_cores_loaded",
+        "Number of Solr cores loaded by CoreContainer",
+        measurement -> {
+          measurement.record(
+              getNumLoadedPermanentCores(),
+              attributes.toBuilder().put(TYPE_ATTR, "permanent").build());
+          measurement.record(
+              getNumLoadedTransientCores(),
+              attributes.toBuilder().put(TYPE_ATTR, "transient").build());
+          measurement.record(
+              getNumUnloadedCores(), attributes.toBuilder().put(TYPE_ATTR, 
"unloaded").build());
+        });
+  }
+
+  @Override
+  public SolrMetricsContext getSolrMetricsContext() {
+    return this.container.solrMetricsContext;
+  }
+
+  @Override
+  public String getName() {
+    return this.getClass().getName();
+  }
+
+  @Override
+  public String getDescription() {
+    return "Manager for Solr cores within a CoreContainer";
+  }
+
+  @Override
+  public Category getCategory() {
+    return Category.CONTAINER;
+  }
 }
diff --git a/solr/core/src/java/org/apache/solr/core/TransientSolrCores.java 
b/solr/core/src/java/org/apache/solr/core/TransientSolrCores.java
index 2b50b2d81e3..7cba1e2edbc 100644
--- a/solr/core/src/java/org/apache/solr/core/TransientSolrCores.java
+++ b/solr/core/src/java/org/apache/solr/core/TransientSolrCores.java
@@ -34,7 +34,7 @@ public class TransientSolrCores extends SolrCores {
   }
 
   @Override
-  protected void close() {
+  public void close() {
     super.close();
     transientSolrCoreCache.close();
   }
diff --git 
a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java 
b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
index 7b03ea79492..8c994f6d9f8 100644
--- a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
@@ -17,14 +17,17 @@
 
 package org.apache.solr.metrics;
 
-import com.codahale.metrics.Gauge;
-import com.codahale.metrics.Metric;
-import com.codahale.metrics.MetricRegistry;
+import static org.apache.solr.metrics.SolrMetricProducer.TYPE_ATTR;
+
+import io.prometheus.metrics.model.snapshots.GaugeSnapshot;
+import 
io.prometheus.metrics.model.snapshots.GaugeSnapshot.GaugeDataPointSnapshot;
+import io.prometheus.metrics.model.snapshots.MetricSnapshots;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 import org.apache.http.client.HttpClient;
 import org.apache.solr.SolrTestCaseJ4;
@@ -35,7 +38,6 @@ import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.NodeConfig;
 import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrInfoBean;
 import org.apache.solr.core.SolrXmlConfig;
 import org.apache.solr.embedded.JettySolrRunner;
 import org.apache.solr.util.SolrMetricTestUtils;
@@ -81,25 +83,38 @@ public class SolrMetricsIntegrationTest extends 
SolrTestCaseJ4 {
 
   @Test
   public void testCoreContainerMetrics() {
-    String registryName = 
SolrMetricManager.getRegistryName(SolrInfoBean.Group.node);
-    assertTrue(
-        cc.getMetricManager().registryNames().toString(),
-        cc.getMetricManager().registryNames().contains(registryName));
-    MetricRegistry registry = cc.getMetricManager().registry(registryName);
-    Map<String, Metric> metrics = registry.getMetrics();
-    assertTrue(metrics.containsKey("CONTAINER.cores.loaded"));
-    assertTrue(metrics.containsKey("CONTAINER.cores.lazy"));
-    assertTrue(metrics.containsKey("CONTAINER.cores.unloaded"));
-    assertTrue(metrics.containsKey("CONTAINER.fs.totalSpace"));
-    assertTrue(metrics.containsKey("CONTAINER.fs.usableSpace"));
-    assertTrue(metrics.containsKey("CONTAINER.fs.path"));
-    assertTrue(metrics.containsKey("CONTAINER.fs.coreRoot.totalSpace"));
-    assertTrue(metrics.containsKey("CONTAINER.fs.coreRoot.usableSpace"));
-    assertTrue(metrics.containsKey("CONTAINER.fs.coreRoot.path"));
-    assertTrue(metrics.containsKey("CONTAINER.version.specification"));
-    assertTrue(metrics.containsKey("CONTAINER.version.implementation"));
-    Gauge<?> g = (Gauge<?>) metrics.get("CONTAINER.fs.path");
-    assertEquals(g.getValue(), cc.getSolrHome().toString());
+    MetricSnapshots metrics =
+        new MetricSnapshots(
+             metricManager.getPrometheusMetricReaders().entrySet().stream()
+                .flatMap(
+                    entry ->
+                        entry.getValue().collect().stream()
+                            .filter(m -> 
!m.getMetadata().getPrometheusName().startsWith("target")))
+                .toList());
+
+    GaugeSnapshot coresLoaded =
+        SolrMetricTestUtils.getMetricSnapshot(GaugeSnapshot.class, metrics, 
"solr_cores_loaded");
+    assertTrue(getGaugeOpt(coresLoaded, "permanent").isPresent());
+    assertTrue(getGaugeOpt(coresLoaded, "transient").isPresent());
+    assertTrue(getGaugeOpt(coresLoaded, "unloaded").isPresent());
+
+    GaugeSnapshot fsDiskSpace =
+        SolrMetricTestUtils.getMetricSnapshot(
+            GaugeSnapshot.class, metrics, 
"solr_cores_filesystem_disk_space_bytes");
+    assertTrue(getGaugeOpt(fsDiskSpace, "total_space").isPresent());
+    assertTrue(getGaugeOpt(fsDiskSpace, "usable_space").isPresent());
+
+    GaugeSnapshot rootDiskSpace =
+        SolrMetricTestUtils.getMetricSnapshot(
+            GaugeSnapshot.class, metrics, "solr_cores_root_disk_space_bytes");
+    assertTrue(getGaugeOpt(rootDiskSpace, "total_space").isPresent());
+    assertTrue(getGaugeOpt(rootDiskSpace, "usable_space").isPresent());
+  }
+
+  private static Optional<GaugeDataPointSnapshot> getGaugeOpt(GaugeSnapshot 
gauges, String type) {
+    return gauges.getDataPoints().stream()
+        .filter(g -> g.getLabels().get(TYPE_ATTR.toString()).equals(type))
+        .findFirst();
   }
 
   // NOCOMMIT: Comeback and fix this test after merging the SolrZKClient 
metrics migration
diff --git 
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
 
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
index b2843c6dec5..7f787c1cb6f 100644
--- 
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
+++ 
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
@@ -38,6 +38,8 @@ import org.apache.solr.common.util.Utils;
  * This class is responsible for fetching metrics and other attributes from a 
given node in Solr
  * cluster. This is a helper class that is used by {@link 
SolrClientNodeStateProvider}
  */
+// NOCOMMIT: Need to remove hardcoded references to Dropwizard metrics for 
OTEL conversion, and
+// probably change enum structure to be more compatible with OTEL naming
 public class NodeValueFetcher {
   // well known tags
   public static final String NODE = "node";
diff --git 
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
 
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
index e3b3e9cbcf5..b88ae6a177f 100644
--- 
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
+++ 
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
@@ -181,6 +181,8 @@ public class SolrClientNodeStateProvider implements 
NodeStateProvider, MapWriter
     return ctx.tags;
   }
 
+  // NOCOMMIT: We need to change the /admin/metrics call here to work with
+  // Prometheus/OTEL telemetry
   static void fetchReplicaMetrics(
       String solrNode, RemoteCallCtx ctx, Map<String, Set<Object>> 
metricsKeyVsTag) {
     if (!ctx.isNodeAlive(solrNode)) return;

Reply via email to