This is an automated email from the ASF dual-hosted git repository.
mlbiscoc pushed a commit to branch feature/SOLR-17458-rebased
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/feature/SOLR-17458-rebased by
this push:
new f88e8199843 SOLR-17880: Migrate SolrClientNodeStateProvider and
NodeValueFetcher to OTEL (#3713)
f88e8199843 is described below
commit f88e819984386318a5d8499fd1562a754079ad0e
Author: Matthew Biscocho <[email protected]>
AuthorDate: Mon Oct 6 15:02:04 2025 -0400
SOLR-17880: Migrate SolrClientNodeStateProvider and NodeValueFetcher to
OTEL (#3713)
* Update NodeValueFetcher with OTEL
* comment removal
* Feedback changes
* Update
solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
Co-authored-by: David Smiley <[email protected]>
* Additional feedback changes
* Update
solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
Co-authored-by: David Smiley <[email protected]>
* Remove filter and rename processor
* Update
solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
Co-authored-by: David Smiley <[email protected]>
* Move constants and cleanup inputStream
* Bad input stream
* Filter out prometheus comment lines at Stream method
---------
Co-authored-by: Matthew Biscocho <[email protected]>
Co-authored-by: David Smiley <[email protected]>
---
.../org/apache/solr/cluster/placement/Metric.java | 27 ++
.../apache/solr/cluster/placement/NodeMetric.java | 27 +-
.../placement/impl/AttributeFetcherImpl.java | 131 ++++----
.../solr/cluster/placement/impl/MetricImpl.java | 62 +++-
.../cluster/placement/impl/NodeMetricImpl.java | 78 ++---
.../cluster/placement/impl/ReplicaMetricImpl.java | 11 +-
.../java/org/apache/solr/core/CoreContainer.java | 110 -------
.../otel/FilterablePrometheusMetricReader.java | 2 +-
.../impl/PlacementPluginIntegrationTest.java | 35 +--
.../solr/client/solrj/impl/NodeValueFetcher.java | 350 +++++++++++++++------
.../solrj/impl/SolrClientNodeStateProvider.java | 150 +++++----
11 files changed, 535 insertions(+), 448 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cluster/placement/Metric.java
b/solr/core/src/java/org/apache/solr/cluster/placement/Metric.java
index b645fd94c41..afa1040b96a 100644
--- a/solr/core/src/java/org/apache/solr/cluster/placement/Metric.java
+++ b/solr/core/src/java/org/apache/solr/cluster/placement/Metric.java
@@ -36,4 +36,31 @@ public interface Metric<T> {
* @return converted value
*/
T convert(Object value);
+
+ /**
+ * Return the label key for Prometheus metrics filtering, if any.
+ *
+ * @return label key or null if no label filtering is needed
+ */
+ default String getLabelKey() {
+ return null;
+ }
+
+ /**
+ * Return the label value for Prometheus metrics filtering, if any.
+ *
+ * @return label value or null if no label filtering is needed
+ */
+ default String getLabelValue() {
+ return null;
+ }
+
+ /**
+ * Return true if this metric has label filtering.
+ *
+ * @return true if both label key and value are non-null
+ */
+ default boolean hasLabels() {
+ return getLabelKey() != null && getLabelValue() != null;
+ }
}
diff --git
a/solr/core/src/java/org/apache/solr/cluster/placement/NodeMetric.java
b/solr/core/src/java/org/apache/solr/cluster/placement/NodeMetric.java
index 8f5781d1bd9..d4a4492f982 100644
--- a/solr/core/src/java/org/apache/solr/cluster/placement/NodeMetric.java
+++ b/solr/core/src/java/org/apache/solr/cluster/placement/NodeMetric.java
@@ -18,29 +18,6 @@
package org.apache.solr.cluster.placement;
/**
- * Node metric identifier, corresponding to a node-level metric registry and
the internal metric
- * name.
+ * Node metric identifier, corresponding to a node-level metric name with
optional labels for metric filtering.
*/
-public interface NodeMetric<T> extends Metric<T> {
-
- /**
- * Metric registry. If this metric identifier uses a fully-qualified metric
key instead, then this
- * method will return {@link Registry#UNSPECIFIED}.
- */
- Registry getRegistry();
-
- /** Registry options for node metrics. */
- enum Registry {
- /** corresponds to solr.node */
- SOLR_NODE,
- /** corresponds to solr.jvm */
- SOLR_JVM,
- /** corresponds to solr.jetty */
- SOLR_JETTY,
- /**
- * In case when the registry name is not relevant (eg. a fully-qualified
metric key was provided
- * as the metric name).
- */
- UNSPECIFIED
- }
-}
+public interface NodeMetric<T> extends Metric<T> {}
diff --git
a/solr/core/src/java/org/apache/solr/cluster/placement/impl/AttributeFetcherImpl.java
b/solr/core/src/java/org/apache/solr/cluster/placement/impl/AttributeFetcherImpl.java
index 8ac3f9ab0c6..a8c918cb32c 100644
---
a/solr/core/src/java/org/apache/solr/cluster/placement/impl/AttributeFetcherImpl.java
+++
b/solr/core/src/java/org/apache/solr/cluster/placement/impl/AttributeFetcherImpl.java
@@ -17,14 +17,12 @@
package org.apache.solr.cluster.placement.impl;
-import java.lang.invoke.MethodHandles;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.function.BiConsumer;
import java.util.stream.Collectors;
import org.apache.solr.client.solrj.cloud.NodeStateProvider;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
@@ -36,19 +34,13 @@ import org.apache.solr.cluster.placement.AttributeValues;
import org.apache.solr.cluster.placement.CollectionMetrics;
import org.apache.solr.cluster.placement.NodeMetric;
import org.apache.solr.cluster.placement.ReplicaMetric;
-import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.Replica;
-import org.apache.solr.core.SolrInfoBean;
-import org.apache.solr.metrics.SolrMetricManager;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
* Implementation of {@link AttributeFetcher} that uses {@link
SolrCloudManager} to access Solr
* cluster details.
*/
public class AttributeFetcherImpl implements AttributeFetcher {
- private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
Set<String> requestedNodeSystemSnitchTags = new HashSet<>();
Set<NodeMetric<?>> requestedNodeMetricSnitchTags = new HashSet<>();
@@ -89,6 +81,12 @@ public class AttributeFetcherImpl implements
AttributeFetcher {
return this;
}
+ // TODO: This method is overly complex and very confusing for just getting
metrics across
+ // nodes, probably because of the complex filtering parameters of Dropwizard
and because it collects
+ // system property strings + ints + longs in a single map with a
conversion method in the
+ // middle of it.
+ // With the migration to OTEL, we should come back and clean this up with a
much cleaner
+ // implementation that does not need to link multiple maps with one
another.
@Override
public AttributeValues fetchAttributes() {
@@ -106,19 +104,11 @@ public class AttributeFetcherImpl implements
AttributeFetcher {
// Node to whatever defined above) we instead pass a function taking two
arguments, the node and
// the (non null) returned value, that will cast the value into the
appropriate type for the
// snitch tag and insert it into the appropriate map with the node as the
key.
- Map<String, BiConsumer<Node, Object>> allSnitchTagsToInsertion = new
HashMap<>();
for (String sysPropSnitch : requestedNodeSystemSnitchTags) {
- final Map<Node, String> sysPropMap = new HashMap<>();
- systemSnitchToNodeToValue.put(sysPropSnitch, sysPropMap);
- allSnitchTagsToInsertion.put(
- sysPropSnitch, (node, value) -> sysPropMap.put(node, (String)
value));
+ systemSnitchToNodeToValue.put(sysPropSnitch, new HashMap<>());
}
for (NodeMetric<?> metric : requestedNodeMetricSnitchTags) {
- final Map<Node, Object> metricMap = new HashMap<>();
- metricSnitchToNodeToValue.put(metric, metricMap);
- String metricSnitch = getMetricTag(metric);
- allSnitchTagsToInsertion.put(
- metricSnitch, (node, value) -> metricMap.put(node,
metric.convert(value)));
+ metricSnitchToNodeToValue.put(metric, new HashMap<>());
}
requestedCollectionMetrics.forEach(
(collection, tags) -> {
@@ -145,19 +135,10 @@ public class AttributeFetcherImpl implements
AttributeFetcher {
// TODO: we could probably fetch this in parallel - for large clusters
this could
// significantly shorten the execution time
for (Node node : nodes) {
- Map<String, Object> tagValues =
- nodeStateProvider.getNodeValues(node.getName(),
allSnitchTagsToInsertion.keySet());
- for (Map.Entry<String, Object> e : tagValues.entrySet()) {
- String tag = e.getKey();
- Object value = e.getValue(); // returned value from the node
-
- BiConsumer<Node, Object> inserter = allSnitchTagsToInsertion.get(tag);
- // If inserter is null it's a return of a tag that we didn't request
- if (inserter != null) {
- inserter.accept(node, value);
- } else {
- log.error("Received unsolicited snitch tag {} from node {}", tag,
node);
- }
+ // Fetch system properties and metrics for nodes
+ if (!requestedNodeSystemSnitchTags.isEmpty() ||
!requestedNodeMetricSnitchTags.isEmpty()) {
+ fetchNodeValues(
+ node, nodeStateProvider, systemSnitchToNodeToValue,
metricSnitchToNodeToValue);
}
}
@@ -217,37 +198,79 @@ public class AttributeFetcherImpl implements
AttributeFetcher {
systemSnitchToNodeToValue, metricSnitchToNodeToValue,
collectionMetrics);
}
- private static SolrInfoBean.Group
getGroupFromMetricRegistry(NodeMetric.Registry registry) {
- switch (registry) {
- case SOLR_JVM:
- return SolrInfoBean.Group.jvm;
- case SOLR_NODE:
- return SolrInfoBean.Group.node;
- case SOLR_JETTY:
- return SolrInfoBean.Group.jetty;
- default:
- throw new SolrException(
- SolrException.ErrorCode.SERVER_ERROR, "Unsupported registry value
" + registry);
+ /**
+ * Fetch both system properties and node metric values for a specific node
and add them
+ * to the corresponding maps.
+ */
+ private void fetchNodeValues(
+ Node node,
+ NodeStateProvider nodeStateProvider,
+ Map<String, Map<Node, String>> systemSnitchToNodeToValue,
+ Map<NodeMetric<?>, Map<Node, Object>> metricSnitchToNodeToValue) {
+
+ Set<String> allRequestedTags = new HashSet<>();
+ Map<String, NodeMetric<?>> tagToMetric = new HashMap<>();
+
+ // Add system property tags we need to request
+ allRequestedTags.addAll(requestedNodeSystemSnitchTags);
+
+ // Add metric tags we need to request
+ for (NodeMetric<?> metric : requestedNodeMetricSnitchTags) {
+ String tag = buildMetricTag(metric);
+ allRequestedTags.add(tag);
+ tagToMetric.put(tag, metric);
+ }
+
+ if (allRequestedTags.isEmpty()) {
+ return;
+ }
+
+ // Fetch all system properties and metric values for the requested tags
+ Map<String, Object> allValues =
+ nodeStateProvider.getNodeValues(node.getName(), allRequestedTags);
+
+ // Now process the results and place the system property and metric values
in the correct maps
+ for (Map.Entry<String, Object> entry : allValues.entrySet()) {
+ String tag = entry.getKey();
+ Object value = entry.getValue();
+
+ if (value != null) {
+ // Check if it's a system property
+ if (requestedNodeSystemSnitchTags.contains(tag)) {
+ systemSnitchToNodeToValue.get(tag).put(node, (String) value);
+ }
+
+ // Check if it's a metric
+ NodeMetric<?> metric = tagToMetric.get(tag);
+ if (metric != null) {
+ Object convertedValue = metric.convert(value);
+ metricSnitchToNodeToValue.get(metric).put(node, convertedValue);
+ }
+ }
}
}
- public static String getMetricTag(NodeMetric<?> metric) {
- if (metric.getRegistry() != NodeMetric.Registry.UNSPECIFIED) {
- // regular registry + metricName
- return NodeValueFetcher.METRICS_PREFIX
- +
SolrMetricManager.getRegistryName(getGroupFromMetricRegistry(metric.getRegistry()))
- + ":"
- + metric.getInternalName();
- } else if (NodeValueFetcher.tags.contains(metric.getInternalName())) {
+ /**
+ * Build a string tag for a NodeMetric that can be used with
NodeStateProvider.getNodeValues().
+ */
+ private String buildMetricTag(NodeMetric<?> metric) {
+ String metricTagName;
+ if (NodeValueFetcher.tags.contains(metric.getInternalName())) {
// "special" well-known tag
- return metric.getInternalName();
+ metricTagName = metric.getInternalName();
} else {
- // a fully-qualified metric key
- return NodeValueFetcher.METRICS_PREFIX + metric.getInternalName();
+ // A full Prometheus metric name
+ metricTagName = NodeValueFetcher.METRICS_PREFIX +
metric.getInternalName();
+ }
+ // Append label to metricTagName to filter
+ if (metric.hasLabels()) {
+ metricTagName =
+ metricTagName + ":" + metric.getLabelKey() + "=" + "\"" +
metric.getLabelValue() + "\"";
}
+ return metricTagName;
}
public static String getSystemPropertySnitchTag(String name) {
- return NodeValueFetcher.SYSPROP + name;
+ return NodeValueFetcher.SYSPROP_PREFIX + name;
}
}
diff --git
a/solr/core/src/java/org/apache/solr/cluster/placement/impl/MetricImpl.java
b/solr/core/src/java/org/apache/solr/cluster/placement/impl/MetricImpl.java
index b84c2892186..7cc5d7eb347 100644
--- a/solr/core/src/java/org/apache/solr/cluster/placement/impl/MetricImpl.java
+++ b/solr/core/src/java/org/apache/solr/cluster/placement/impl/MetricImpl.java
@@ -64,6 +64,8 @@ public abstract class MetricImpl<T> implements Metric<T> {
protected final String name;
protected final String internalName;
protected final Function<Object, T> converter;
+ protected final String labelKey;
+ protected final String labelValue;
/**
* Create a metric attribute.
@@ -72,7 +74,7 @@ public abstract class MetricImpl<T> implements Metric<T> {
* @param internalName internal name of a Solr metric.
*/
public MetricImpl(String name, String internalName) {
- this(name, internalName, null);
+ this(name, internalName, null, null, null);
}
/**
@@ -84,10 +86,31 @@ public abstract class MetricImpl<T> implements Metric<T> {
* used.
*/
public MetricImpl(String name, String internalName, Function<Object, T>
converter) {
+ this(name, internalName, null, null, converter);
+ }
+
+ /**
+ * Create a metric attribute with labels.
+ *
+ * @param name short-hand name that identifies this attribute.
+ * @param internalName internal name of a Solr metric.
+ * @param labelKey optional label key for Prometheus format labeled metrics.
+ * @param labelValue optional label value for Prometheus format labeled
metrics.
+ * @param converter optional raw value converter. If null then {@link
#IDENTITY_CONVERTER} will be
+ * used.
+ */
+ public MetricImpl(
+ String name,
+ String internalName,
+ String labelKey,
+ String labelValue,
+ Function<Object, T> converter) {
Objects.requireNonNull(name);
Objects.requireNonNull(internalName);
this.name = name;
this.internalName = internalName;
+ this.labelKey = labelKey;
+ this.labelValue = labelValue;
if (converter == null) {
this.converter = IDENTITY_CONVERTER;
} else {
@@ -105,6 +128,21 @@ public abstract class MetricImpl<T> implements Metric<T> {
return internalName;
}
+ @Override
+ public String getLabelKey() {
+ return labelKey;
+ }
+
+ @Override
+ public String getLabelValue() {
+ return labelValue;
+ }
+
+ @Override
+ public boolean hasLabels() {
+ return labelKey != null && labelValue != null;
+ }
+
@Override
public T convert(Object value) {
return converter.apply(value);
@@ -120,22 +158,26 @@ public abstract class MetricImpl<T> implements Metric<T> {
}
return name.equals(that.getName())
&& internalName.equals(that.getInternalName())
- && converter.equals(that.converter);
+ && Objects.equals(labelKey, that.labelKey)
+ && Objects.equals(labelValue, that.labelValue);
}
@Override
public int hashCode() {
- return Objects.hash(name, internalName, converter);
+ return Objects.hash(name, internalName, labelKey, labelValue);
}
@Override
public String toString() {
- return getClass().getSimpleName()
- + "{"
- + "name="
- + name
- + ", internalName="
- + internalName
- + "}";
+ String result =
+ getClass().getSimpleName() + "{" + "name=" + name + ", internalName="
+ internalName;
+ if (labelKey != null) {
+ result += ", labelKey='" + labelKey + '\'';
+ }
+ if (labelValue != null) {
+ result += ", labelValue='" + labelValue + '\'';
+ }
+ result += "}";
+ return result;
}
}
diff --git
a/solr/core/src/java/org/apache/solr/cluster/placement/impl/NodeMetricImpl.java
b/solr/core/src/java/org/apache/solr/cluster/placement/impl/NodeMetricImpl.java
index cdf1fcb93fc..2e62ffab60c 100644
---
a/solr/core/src/java/org/apache/solr/cluster/placement/impl/NodeMetricImpl.java
+++
b/solr/core/src/java/org/apache/solr/cluster/placement/impl/NodeMetricImpl.java
@@ -17,70 +17,56 @@
package org.apache.solr.cluster.placement.impl;
-import java.util.Objects;
import java.util.function.Function;
import org.apache.solr.client.solrj.impl.NodeValueFetcher;
import org.apache.solr.cluster.placement.NodeMetric;
-/**
- * Node metric identifier, corresponding to a node-level metric registry and
the internal metric
- * name.
- */
+/** Node metric identifier, corresponding to a node-level metric name with
labels */
public class NodeMetricImpl<T> extends MetricImpl<T> implements NodeMetric<T> {
/** Total disk space in GB. */
public static final NodeMetricImpl<Double> TOTAL_DISK_GB =
- new NodeMetricImpl<>(
- "totalDisk", Registry.SOLR_NODE, "CONTAINER.fs.totalSpace",
BYTES_TO_GB_CONVERTER);
+ new NodeMetricImpl<>("totaldisk", BYTES_TO_GB_CONVERTER);
/** Free (usable) disk space in GB. */
public static final NodeMetricImpl<Double> FREE_DISK_GB =
- new NodeMetricImpl<>(
- "freeDisk", Registry.SOLR_NODE, "CONTAINER.fs.usableSpace",
BYTES_TO_GB_CONVERTER);
+ new NodeMetricImpl<>("freedisk", BYTES_TO_GB_CONVERTER);
/** Number of all cores. */
public static final NodeMetricImpl<Integer> NUM_CORES =
new NodeMetricImpl<>(NodeValueFetcher.CORES);
- public static final NodeMetricImpl<Double> HEAP_USAGE =
- new NodeMetricImpl<>(NodeValueFetcher.Tags.HEAPUSAGE.tagName);
-
/** System load average. */
public static final NodeMetricImpl<Double> SYSLOAD_AVG =
- new NodeMetricImpl<>(
- NodeValueFetcher.Tags.SYSLOADAVG.tagName,
- Registry.SOLR_JVM,
- NodeValueFetcher.Tags.SYSLOADAVG.prefix);
+ new NodeMetricImpl<>("sysLoadAvg", "jvm_system_cpu_utilization_ratio");
/** Number of available processors. */
public static final NodeMetricImpl<Integer> AVAILABLE_PROCESSORS =
- new NodeMetricImpl<>("availableProcessors", Registry.SOLR_JVM,
"os.availableProcessors");
+ new NodeMetricImpl<>("availableProcessors", "jvm_cpu_count");
- private final Registry registry;
-
- public NodeMetricImpl(String name, Registry registry, String internalName) {
- this(name, registry, internalName, null);
+ public NodeMetricImpl(String name, String internalName) {
+ this(name, internalName, null);
}
- public NodeMetricImpl(
- String name, Registry registry, String internalName, Function<Object, T>
converter) {
+ public NodeMetricImpl(String name, String internalName, Function<Object, T>
converter) {
super(name, internalName, converter);
- Objects.requireNonNull(registry);
- this.registry = registry;
}
- public NodeMetricImpl(String key) {
- this(key, null);
+ public NodeMetricImpl(String key, Function<Object, T> converter) {
+ super(key, key, null, null, converter);
}
- public NodeMetricImpl(String key, Function<Object, T> converter) {
- super(key, key, converter);
- this.registry = Registry.UNSPECIFIED;
+ public NodeMetricImpl(
+ String name,
+ String internalName,
+ String labelKey,
+ String labelValue,
+ Function<Object, T> converter) {
+ super(name, internalName, labelKey, labelValue, converter);
}
- @Override
- public Registry getRegistry() {
- return registry;
+ public NodeMetricImpl(String key) {
+ super(key, key);
}
@Override
@@ -88,37 +74,19 @@ public class NodeMetricImpl<T> extends MetricImpl<T>
implements NodeMetric<T> {
if (this == o) {
return true;
}
- if (!(o instanceof NodeMetricImpl<?> that)) {
+ if (!(o instanceof NodeMetricImpl<?>)) {
return false;
}
- if (!super.equals(o)) {
- return false;
- }
- return registry == that.registry;
+ return super.equals(o);
}
@Override
public int hashCode() {
- return Objects.hash(super.hashCode(), registry);
+ return super.hashCode();
}
@Override
public String toString() {
- if (registry != null) {
- return "NodeMetricImpl{"
- + "name='"
- + name
- + '\''
- + ", internalName='"
- + internalName
- + '\''
- + ", converter="
- + converter
- + ", registry="
- + registry
- + '}';
- } else {
- return "NodeMetricImpl{key=" + internalName + "}";
- }
+ return "NodeMetricImpl{key=" + getInternalName() + "," + labelKey + "=" +
labelValue + "}";
}
}
diff --git
a/solr/core/src/java/org/apache/solr/cluster/placement/impl/ReplicaMetricImpl.java
b/solr/core/src/java/org/apache/solr/cluster/placement/impl/ReplicaMetricImpl.java
index 799f8f75566..a7b98e0daf8 100644
---
a/solr/core/src/java/org/apache/solr/cluster/placement/impl/ReplicaMetricImpl.java
+++
b/solr/core/src/java/org/apache/solr/cluster/placement/impl/ReplicaMetricImpl.java
@@ -24,18 +24,9 @@ import org.apache.solr.cluster.placement.ReplicaMetric;
* reported in <code>solr.core.[collection].[replica]</code> registry)
*/
public class ReplicaMetricImpl<T> extends MetricImpl<T> implements
ReplicaMetric<T> {
-
/** Replica index size in GB. */
public static final ReplicaMetricImpl<Double> INDEX_SIZE_GB =
- new ReplicaMetricImpl<>("sizeGB", "INDEX.sizeInBytes",
BYTES_TO_GB_CONVERTER);
-
- /** 1-min query rate of the /select handler. */
- public static final ReplicaMetricImpl<Double> QUERY_RATE_1MIN =
- new ReplicaMetricImpl<>("queryRate",
"QUERY./select.requestTimes:1minRate");
-
- /** 1-min update rate of the /update handler. */
- public static final ReplicaMetricImpl<Double> UPDATE_RATE_1MIN =
- new ReplicaMetricImpl<>("updateRate",
"UPDATE./update.requestTimes:1minRate");
+ new ReplicaMetricImpl<>("sizeGB", "solr_core_index_size_bytes",
BYTES_TO_GB_CONVERTER);
public ReplicaMetricImpl(String name, String internalName) {
super(name, internalName);
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 8bd4496e8cd..ead1559c827 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -930,27 +930,6 @@ public class CoreContainer {
// initialize gauges for reporting the number of cores and disk total/free
solrCores.initializeMetrics(solrMetricsContext, containerAttrs, "");
- // NOCOMMIT: Can't remove these without impacting node state reporting
- // until NodeValueFetcher and SolrClientNodeStateProvider are patched
- solrMetricsContext.gauge(
- solrCores::getNumLoadedPermanentCores,
- true,
- "loaded",
- SolrInfoBean.Category.CONTAINER.toString(),
- "cores");
- solrMetricsContext.gauge(
- solrCores::getNumLoadedTransientCores,
- true,
- "lazy",
- SolrInfoBean.Category.CONTAINER.toString(),
- "cores");
- solrMetricsContext.gauge(
- solrCores::getNumUnloadedCores,
- true,
- "unloaded",
- SolrInfoBean.Category.CONTAINER.toString(),
- "cores");
-
Path dataHome =
cfg.getSolrDataHome() != null ? cfg.getSolrDataHome() :
cfg.getCoreRootDirectory();
@@ -975,95 +954,6 @@ public class CoreContainer {
},
OtelUnit.BYTES);
- // NOCOMMIT: Can't remove these without impacting node state reporting
- // until NodeValueFetcher and SolrClientNodeStateProvider are patched
- solrMetricsContext.gauge(
- () -> {
- try {
- return Files.getFileStore(dataHome).getTotalSpace();
- } catch (IOException e) {
- throw new SolrException(
- ErrorCode.SERVER_ERROR,
- "Error retrieving total space for data home directory" +
dataHome,
- e);
- }
- },
- true,
- "totalSpace",
- SolrInfoBean.Category.CONTAINER.toString(),
- "fs");
-
- solrMetricsContext.gauge(
- () -> {
- try {
- return Files.getFileStore(dataHome).getUsableSpace();
- } catch (IOException e) {
- throw new SolrException(
- ErrorCode.SERVER_ERROR,
- "Error retrieving usable space for data home directory" +
dataHome,
- e);
- }
- },
- true,
- "usableSpace",
- SolrInfoBean.Category.CONTAINER.toString(),
- "fs");
- solrMetricsContext.gauge(
- dataHome::toString, true, "path",
SolrInfoBean.Category.CONTAINER.toString(), "fs");
- solrMetricsContext.gauge(
- () -> {
- try {
- return
Files.getFileStore(cfg.getCoreRootDirectory()).getTotalSpace();
- } catch (IOException e) {
- throw new SolrException(
- SolrException.ErrorCode.SERVER_ERROR,
- "Error retrieving total space for core root directory: "
- + cfg.getCoreRootDirectory(),
- e);
- }
- },
- true,
- "totalSpace",
- SolrInfoBean.Category.CONTAINER.toString(),
- "fs",
- "coreRoot");
- solrMetricsContext.gauge(
- () -> {
- try {
- return
Files.getFileStore(cfg.getCoreRootDirectory()).getUsableSpace();
- } catch (IOException e) {
- throw new SolrException(
- SolrException.ErrorCode.SERVER_ERROR,
- "Error retrieving usable space for core root directory: "
- + cfg.getCoreRootDirectory(),
- e);
- }
- },
- true,
- "usableSpace",
- SolrInfoBean.Category.CONTAINER.toString(),
- "fs",
- "coreRoot");
- solrMetricsContext.gauge(
- () -> cfg.getCoreRootDirectory().toString(),
- true,
- "path",
- SolrInfoBean.Category.CONTAINER.toString(),
- "fs",
- "coreRoot");
- // add version information
- solrMetricsContext.gauge(
- () -> this.getClass().getPackage().getSpecificationVersion(),
- true,
- "specification",
- SolrInfoBean.Category.CONTAINER.toString(),
- "version");
- solrMetricsContext.gauge(
- () -> this.getClass().getPackage().getImplementationVersion(),
- true,
- "implementation",
- SolrInfoBean.Category.CONTAINER.toString(),
- "version");
SolrFieldCacheBean fieldCacheBean = new SolrFieldCacheBean();
fieldCacheBean.initializeMetrics(
solrMetricsContext,
diff --git
a/solr/core/src/java/org/apache/solr/metrics/otel/FilterablePrometheusMetricReader.java
b/solr/core/src/java/org/apache/solr/metrics/otel/FilterablePrometheusMetricReader.java
index 4cd321c112f..5664ff047b1 100644
---
a/solr/core/src/java/org/apache/solr/metrics/otel/FilterablePrometheusMetricReader.java
+++
b/solr/core/src/java/org/apache/solr/metrics/otel/FilterablePrometheusMetricReader.java
@@ -42,7 +42,7 @@ public class FilterablePrometheusMetricReader extends
PrometheusMetricReader {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final Set<String> PROM_SUFFIXES =
- Set.of("_total", "_sum", "_count", "_bucket", "_gcount", "_gsum",
"_created", "_info");
+ Set.of("_total", "_sum", "_bucket", "_created", "_info");
public FilterablePrometheusMetricReader(
boolean otelScopeEnabled, Predicate<String>
allowedResourceAttributesFilter) {
diff --git
a/solr/core/src/test/org/apache/solr/cluster/placement/impl/PlacementPluginIntegrationTest.java
b/solr/core/src/test/org/apache/solr/cluster/placement/impl/PlacementPluginIntegrationTest.java
index 760efbb83ea..e8997c0a0d2 100644
---
a/solr/core/src/test/org/apache/solr/cluster/placement/impl/PlacementPluginIntegrationTest.java
+++
b/solr/core/src/test/org/apache/solr/cluster/placement/impl/PlacementPluginIntegrationTest.java
@@ -45,7 +45,6 @@ import org.apache.solr.cluster.SolrCollection;
import org.apache.solr.cluster.placement.AttributeFetcher;
import org.apache.solr.cluster.placement.AttributeValues;
import org.apache.solr.cluster.placement.CollectionMetrics;
-import org.apache.solr.cluster.placement.NodeMetric;
import org.apache.solr.cluster.placement.PlacementPluginConfig;
import org.apache.solr.cluster.placement.PlacementPluginFactory;
import org.apache.solr.cluster.placement.ReplicaMetrics;
@@ -82,8 +81,9 @@ public class PlacementPluginIntegrationTest extends
SolrCloudTestCase {
@BeforeClass
public static void setupCluster() throws Exception {
- // placement plugins need metrics
+ // placement plugins need metrics and JVM metrics
System.setProperty("metricsEnabled", "true");
+ System.setProperty("solr.metrics.jvm.enabled", "true");
configureCluster(3).addConfig("conf",
configset("cloud-minimal")).configure();
cc = cluster.getJettySolrRunner(0).getCoreContainer();
cloudManager = cc.getZkController().getSolrCloudManager();
@@ -388,7 +388,6 @@ public class PlacementPluginIntegrationTest extends
SolrCloudTestCase {
// NOCOMMIT: This test fails because of CollectionMetricsBuilder. Need to
dive deeper into what
// this is and if we need to shim otel into this metrics map.
@Test
- @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-17458")
public void testNodeTypeIntegration() throws Exception {
// this functionality relies on System.getProperty which we cannot set on
individual
// nodes in a mini cluster.
@@ -440,9 +439,7 @@ public class PlacementPluginIntegrationTest extends
SolrCloudTestCase {
System.clearProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP);
}
- // NOCOMMIT: This test needs to be fixed after migrating the collection
metrics builder
@Test
- @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-17458")
public void testAttributeFetcherImpl() throws Exception {
CollectionAdminResponse rsp =
CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 2)
@@ -452,34 +449,22 @@ public class PlacementPluginIntegrationTest extends
SolrCloudTestCase {
Cluster cluster = new
SimpleClusterAbstractionsImpl.ClusterImpl(cloudManager);
SolrCollection collection = cluster.getCollection(COLLECTION);
AttributeFetcher attributeFetcher = new AttributeFetcherImpl(cloudManager);
- NodeMetric<String> someMetricKey = new
NodeMetricImpl<>("solr.jvm:system.properties:user.name");
String sysprop = "user.name";
attributeFetcher
.fetchFrom(cluster.getLiveNodes())
- .requestNodeMetric(NodeMetricImpl.HEAP_USAGE)
.requestNodeMetric(NodeMetricImpl.SYSLOAD_AVG)
.requestNodeMetric(NodeMetricImpl.NUM_CORES)
.requestNodeMetric(NodeMetricImpl.FREE_DISK_GB)
.requestNodeMetric(NodeMetricImpl.TOTAL_DISK_GB)
.requestNodeMetric(NodeMetricImpl.AVAILABLE_PROCESSORS)
- .requestNodeMetric(someMetricKey)
.requestNodeSystemProperty(sysprop)
- .requestCollectionMetrics(
- collection,
- Set.of(
- ReplicaMetricImpl.INDEX_SIZE_GB,
- ReplicaMetricImpl.QUERY_RATE_1MIN,
- ReplicaMetricImpl.UPDATE_RATE_1MIN));
+ .requestCollectionMetrics(collection,
Set.of(ReplicaMetricImpl.INDEX_SIZE_GB));
AttributeValues attributeValues = attributeFetcher.fetchAttributes();
String userName = System.getProperty("user.name");
// node metrics
for (Node node : cluster.getLiveNodes()) {
- Optional<Double> doubleOpt = attributeValues.getNodeMetric(node,
NodeMetricImpl.HEAP_USAGE);
- assertTrue("heap usage", doubleOpt.isPresent());
- assertTrue(
- "heap usage should be 0 < heapUsage < 100 but was " + doubleOpt,
- doubleOpt.get() > 0 && doubleOpt.get() < 100);
- doubleOpt = attributeValues.getNodeMetric(node,
NodeMetricImpl.TOTAL_DISK_GB);
+ Optional<Double> doubleOpt =
+ attributeValues.getNodeMetric(node, NodeMetricImpl.TOTAL_DISK_GB);
assertTrue("total disk", doubleOpt.isPresent());
assertTrue("total disk should be > 0 but was " + doubleOpt,
doubleOpt.get() > 0);
doubleOpt = attributeValues.getNodeMetric(node,
NodeMetricImpl.FREE_DISK_GB);
@@ -494,9 +479,6 @@ public class PlacementPluginIntegrationTest extends
SolrCloudTestCase {
assertTrue(
"availableProcessors",
attributeValues.getNodeMetric(node,
NodeMetricImpl.AVAILABLE_PROCESSORS).isPresent());
- Optional<String> userNameOpt = attributeValues.getNodeMetric(node,
someMetricKey);
- assertTrue("user.name", userNameOpt.isPresent());
- assertEquals("userName", userName, userNameOpt.get());
Optional<String> syspropOpt = attributeValues.getSystemProperty(node,
sysprop);
assertTrue("sysprop", syspropOpt.isPresent());
assertEquals("user.name sysprop", userName, syspropOpt.get());
@@ -528,13 +510,6 @@ public class PlacementPluginIntegrationTest extends
SolrCloudTestCase {
assertTrue(
"indexSize should be < 0.01 but was " +
indexSizeOpt.get(),
indexSizeOpt.get() < 0.01);
-
- assertNotNull(
- "queryRate",
-
replicaMetrics.getReplicaMetric(ReplicaMetricImpl.QUERY_RATE_1MIN));
- assertNotNull(
- "updateRate",
-
replicaMetrics.getReplicaMetric(ReplicaMetricImpl.UPDATE_RATE_1MIN));
});
});
}
diff --git
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
index 7f787c1cb6f..30e614a61bc 100644
---
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
+++
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/NodeValueFetcher.java
@@ -17,157 +17,191 @@
package org.apache.solr.client.solrj.impl;
-import java.util.ArrayList;
+import static
org.apache.solr.client.solrj.impl.InputStreamResponseParser.STREAM_KEY;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.EnumSet;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
-import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.stream.Stream;
+import org.apache.solr.client.solrj.SolrRequest.METHOD;
+import org.apache.solr.client.solrj.SolrRequest.SolrRequestType;
+import org.apache.solr.client.solrj.request.GenericSolrRequest;
import org.apache.solr.client.solrj.response.SimpleSolrResponse;
import org.apache.solr.common.SolrException;
-import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.common.util.Utils;
/**
* This class is responsible for fetching metrics and other attributes from a
given node in Solr
* cluster. This is a helper class that is used by {@link
SolrClientNodeStateProvider}
*/
-// NOCOMMIT: Need to removed hardcoded references to Dropwizard metrics for
OTEL conversion, and
-// probably change enum structure to be more compatible with OTEL naming
public class NodeValueFetcher {
// well known tags
public static final String NODE = "node";
public static final String PORT = "port";
public static final String HOST = "host";
public static final String CORES = "cores";
- public static final String SYSPROP = "sysprop.";
+ public static final String SYSPROP_PREFIX = "sysprop.";
public static final Set<String> tags =
- Set.of(NODE, PORT, HOST, CORES, Tags.FREEDISK.tagName,
Tags.HEAPUSAGE.tagName);
+ Set.of(
+ NODE,
+ PORT,
+ HOST,
+ CORES,
+ Metrics.FREEDISK.tagName,
+ Metrics.TOTALDISK.tagName,
+ Metrics.SYSLOADAVG.tagName);
public static final Pattern hostAndPortPattern =
Pattern.compile("(?:https?://)?([^:]+):(\\d+)");
public static final String METRICS_PREFIX = "metrics:";
/** Various well known tags that can be fetched from a node */
- public enum Tags {
- FREEDISK(
- "freedisk", "solr.node", "CONTAINER.fs.usableSpace",
"solr.node/CONTAINER.fs.usableSpace"),
- TOTALDISK(
- "totaldisk", "solr.node", "CONTAINER.fs.totalSpace",
"solr.node/CONTAINER.fs.totalSpace"),
- CORES("cores", "solr.node", "CONTAINER.cores", null) {
+ public enum Metrics {
+ FREEDISK("freedisk", "solr_disk_space_bytes", "type", "usable_space"),
+ TOTALDISK("totaldisk", "solr_disk_space_bytes", "type", "total_space"),
+ CORES("cores", "solr_cores_loaded") {
@Override
- public Object extractResult(Object root) {
- NamedList<?> node = (NamedList<?>) Utils.getObjectByPath(root, false,
"solr.node");
- int count = 0;
- for (String leafCoreMetricName : new String[] {"lazy", "loaded",
"unloaded"}) {
- Number n = (Number) node.get("CONTAINER.cores." +
leafCoreMetricName);
- if (n != null) count += n.intValue();
- }
- return count;
+ public Object extractFromPrometheus(List<String> prometheusLines) {
+ return prometheusLines.stream()
+ .filter(line -> extractMetricNameFromLine(line).equals(metricName))
+ .mapToInt((value) -> extractPrometheusValue(value).intValue())
+ .sum();
}
},
- SYSLOADAVG("sysLoadAvg", "solr.jvm", "os.systemLoadAverage",
"solr.jvm/os.systemLoadAverage"),
- HEAPUSAGE("heapUsage", "solr.jvm", "memory.heap.usage",
"solr.jvm/memory.heap.usage");
- // the metrics group
- public final String group;
- // the metrics prefix
- public final String prefix;
+ SYSLOADAVG("sysLoadAvg", "jvm_system_cpu_utilization_ratio");
+
public final String tagName;
- // the json path in the response
- public final String path;
+ public final String metricName;
+ public final String labelKey;
+ public final String labelValue;
+
+ Metrics(String name, String metricName) {
+ this(name, metricName, null, null);
+ }
- Tags(String name, String group, String prefix, String path) {
- this.group = group;
- this.prefix = prefix;
+ Metrics(String name, String metricName, String labelKey, String
labelValue) {
this.tagName = name;
- this.path = path;
+ this.metricName = metricName;
+ this.labelKey = labelKey;
+ this.labelValue = labelValue;
}
- public Object extractResult(Object root) {
- Object v = Utils.getObjectByPath(root, true, path);
- return v == null ? null : convertVal(v);
+ /**
+ * Extract metric value from Prometheus response lines, optionally
filtering by label. This
+ * consolidated method handles both labeled and unlabeled metrics. This
method assumes 1 metric,
+ * so will get the first metricName it sees with associated label and
value.
+ */
+ public Object extractFromPrometheus(List<String> prometheusLines) {
+ return prometheusLines.stream()
+ .filter(line -> line.startsWith(metricName))
+ .filter(
+ line -> {
+ // If metric with specific labels were requested, filter by
those labels
+ if (labelKey != null && labelValue != null) {
+ String expectedLabel = labelKey + "=\"" + labelValue + "\"";
+ return line.contains(expectedLabel);
+ }
+ return true;
+ })
+ .findFirst()
+ .map(Metrics::extractPrometheusValue)
+ .orElse(null);
}
- public Object convertVal(Object val) {
- if (val instanceof String) {
- return Double.valueOf((String) val);
- } else if (val instanceof Number num) {
- return num.doubleValue();
+ /**
+ * Extracts the numeric value from a Prometheus metric line. Sample
inputs: - With labels:
+ * solr_metrics_core_requests_total{core="demo",...} 123.0 - Without
labels:
+ * solr_metrics_core_requests_total 123.0 - With exemplars:
+ * solr_metrics_core_requests_total{core="demo"} 123.0 #
{trace_id="abc123"} 2.0 1234567890
+ */
+ public static Double extractPrometheusValue(String line) {
+ String s = line.trim();
- } else {
- throw new IllegalArgumentException("Unknown type : " + val);
- }
- }
- }
+ // Get the position after the labels if they exist.
+ int afterLabelsPos = s.indexOf('}');
+ String tail = (afterLabelsPos >= 0) ? s.substring(afterLabelsPos +
1).trim() : s;
- /** Retrieve values that match JVM system properties and metrics. */
- private void getRemotePropertiesAndMetrics(
- Set<String> requestedTagNames, SolrClientNodeStateProvider.RemoteCallCtx
ctx) {
+ // Get the metric value after the first white space and chop off
anything after such as
+ // exemplars from Open Metrics Format
+ int whiteSpacePos = tail.indexOf(' ');
+ String firstToken = (whiteSpacePos >= 0) ? tail.substring(0,
whiteSpacePos) : tail;
- Map<String, Set<Object>> metricsKeyVsTag = new HashMap<>();
- for (String tag : requestedTagNames) {
- if (tag.startsWith(SYSPROP)) {
- metricsKeyVsTag
- .computeIfAbsent(
- "solr.jvm:system.properties:" +
tag.substring(SYSPROP.length()),
- k -> new HashSet<>())
- .add(tag);
- } else if (tag.startsWith(METRICS_PREFIX)) {
- metricsKeyVsTag
- .computeIfAbsent(tag.substring(METRICS_PREFIX.length()), k -> new
HashSet<>())
- .add(tag);
- }
+ return Double.parseDouble(firstToken);
}
- if (!metricsKeyVsTag.isEmpty()) {
- SolrClientNodeStateProvider.fetchReplicaMetrics(ctx.getNode(), ctx,
metricsKeyVsTag);
+
+    /**
+     * Exposes a Prometheus text-format response as a lazily evaluated stream of sample lines.
+     *
+     * <p>The bytes are decoded as UTF-8. Comment lines (those starting with {@code #}) and blank
+     * lines are dropped, so every element handed downstream is expected to be a metric sample
+     * that the line parsers in this class can process.
+     *
+     * <p>This method only wraps {@code inputStream}; it does not close it. Callers keep ownership
+     * of the stream and must finish consuming the returned {@link Stream} before closing it.
+     *
+     * @param inputStream raw body of a {@code wt=prometheus} metrics response
+     * @return the non-comment, non-blank lines of the response, in order
+     */
+    public static Stream<String> prometheusMetricStream(InputStream inputStream) {
+      BufferedReader reader =
+          new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
+
+      // Keep a line only if it is neither blank nor a Prometheus comment. The two conditions
+      // must be AND-ed: OR-ing them lets blank lines through to the sample parsers.
+      return reader.lines().filter(line -> !line.isBlank() && !line.startsWith("#"));
+    }
}
- /** Retrieve values of well known tags, as defined in {@link Tags}. */
- private void getRemoteTags(
+ /** Retrieve values of well known tags, as defined in {@link Metrics}. */
+ private void getRemoteMetricsFromTags(
Set<String> requestedTagNames, SolrClientNodeStateProvider.RemoteCallCtx
ctx) {
// First resolve names into actual Tags instances
- EnumSet<Tags> requestedTags = EnumSet.noneOf(Tags.class);
- for (Tags t : Tags.values()) {
+ EnumSet<Metrics> requestedMetricNames = EnumSet.noneOf(Metrics.class);
+ for (Metrics t : Metrics.values()) {
if (requestedTagNames.contains(t.tagName)) {
- requestedTags.add(t);
+ requestedMetricNames.add(t);
}
}
- if (requestedTags.isEmpty()) {
+
+ if (requestedMetricNames.isEmpty()) {
return;
}
- Set<String> groups = new HashSet<>();
- List<String> prefixes = new ArrayList<>();
- for (Tags t : requestedTags) {
- groups.add(t.group);
- prefixes.add(t.prefix);
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.add("wt", "prometheus");
+
+ // Collect unique metric names
+ Set<String> uniqueMetricNames = new HashSet<>();
+ for (Metrics t : requestedMetricNames) {
+ uniqueMetricNames.add(t.metricName);
}
- ModifiableSolrParams params = new ModifiableSolrParams();
- params.add("group", StrUtils.join(groups, ','));
- params.add("prefix", StrUtils.join(prefixes, ','));
+ // Use metric name filtering to get only the metrics we need
+ params.add("name", StrUtils.join(uniqueMetricNames, ','));
try {
+ var req = new GenericSolrRequest(METHOD.GET, "/admin/metrics",
SolrRequestType.ADMIN, params);
+ req.setResponseParser(new InputStreamResponseParser("prometheus"));
+
+ String baseUrl =
+
ctx.zkClientClusterStateProvider.getZkStateReader().getBaseUrlForNodeName(ctx.getNode());
SimpleSolrResponse rsp =
- ctx.invokeWithRetry(ctx.getNode(), CommonParams.METRICS_PATH,
params);
- NamedList<?> metrics = (NamedList<?>) rsp.getResponse().get("metrics");
- if (metrics != null) {
- for (Tags t : requestedTags) {
- ctx.tags.put(t.tagName, t.extractResult(metrics));
+ ctx.cloudSolrClient.getHttpClient().requestWithBaseUrl(baseUrl,
req::process);
+
+ // TODO come up with a better solution to stream this response instead
of loading in memory
+ try (InputStream prometheusStream = (InputStream)
rsp.getResponse().get(STREAM_KEY)) {
+ List<String> prometheusLines =
Metrics.prometheusMetricStream(prometheusStream).toList();
+ for (Metrics t : requestedMetricNames) {
+ Object value = t.extractFromPrometheus(prometheusLines);
+ if (value != null) {
+ ctx.tags.put(t.tagName, value);
+ }
}
}
} catch (Exception e) {
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error
getting remote info", e);
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}
public void getTags(Set<String> requestedTags,
SolrClientNodeStateProvider.RemoteCallCtx ctx) {
+ Set<String> requestsProperties = new HashSet<>();
+ Set<String> requestedMetrics = new HashSet<>();
+ Set<String> requestedMetricTags = new HashSet<>();
try {
if (requestedTags.contains(NODE)) ctx.tags.put(NODE, ctx.getNode());
@@ -185,10 +219,150 @@ public class NodeValueFetcher {
return;
}
- getRemotePropertiesAndMetrics(requestedTags, ctx);
- getRemoteTags(requestedTags, ctx);
+ // Categorize requested system properties or metrics
+ requestedTags.forEach(
+ tag -> {
+ if (tag.startsWith(SYSPROP_PREFIX)) {
+ requestsProperties.add(tag);
+ } else if (tag.startsWith(METRICS_PREFIX)) {
+ requestedMetrics.add(tag);
+ } else {
+ requestedMetricTags.add(tag);
+ }
+ });
+
+ getRemoteSystemProps(requestsProperties, ctx);
+ getRemoteMetrics(requestedMetrics, ctx);
+ getRemoteMetricsFromTags(requestedMetricTags, ctx);
+
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}
+
+  /**
+   * Fetches the requested JVM system properties from the node and records them in {@code
+   * ctx.tags}.
+   *
+   * <p>The node's {@code /admin/info/properties} handler is queried once and its {@code
+   * system.properties} section is read. Each requested tag has its {@link #SYSPROP_PREFIX}
+   * stripped to obtain the property name; when the node reports that property, its string form
+   * is stored under the original (prefixed) tag. Properties the node does not report are left
+   * unset.
+   *
+   * @param requestedTagNames tags of the form {@code sysprop.<property name>}
+   * @param ctx remote call context supplying the target node and receiving the results
+   * @throws SolrException with {@code SERVER_ERROR} if the remote call or response handling fails
+   */
+  private void getRemoteSystemProps(
+      Set<String> requestedTagNames, SolrClientNodeStateProvider.RemoteCallCtx ctx) {
+    // No system properties were requested: avoid a pointless remote round-trip, matching the
+    // early exits of the sibling metric fetchers.
+    if (requestedTagNames.isEmpty()) {
+      return;
+    }
+
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    try {
+      SimpleSolrResponse rsp =
+          ctx.invokeWithRetry(ctx.getNode(), "/admin/info/properties", params);
+      NamedList<?> systemPropsRsp = (NamedList<?>) rsp.getResponse().get("system.properties");
+      for (String requestedProperty : requestedTagNames) {
+        String propertyName = requestedProperty.substring(SYSPROP_PREFIX.length());
+        Object property = systemPropsRsp.get(propertyName);
+        if (property != null) {
+          ctx.tags.put(requestedProperty, property.toString());
+        }
+      }
+    } catch (Exception e) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error getting remote info", e);
+    }
+  }
+
+  /**
+   * Retrieves the values of metrics requested through {@link #METRICS_PREFIX}-prefixed tags.
+   *
+   * <p>Each tag is parsed with {@link MetricRequest#fromTag(String)} into a metric name and an
+   * optional label filter, for example {@code "metrics:jvm_cpu_count"} or a three-part tag whose
+   * last part is the label filter. The distinct metric names are fetched from the node as a
+   * Prometheus stream via {@code SolrClientNodeStateProvider.processMetricStream}. Every sample
+   * line is then compared against the requests, and the parsed value is stored in {@code
+   * ctx.tags} under the original tag of each request it satisfies.
+   *
+   * <p>The label filter is applied as a literal substring test against the raw sample line, so it
+   * has to be spelled exactly as the label appears there. NOTE(review): Prometheus renders label
+   * values in double quotes; confirm that callers include the quotes in the tag.
+   *
+   * @param requestedTagNames tags starting with {@code metrics:}
+   * @param ctx remote call context supplying the target node and receiving the results
+   */
+  private void getRemoteMetrics(
+      Set<String> requestedTagNames, SolrClientNodeStateProvider.RemoteCallCtx ctx) {
+    Set<MetricRequest> metricRequests = new HashSet<>();
+    Set<String> requestedMetricNames = new HashSet<>();
+
+    // Parse metric tags into structured MetricRequest objects
+    for (String tag : requestedTagNames) {
+      MetricRequest request = MetricRequest.fromTag(tag);
+      metricRequests.add(request);
+      requestedMetricNames.add(request.metricName());
+
+      // Register the tag up front so it is present (with a null value) even when the node
+      // returns no matching sample
+      ctx.tags.put(tag, null);
+    }
+
+    if (requestedMetricNames.isEmpty()) {
+      return;
+    }
+
+    SolrClientNodeStateProvider.processMetricStream(
+        ctx.getNode(),
+        ctx,
+        requestedMetricNames,
+        (line) -> {
+          String lineMetricName = extractMetricNameFromLine(line);
+
+          // Parsed lazily and at most once per line, so a sample nobody asked for can never
+          // abort the fetch with a number-format failure
+          Double value = null;
+
+          for (MetricRequest request : metricRequests) {
+            if (!request.metricName().equals(lineMetricName)) {
+              continue; // Metric name doesn't match
+            }
+
+            // Skip metric if it does not contain requested label
+            if (request.hasLabelFilter() && !line.contains(request.kvLabel())) {
+              continue;
+            }
+
+            if (value == null) {
+              value = Metrics.extractPrometheusValue(line);
+            }
+
+            // No early exit: several requests (e.g. the same metric with and without a label
+            // filter) may legitimately be satisfied by the same sample line
+            ctx.tags.put(request.originalTag(), value);
+          }
+        });
+  }
+
+  /**
+   * Extracts the metric name from a Prometheus formatted sample line.
+   *
+   * <p>The name is everything before the label block or, when there are no labels, before the
+   * first space. All of the following return {@code solr_metrics_core_requests_total}:
+   *
+   * <ul>
+   *   <li>With labels: {@code solr_metrics_core_requests_total{core="demo",...} 123.0}
+   *   <li>Without labels: {@code solr_metrics_core_requests_total 123.0}
+   *   <li>With exemplars: {@code solr_metrics_core_requests_total 123.0 # {trace_id="abc123"} 2.0
+   *       1234567890}
+   * </ul>
+   *
+   * <p>A line containing neither a brace nor a space is returned unchanged, and an empty line
+   * yields an empty name.
+   *
+   * @param line a single sample line, not a comment line
+   * @return the metric name portion of {@code line}
+   */
+  public static String extractMetricNameFromLine(String line) {
+    // Default to the whole line so that a bare token (or an empty line) cannot produce a
+    // negative end index
+    int end = line.length();
+
+    int brace = line.indexOf('{');
+    if (brace >= 0) {
+      end = brace;
+    }
+
+    // Whichever delimiter comes first terminates the name. A '{' found after the first space
+    // belongs to an exemplar, not to this sample's labels.
+    int space = line.indexOf(' ');
+    if (space >= 0 && space < end) {
+      end = space;
+    }
+
+    return line.substring(0, end);
+  }
+
+  /**
+   * Returns the value of the label named {@code labelKey} on a Prometheus sample line.
+   *
+   * <p>The label is located by searching for {@code labelKey="} and the value is the text up to
+   * the next double quote. A match only counts when it starts a label name, i.e. it sits at the
+   * start of the line or directly after <code>{</code>, {@code ,} or a space. This keeps a key
+   * such as {@code core} from matching a longer label like {@code solr_core}.
+   *
+   * @param line a single sample line
+   * @param labelKey exact label name to look up
+   * @return the label value, or {@code null} when the label is absent, empty or unterminated
+   */
+  public static String extractLabelValueFromLine(String line, String labelKey) {
+    String labelPattern = labelKey + "=\"";
+
+    int keyIdx = line.indexOf(labelPattern);
+    while (keyIdx >= 0) {
+      boolean startsLabelName = keyIdx == 0 || "{, ".indexOf(line.charAt(keyIdx - 1)) >= 0;
+      if (startsLabelName) {
+        int startIdx = keyIdx + labelPattern.length();
+        int endIdx = line.indexOf('"', startIdx);
+        return endIdx > startIdx ? line.substring(startIdx, endIdx) : null;
+      }
+      // This hit was only the tail of a longer label name; keep scanning
+      keyIdx = line.indexOf(labelPattern, keyIdx + 1);
+    }
+    return null;
+  }
+
+  /**
+   * A parsed {@code metrics:} tag: the metric to fetch, an optional label filter, and the tag it
+   * came from.
+   *
+   * @param metricName Prometheus metric name, the second colon-separated part of the tag
+   * @param kvLabel label filter text matched against sample lines, or {@code null} for none
+   * @param originalTag the unmodified tag, used as the key when storing the fetched value
+   */
+  record MetricRequest(String metricName, String kvLabel, String originalTag) {
+
+    /**
+     * Creates a MetricRequest from a tag of the form {@code metrics:<name>} or {@code
+     * metrics:<name>:<label filter>}, such as {@code "metrics:jvm_cpu_count"}.
+     *
+     * <p>Only the first two colons act as separators. Everything after the second colon is taken
+     * verbatim as the label filter, so a filter whose value itself contains a colon is kept
+     * intact.
+     *
+     * @param tag the requested tag
+     * @return the structured request
+     * @throws IllegalArgumentException if the tag carries no metric name
+     */
+    public static MetricRequest fromTag(String tag) {
+      // Limit of 3 so that colons inside the label filter are not treated as separators
+      String[] parts = tag.split(":", 3);
+      if (parts.length < 2 || parts[1].isEmpty()) {
+        throw new IllegalArgumentException("Invalid metric tag format: " + tag);
+      }
+
+      String metricName = parts[1];
+      // An empty third part (a trailing ':') means no filter was given
+      String labelFilter = (parts.length > 2 && !parts[2].isEmpty()) ? parts[2] : null;
+
+      return new MetricRequest(metricName, labelFilter, tag);
+    }
+
+    /** Returns {@code true} when this request restricts matching samples by label. */
+    public boolean hasLabelFilter() {
+      return kvLabel != null;
+    }
+  }
}
diff --git
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
index b88ae6a177f..8ae96ad9e9e 100644
---
a/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
+++
b/solr/solrj-zookeeper/src/java/org/apache/solr/client/solrj/impl/SolrClientNodeStateProvider.java
@@ -17,10 +17,12 @@
package org.apache.solr.client.solrj.impl;
+import static
org.apache.solr.client.solrj.impl.InputStreamResponseParser.STREAM_KEY;
+
import java.io.IOException;
+import java.io.InputStream;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
@@ -28,8 +30,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
-import java.util.function.Function;
-import java.util.stream.Collectors;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.cloud.NodeStateProvider;
@@ -40,7 +40,6 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Replica;
-import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Pair;
@@ -138,74 +137,95 @@ public class SolrClientNodeStateProvider implements
NodeStateProvider, MapWriter
String node, Collection<String> keys) {
Map<String, Map<String, List<Replica>>> result =
nodeVsCollectionVsShardVsReplicaInfo.computeIfAbsent(node, o -> new
HashMap<>());
- if (!keys.isEmpty()) {
- Map<String, Pair<String, Replica>> metricsKeyVsTagReplica = new
HashMap<>();
- forEachReplica(
- result,
- r -> {
- for (String key : keys) {
- if (r.getProperties().containsKey(key)) continue; // it's
already collected
- String perReplicaMetricsKey =
- "solr.core."
- + r.getCollection()
- + "."
- + r.getShard()
- + "."
- + Utils.parseMetricsReplicaName(r.getCollection(),
r.getCoreName())
- + ":";
- String perReplicaValue = key;
- perReplicaMetricsKey += perReplicaValue;
- metricsKeyVsTagReplica.put(perReplicaMetricsKey, new Pair<>(key,
r));
- }
- });
-
- if (!metricsKeyVsTagReplica.isEmpty()) {
- Map<String, Object> tagValues = fetchReplicaMetrics(node,
metricsKeyVsTagReplica);
- tagValues.forEach(
- (k, o) -> {
- Pair<String, Replica> p = metricsKeyVsTagReplica.get(k);
- if (p.second() != null)
p.second().getProperties().put(p.first(), o);
- });
- }
+
+ if (keys.isEmpty()) {
+ return result;
}
- return result;
- }
- protected Map<String, Object> fetchReplicaMetrics(
- String node, Map<String, Pair<String, Replica>> metricsKeyVsTagReplica) {
- Map<String, Set<Object>> collect =
- metricsKeyVsTagReplica.entrySet().stream()
- .collect(Collectors.toMap(e -> e.getKey(), e ->
Set.of(e.getKey())));
- RemoteCallCtx ctx = new RemoteCallCtx(null, solrClient);
- fetchReplicaMetrics(node, ctx, collect);
- return ctx.tags;
+ // Build mapping from core name to (replica, metric) {coreName: <Replica,
Prometheus Metric
+ // Name>}
+ Map<String, List<Pair<Replica, String>>> coreToReplicaProps = new
HashMap<>();
+ Set<String> requestedMetricNames = new HashSet<>();
+
+ forEachReplica(
+ result,
+ replica -> {
+ for (String key : keys) {
+ if (replica.getProperties().containsKey(key)) continue;
+
+ // Build core name as the key to the replica and the metric it
needs
+ String coreName =
+ replica.getCollection()
+ + "_"
+ + replica.getShard()
+ + "_"
+ + Utils.parseMetricsReplicaName(replica.getCollection(),
replica.getCoreName());
+
+ coreToReplicaProps
+ .computeIfAbsent(coreName, k -> new ArrayList<>())
+ .add(new Pair<>(replica, key));
+ requestedMetricNames.add(key);
+ }
+ });
+
+ if (coreToReplicaProps.isEmpty()) {
+ return result;
+ }
+
+ RemoteCallCtx ctx = new RemoteCallCtx(node, solrClient);
+ processMetricStream(
+ node,
+ ctx,
+ requestedMetricNames,
+ (line) -> {
+ String prometheusMetricName =
NodeValueFetcher.extractMetricNameFromLine(line);
+
+ // Extract core name from prometheus line and the core label
+ String coreParam = NodeValueFetcher.extractLabelValueFromLine(line,
"core");
+ if (coreParam == null) return;
+
+ // Find the matching core and set the metric value to its
corresponding replica
+ // properties
+ List<Pair<Replica, String>> replicaProps =
coreToReplicaProps.get(coreParam);
+ if (replicaProps != null) {
+ Double value =
NodeValueFetcher.Metrics.extractPrometheusValue(line);
+ replicaProps.stream()
+ .filter(pair -> pair.second().equals(prometheusMetricName))
+ .forEach(pair ->
pair.first().getProperties().put(pair.second(), value));
+ }
+ });
+ return result;
}
- // NOCOMMIT: We need to change the /admin/metrics call here to work with
- // Prometheus/OTEL telemetry
- static void fetchReplicaMetrics(
- String solrNode, RemoteCallCtx ctx, Map<String, Set<Object>>
metricsKeyVsTag) {
+ /** Process a stream of prometheus metrics lines */
+ static void processMetricStream(
+ String solrNode, RemoteCallCtx ctx, Set<String> metricNames,
Consumer<String> lineProcessor) {
if (!ctx.isNodeAlive(solrNode)) return;
+
ModifiableSolrParams params = new ModifiableSolrParams();
- params.add("key", metricsKeyVsTag.keySet().toArray(new String[0]));
- try {
- SimpleSolrResponse rsp = ctx.invokeWithRetry(solrNode,
CommonParams.METRICS_PATH, params);
- metricsKeyVsTag.forEach(
- (key, tags) -> {
- Object v =
- Utils.getObjectByPath(rsp.getResponse(), true,
Arrays.asList("metrics", key));
- for (Object tag : tags) {
- if (tag instanceof Function) {
- @SuppressWarnings({"unchecked"})
- Pair<String, Object> p = (Pair<String, Object>) ((Function)
tag).apply(v);
- ctx.tags.put(p.first(), p.second());
- } else {
- if (v != null) ctx.tags.put(tag.toString(), v);
- }
- }
- });
+ params.add("wt", "prometheus");
+ params.add("name", String.join(",", metricNames));
+
+ var req =
+ new GenericSolrRequest(
+ SolrRequest.METHOD.GET, "/admin/metrics",
SolrRequest.SolrRequestType.ADMIN, params);
+ req.setResponseParser(new InputStreamResponseParser("prometheus"));
+
+ String baseUrl =
+
ctx.zkClientClusterStateProvider.getZkStateReader().getBaseUrlForNodeName(solrNode);
+
+ try (InputStream in =
+ (InputStream)
+ ctx.cloudSolrClient
+ .getHttpClient()
+ .requestWithBaseUrl(baseUrl, req::process)
+ .getResponse()
+ .get(STREAM_KEY)) {
+
+
NodeValueFetcher.Metrics.prometheusMetricStream(in).forEach(lineProcessor);
} catch (Exception e) {
- log.warn("could not get tags from node {}", solrNode, e);
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR, "Unable to read prometheus
metrics output", e);
}
}