This is an automated email from the ASF dual-hosted git repository.
mlbiscoc pushed a commit to branch feature/SOLR-17458-rebased
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/feature/SOLR-17458-rebased by
this push:
new a466f04441a SOLR-17855: OTEL - CPU circuit breaker, awaitFixes and
NOCOMMIT fixes (#3719)
a466f04441a is described below
commit a466f04441abe62644d44c7b480c3ac5abc97f2e
Author: Matthew Biscocho <[email protected]>
AuthorDate: Mon Oct 6 15:04:32 2025 -0400
SOLR-17855: OTEL - CPU circuit breaker, awaitFixes and NOCOMMIT fixes
(#3719)
* Fix NOCOMMIT and broken awaitFix tests
* Remove jvm metrics disabled log line
---------
Co-authored-by: Matthew Biscocho <[email protected]>
---
.../placement/impl/CollectionMetricsBuilder.java | 1 -
.../java/org/apache/solr/core/CoreContainer.java | 4 -
.../src/java/org/apache/solr/core/SolrCore.java | 22 ----
.../src/java/org/apache/solr/core/ZkContainer.java | 9 +-
.../apache/solr/handler/RequestHandlerBase.java | 4 -
.../apache/solr/metrics/OtelRuntimeJvmMetrics.java | 14 +--
.../org/apache/solr/metrics/SolrMetricManager.java | 7 ++
.../apache/solr/servlet/CoreContainerProvider.java | 15 ---
.../apache/solr/update/DirectUpdateHandler2.java | 3 -
.../src/java/org/apache/solr/update/UpdateLog.java | 21 ++--
.../util/circuitbreaker/CPUCircuitBreaker.java | 34 +++---
.../org/apache/solr/core/DirectoryFactoryTest.java | 4 +
.../test/org/apache/solr/core/TestLazyCores.java | 22 ++--
.../solr/handler/RequestHandlerMetricsTest.java | 5 +-
.../org/apache/solr/metrics/JvmMetricsTest.java | 6 +-
.../solr/metrics/SolrCoreMetricManagerTest.java | 4 +-
.../solr/metrics/SolrMetricsIntegrationTest.java | 120 +++++++++++----------
.../test/org/apache/solr/search/TestRecovery.java | 30 +++---
.../org/apache/solr/util/TestCircuitBreakers.java | 1 +
.../solr/common/cloud/NodesSysPropsCacher.java | 16 +--
.../solr/common/cloud/TestNodesSysPropsCacher.java | 3 -
...bstractCollectionsAPIDistributedZkTestBase.java | 42 +++-----
22 files changed, 162 insertions(+), 225 deletions(-)
diff --git
a/solr/core/src/java/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilder.java
b/solr/core/src/java/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilder.java
index 53c714cf9bf..7fac922899b 100644
---
a/solr/core/src/java/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilder.java
+++
b/solr/core/src/java/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilder.java
@@ -29,7 +29,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** Builder class for constructing instances of {@link CollectionMetrics}. */
-// NOCOMMIT: Need to migrate this to an OTEL collection Metrics builder
public class CollectionMetricsBuilder {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index ead1559c827..9c287aca769 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -786,14 +786,12 @@ public class CoreContainer {
shardHandlerFactory =
ShardHandlerFactory.newInstance(cfg.getShardHandlerFactoryPluginInfo(), loader);
if (shardHandlerFactory instanceof SolrMetricProducer metricProducer) {
- // NOCOMMIT SOLR-17458: Add Otel
metricProducer.initializeMetrics(solrMetricsContext, Attributes.empty(),
"httpShardHandler");
}
updateShardHandler = new
UpdateShardHandler(cfg.getUpdateShardHandlerConfig());
solrClientProvider =
new HttpSolrClientProvider(cfg.getUpdateShardHandlerConfig(),
solrMetricsContext);
- // NOCOMMIT SOLR-17458: Add Otel
updateShardHandler.initializeMetrics(
solrMetricsContext, Attributes.empty(), "updateShardHandler");
solrClientCache = new SolrClientCache(solrClientProvider.getSolrClient());
@@ -836,7 +834,6 @@ public class CoreContainer {
this,
zkSys.getZkController().getNodeName(),
(PublicKeyHandler) containerHandlers.get(PublicKeyHandler.PATH));
- // NOCOMMIT SOLR-17458: AuthenticationPlugin.java
pkiAuthenticationSecurityBuilder.initializeMetrics(
solrMetricsContext,
Attributes.builder().put(HANDLER_ATTR,
"/authentication/pki").build(),
@@ -2287,7 +2284,6 @@ public class CoreContainer {
}
if (handler instanceof SolrMetricProducer) {
((SolrMetricProducer) handler)
- // NOCOMMIT SOLR-17458: Add Otel
.initializeMetrics(
solrMetricsContext, Attributes.builder().put(HANDLER_ATTR,
path).build(), path);
}
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java
b/solr/core/src/java/org/apache/solr/core/SolrCore.java
index 2f99f4be8af..970309b7944 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCore.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java
@@ -1441,21 +1441,6 @@ public class SolrCore implements SolrInfoBean, Closeable
{
observableLongMeasurement.record(getSegmentCount(),
baseGaugeCoreAttributes);
}));
- // NOCOMMIT: Do we need these start_time metrics? I think at minimum it
should be optional
- // otherwise we fall into metric bloat for something people may not care
about.
- observables.add(
- parentContext.observableLongGauge(
- "solr_core_start_time",
- "Start time of a Solr core",
- (observableLongMeasurement -> {
- observableLongMeasurement.record(
- startTime.getTime(),
- Attributes.builder()
- .putAll(baseGaugeCoreAttributes)
- .put(TYPE_ATTR, "start_time")
- .build());
- })));
-
if (coreContainer.isZooKeeperAware())
observables.add(
parentContext.observableLongGauge(
@@ -1468,13 +1453,6 @@ public class SolrCore implements SolrInfoBean, Closeable
{
})));
this.toClose = Collections.unmodifiableList(observables);
-
- // NOCOMMIT: Temporary to see metrics
- newSearcherCounter.inc();
- newSearcherMaxReachedCounter.inc();
- newSearcherOtherErrorsCounter.inc();
- newSearcherTimer.start().stop();
- newSearcherWarmupTimer.start().stop();
}
public String getMetricTag() {
diff --git a/solr/core/src/java/org/apache/solr/core/ZkContainer.java
b/solr/core/src/java/org/apache/solr/core/ZkContainer.java
index 196bfd2dd7e..eb0497a64bc 100644
--- a/solr/core/src/java/org/apache/solr/core/ZkContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/ZkContainer.java
@@ -204,7 +204,7 @@ public class ZkContainer {
"solr_zk_written",
"Total bytes written to ZooKeeper",
measurement -> {
-
measurement.record(metricsListener.getBytesWritten());
+
measurement.record(metricsListener.getBytesWritten(), attributes);
},
OtelUnit.BYTES));
@@ -213,7 +213,7 @@ public class ZkContainer {
"solr_zk_cumulative_multi_ops_total",
"Total cumulative multi-operations count",
measurement -> {
-
measurement.record(metricsListener.getCumulativeMultiOps());
+
measurement.record(metricsListener.getCumulativeMultiOps(), attributes);
}));
observables.add(
@@ -221,7 +221,7 @@ public class ZkContainer {
"solr_zk_child_fetches",
"Total number of ZooKeeper child node fetches",
measurement -> {
-
measurement.record(metricsListener.getChildFetches());
+
measurement.record(metricsListener.getChildFetches(), attributes);
}));
observables.add(
@@ -229,7 +229,8 @@ public class ZkContainer {
"solr_zk_cumulative_children_fetched",
"Total cumulative children fetched count",
measurement -> {
-
measurement.record(metricsListener.getCumulativeChildrenFetched());
+ measurement.record(
+ metricsListener.getCumulativeChildrenFetched(),
attributes);
}));
toClose = Collections.unmodifiableList(observables);
}
diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
index 4ac39cda833..f726842d773 100644
--- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
+++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
@@ -182,10 +182,6 @@ public abstract class RequestHandlerBase
new HandlerMetrics(
solrMetricsContext,
attributes.toBuilder().put(CATEGORY_ATTR,
getCategory().toString()).build());
-
- // NOCOMMIT: I don't see value in this metric
- solrMetricsContext.gauge(
- () -> handlerStart, true, "handlerStart", getCategory().toString(),
scope);
}
/** Metrics for this handler. */
diff --git
a/solr/core/src/java/org/apache/solr/metrics/OtelRuntimeJvmMetrics.java
b/solr/core/src/java/org/apache/solr/metrics/OtelRuntimeJvmMetrics.java
index 1711142a447..055e70efa70 100644
--- a/solr/core/src/java/org/apache/solr/metrics/OtelRuntimeJvmMetrics.java
+++ b/solr/core/src/java/org/apache/solr/metrics/OtelRuntimeJvmMetrics.java
@@ -27,18 +27,18 @@ import org.slf4j.LoggerFactory;
public class OtelRuntimeJvmMetrics {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- // Main feature flag to enable/disable all JVM metrics
- private static final String JVM_METRICS_ENABLED = "solr.metrics.jvm.enabled";
-
private RuntimeMetrics runtimeMetrics;
private boolean isInitialized = false;
+ // Main feature flag to enable/disable all JVM metrics
+ public static boolean isJvmMetricsEnabled() {
+ return EnvUtils.getPropertyAsBool("solr.metrics.jvm.enabled", true);
+ }
+
public OtelRuntimeJvmMetrics initialize(
SolrMetricManager solrMetricManager, String registryName) {
- if (!EnvUtils.getPropertyAsBool(JVM_METRICS_ENABLED, true)) {
- log.info("JVM metrics collection is disabled");
- return this;
- }
+ if (!isJvmMetricsEnabled()) return this;
+
this.runtimeMetrics =
RuntimeMetrics.builder(
OpenTelemetrySdk.builder()
diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
index fb2280a8cd6..e5e908d7393 100644
--- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
+++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
@@ -170,6 +170,7 @@ public class SolrMetricManager {
new ConcurrentHashMap<>();
private final MetricExporter metricExporter;
+ private OtelRuntimeJvmMetrics otelRuntimeJvmMetrics;
private static final List<Double> SOLR_NANOSECOND_HISTOGRAM_BOUNDARIES =
List.of(
@@ -205,6 +206,9 @@ public class SolrMetricManager {
timerSupplier = MetricSuppliers.timerSupplier(loader,
metricsConfig.getTimerSupplier());
histogramSupplier =
MetricSuppliers.histogramSupplier(loader,
metricsConfig.getHistogramSupplier());
+ this.otelRuntimeJvmMetrics =
+ new OtelRuntimeJvmMetrics()
+ .initialize(this,
SolrMetricManager.getRegistryName(SolrInfoBean.Group.jvm));
}
public LongCounter longCounter(
@@ -904,6 +908,9 @@ public class SolrMetricManager {
IOUtils.closeQuietly(meterAndReader.sdkMeterProvider);
});
meterProviderAndReaders.clear();
+ if (otelRuntimeJvmMetrics != null) {
+ otelRuntimeJvmMetrics.close();
+ }
}
/**
diff --git
a/solr/core/src/java/org/apache/solr/servlet/CoreContainerProvider.java
b/solr/core/src/java/org/apache/solr/servlet/CoreContainerProvider.java
index 3097f2fcb35..5e5f7e21cb8 100644
--- a/solr/core/src/java/org/apache/solr/servlet/CoreContainerProvider.java
+++ b/solr/core/src/java/org/apache/solr/servlet/CoreContainerProvider.java
@@ -53,10 +53,7 @@ import org.apache.solr.common.util.EnvUtils;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.NodeConfig;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrInfoBean.Group;
import org.apache.solr.core.SolrXmlConfig;
-import org.apache.solr.metrics.OtelRuntimeJvmMetrics;
-import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.SolrMetricProducer;
import org.apache.solr.servlet.RateLimitManager.Builder;
import org.apache.solr.util.StartupLoggingUtils;
@@ -73,9 +70,7 @@ public class CoreContainerProvider implements
ServletContextListener {
private final String metricTag = SolrMetricProducer.getUniqueMetricTag(this,
null);
private CoreContainer cores;
private Properties extraProperties;
- private SolrMetricManager metricManager;
private RateLimitManager rateLimitManager;
- private OtelRuntimeJvmMetrics otelRuntimeJvmMetrics;
/**
* Acquires an instance from the context. Never null.
@@ -142,11 +137,9 @@ public class CoreContainerProvider implements
ServletContextListener {
// }
cores = null;
- if (otelRuntimeJvmMetrics != null) otelRuntimeJvmMetrics.close();
if (cc != null) {
cc.shutdown();
}
- metricManager = null;
}
private void init(ServletContext servletContext) {
@@ -195,7 +188,6 @@ public class CoreContainerProvider implements
ServletContextListener {
});
coresInit = createCoreContainer(computeSolrHome(servletContext),
extraProperties);
- setupJvmMetrics(coresInit);
SolrZkClient zkClient = null;
ZkController zkController = coresInit.getZkController();
@@ -375,13 +367,6 @@ public class CoreContainerProvider implements
ServletContextListener {
return coreContainer;
}
- private void setupJvmMetrics(CoreContainer coresInit) {
- this.metricManager = coresInit.getMetricManager();
- this.otelRuntimeJvmMetrics =
- new OtelRuntimeJvmMetrics()
- .initialize(metricManager,
SolrMetricManager.getRegistryName(Group.jvm));
- }
-
public RateLimitManager getRateLimitManager() {
return rateLimitManager;
}
diff --git
a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
index 1ffb451dfb4..0a027282c46 100644
--- a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
+++ b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@@ -318,9 +318,6 @@ public class DirectUpdateHandler2 extends UpdateHandler
baseAttributes.toBuilder().put(TYPE_ATTR,
"soft_auto_commits").build());
})));
- // NOCOMMIT: This might not need to be an obseravableLongGauge, but a
simple long gauge. Seems
- // like a waste to constantly call this callback to get the latest value
if the upper bounds
- // rarely change.
observables.add(
solrMetricsContext.observableLongGauge(
"solr_core_update_commit_stats",
diff --git a/solr/core/src/java/org/apache/solr/update/UpdateLog.java
b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
index d2d50927b8e..e427582718a 100644
--- a/solr/core/src/java/org/apache/solr/update/UpdateLog.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
@@ -21,7 +21,6 @@ import static
org.apache.solr.update.processor.DistributingUpdateProcessorFactor
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongSet;
-import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import java.io.Closeable;
import java.io.IOException;
@@ -633,13 +632,8 @@ public class UpdateLog implements PluginInfoInitialized,
SolrMetricProducer {
final List<AutoCloseable> observables = new ArrayList<>();
solrMetricsContext = parentContext.getChildContext(this);
- // NOCOMMIT: We do not need a scope attribute
- // Will need to fix up SolrCoreMetricManager instead of directly removing
it from builder.
var baseAttributes =
- attributes.toBuilder()
- .remove(AttributeKey.stringKey("scope"))
- .put(CATEGORY_ATTR, SolrInfoBean.Category.TLOG.toString())
- .build();
+ attributes.toBuilder().put(CATEGORY_ATTR,
SolrInfoBean.Category.TLOG.toString()).build();
observables.add(
solrMetricsContext.observableLongGauge(
@@ -667,11 +661,14 @@ public class UpdateLog implements PluginInfoInitialized,
SolrMetricProducer {
toClose = Collections.unmodifiableList(observables);
- // NOCOMMIT: Do we want to keep this? Metric was just state with the
numeric enum value.
- // Without context this doesn't mean anything and can be very confusing.
Maybe keep the numeric
- // value and put the value meaning in the
- // description?
- // solrMetricsContext.gauge(() -> state.getValue(), true, "state",
scope);
+ solrMetricsContext.gauge(() -> state.getValue(), true, "state", scope);
+ observables.add(
+ solrMetricsContext.observableLongGauge(
+ "solr_core_update_log_state",
+ "The current state of the update log. Replaying (0), buffering
(1), applying buffered (2), active (3)",
+ (observableLongMeasurement -> {
+ observableLongMeasurement.record(state.getValue(),
baseAttributes);
+ })));
applyingBufferedOpsCounter =
new AttributedLongCounter(
diff --git
a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CPUCircuitBreaker.java
b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CPUCircuitBreaker.java
index cf226f1e6c0..b4c5eda9731 100644
---
a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CPUCircuitBreaker.java
+++
b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CPUCircuitBreaker.java
@@ -17,13 +17,12 @@
package org.apache.solr.util.circuitbreaker;
-import com.codahale.metrics.Gauge;
-import com.codahale.metrics.Metric;
+import io.prometheus.metrics.model.snapshots.GaugeSnapshot;
import java.lang.invoke.MethodHandles;
import java.util.Locale;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.metrics.SolrMetricManager;
+import org.apache.solr.metrics.OtelRuntimeJvmMetrics;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -110,23 +109,22 @@ public class CPUCircuitBreaker extends CircuitBreaker
implements SolrCoreAware {
* @return Percent CPU usage of -1 if value could not be obtained.
*/
protected double calculateLiveCPUUsage() {
- // TODO: Use Codahale Meter to calculate the value
- Metric metric =
-
this.cc.getMetricManager().registry("solr.jvm").getMetrics().get("os.systemCpuLoad");
-
- if (metric == null) {
- return -1.0;
- }
-
- if (metric instanceof Gauge<?> gauge) {
- // unwrap if needed
- if (gauge instanceof SolrMetricManager.GaugeWrapper) {
- gauge = ((SolrMetricManager.GaugeWrapper) gauge).getGauge();
- }
- return (Double) gauge.getValue() * 100;
+ if (!OtelRuntimeJvmMetrics.isJvmMetricsEnabled()) {
+ throw new IllegalStateException("JVM metrics disabled. Cannot calculate
CPU usage");
}
- return -1.0; // Unable to unpack metric
+ return this.cc
+ .getMetricManager()
+ .getPrometheusMetricReader("solr.jvm")
+ .collect(name -> name.contains("jvm_system_cpu_utilization_ratio"))
+ .stream()
+ .filter(GaugeSnapshot.class::isInstance)
+ .map(GaugeSnapshot.class::cast)
+ .map(GaugeSnapshot::getDataPoints)
+ .flatMap(java.util.Collection::stream)
+ .findFirst()
+ .map(dp -> dp.getValue() * 100)
+ .orElse(-1.0); // Unable to unpack metric
}
@Override
diff --git a/solr/core/src/test/org/apache/solr/core/DirectoryFactoryTest.java
b/solr/core/src/test/org/apache/solr/core/DirectoryFactoryTest.java
index 25112a16bea..8bbb3f58043 100755
--- a/solr/core/src/test/org/apache/solr/core/DirectoryFactoryTest.java
+++ b/solr/core/src/test/org/apache/solr/core/DirectoryFactoryTest.java
@@ -90,6 +90,7 @@ public class DirectoryFactoryTest extends SolrTestCase {
args.add("solr.data.home", "/solrdata/");
df.init(args);
assertDataHome("/solrdata/inst_dir/data", "inst_dir", df, cc);
+ cc.shutdown();
// solr.data.home set with System property, and relative path
System.setProperty("solr.data.home", "solrdata");
@@ -108,6 +109,8 @@ public class DirectoryFactoryTest extends SolrTestCase {
cc,
"dataDir",
"mydata");
+ cc.shutdown();
+
// solr.data.home set but also solrDataHome set in solr.xml, which should
override the former
System.setProperty("test.solr.data.home", "/foo");
config = loadNodeConfig("/solr/solr-solrDataHome.xml");
@@ -116,6 +119,7 @@ public class DirectoryFactoryTest extends SolrTestCase {
df.initCoreContainer(cc);
df.init(new NamedList<>());
assertDataHome("/foo/inst_dir/data", "inst_dir", df, cc);
+ cc.shutdown();
}
private void assertDataHome(
diff --git a/solr/core/src/test/org/apache/solr/core/TestLazyCores.java
b/solr/core/src/test/org/apache/solr/core/TestLazyCores.java
index 3cc8ed7c77f..be876b66b4e 100644
--- a/solr/core/src/test/org/apache/solr/core/TestLazyCores.java
+++ b/solr/core/src/test/org/apache/solr/core/TestLazyCores.java
@@ -16,6 +16,7 @@
*/
package org.apache.solr.core;
+import io.prometheus.metrics.model.snapshots.MetricSnapshots;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
@@ -326,18 +327,21 @@ public class TestLazyCores extends SolrTestCaseJ4 {
// verify that getting metrics from an unloaded core doesn't cause
exceptions (SOLR-12541)
try (SolrCore core1 = cc.getCore("collection1");
MetricsHandler handler = new MetricsHandler(h.getCoreContainer())) {
-
SolrQueryResponse resp = new SolrQueryResponse();
- handler.handleRequest(makeReq(core1, CommonParams.QT,
"/admin/metrics"), resp);
+ handler.handleRequest(
+ makeReq(core1, CommonParams.QT, "/admin/metrics", "wt",
"prometheus"), resp);
NamedList<?> values = resp.getValues();
- assertNotNull(values.get("metrics"));
- values = (NamedList<?>) values.get("metrics");
- NamedList<?> nl = (NamedList<?>) values.get("solr.core.collection2");
- assertNotNull(nl);
- Object o = nl.get("REPLICATION./replication.indexPath");
- assertNotNull(o);
+ MetricSnapshots snapshots = (MetricSnapshots) values.get("metrics");
+ assertNotNull(snapshots);
+ assertTrue(
+ snapshots.stream()
+ .anyMatch(
+ (snapshot) ->
+ snapshot
+ .getMetadata()
+ .getPrometheusName()
+ .equals("solr_replication_index_size_bytes")));
}
-
} finally {
cc.shutdown();
}
diff --git
a/solr/core/src/test/org/apache/solr/handler/RequestHandlerMetricsTest.java
b/solr/core/src/test/org/apache/solr/handler/RequestHandlerMetricsTest.java
index 467094ccd62..e9618297760 100644
--- a/solr/core/src/test/org/apache/solr/handler/RequestHandlerMetricsTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/RequestHandlerMetricsTest.java
@@ -54,10 +54,9 @@ public class RequestHandlerMetricsTest extends
SolrCloudTestCase {
System.clearProperty("metricsEnabled");
}
- // NOCOMMIT: Need to fix aggregateNodeLevelMetricsEnabled for OTEL. Delegate
registries currently
- // do not play nice with OTEL instrumentation
+ // TODO: Migrate aggregateNodeLevelMetricsEnabled for OTEL metrics
@Test
- @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-17458")
+ @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-17865")
@SuppressWarnings({"unchecked"})
public void testAggregateNodeLevelMetrics() throws SolrServerException,
IOException {
String collection1 = "testRequestHandlerMetrics1";
diff --git a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java
b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java
index 5bc20b0d09e..0347fd4938b 100644
--- a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java
@@ -66,7 +66,7 @@ public class JvmMetricsTest extends SolrJettyTestBase {
}
@Test
- public void testSetupJvmMetrics() {
+ public void testSetupJvmMetrics() throws InterruptedException {
PrometheusMetricReader reader =
getJetty().getCoreContainer().getMetricManager().getPrometheusMetricReader("solr.jvm");
MetricSnapshots snapshots = reader.collect();
@@ -93,10 +93,6 @@ public class JvmMetricsTest extends SolrJettyTestBase {
"Should have JVM CPU metrics",
metricNames.stream().anyMatch(name -> name.startsWith("jvm_cpu")));
- assertTrue(
- "Should have JVM GC metrics",
- metricNames.stream().anyMatch(name -> name.startsWith("jvm_gc")));
-
assertTrue(
"Should have JVM buffer metrics",
metricNames.stream().anyMatch(name -> name.startsWith("jvm_buffer")));
diff --git
a/solr/core/src/test/org/apache/solr/metrics/SolrCoreMetricManagerTest.java
b/solr/core/src/test/org/apache/solr/metrics/SolrCoreMetricManagerTest.java
index 49f8421b86e..a124ad26e11 100644
--- a/solr/core/src/test/org/apache/solr/metrics/SolrCoreMetricManagerTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/SolrCoreMetricManagerTest.java
@@ -25,7 +25,6 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
-import java.util.Set;
import java.util.stream.Collectors;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
@@ -224,8 +223,7 @@ public class SolrCoreMetricManagerTest extends
SolrTestCaseJ4 {
SolrCore core = cc.create("to-unload", Map.of("configSet", "minimal"));
coreRegistryName = core.getSolrMetricsContext().getRegistryName();
- Set<String> names = metricManager.registryNames();
- assertTrue("missing registry", names.contains(coreRegistryName));
+ assertNotNull("missing registry",
metricManager.getPrometheusMetricReader(coreRegistryName));
// Commit and wait for searcher to ensure the searcher is created. This is
required to make sure
// the core inner thread does not create it *after* we asked the container
to unload the core.
diff --git
a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
index 23b39040695..a65d61c19bc 100644
--- a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
@@ -23,7 +23,6 @@ import io.prometheus.metrics.model.snapshots.Labels;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.http.client.HttpClient;
@@ -40,7 +39,6 @@ import org.apache.solr.core.SolrXmlConfig;
import org.apache.solr.embedded.JettySolrRunner;
import org.apache.solr.util.SolrMetricTestUtils;
import org.apache.solr.util.TestHarness;
-import org.hamcrest.number.OrderingComparison;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -99,9 +97,7 @@ public class SolrMetricsIntegrationTest extends
SolrTestCaseJ4 {
Labels.of("category", "CONTAINER", "otel_scope_name",
"org.apache.solr", "type", type));
}
- // NOCOMMIT: Comeback and fix this test after merging the SolrZKClient
metrics migration
@Test
- @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-17458")
public void testZkMetrics() throws Exception {
System.setProperty("metricsEnabled", "true");
MiniSolrCloudCluster cluster =
@@ -109,61 +105,69 @@ public class SolrMetricsIntegrationTest extends
SolrTestCaseJ4 {
.addConfig("conf", configset("conf2"))
.configure();
System.clearProperty("metricsEnabled");
- try {
- JettySolrRunner j = cluster.getRandomJetty(random());
- String url = j.getBaseUrl() +
"/admin/metrics?key=solr.node:CONTAINER.zkClient&wt=json";
- try (SolrClient solrClient = j.newClient()) {
- HttpClient httpClient = ((HttpSolrClient) solrClient).getHttpClient();
- @SuppressWarnings("unchecked")
- Map<String, Object> zkMmetrics =
- (Map<String, Object>)
- Utils.getObjectByPath(
- HttpClientUtil.executeGET(httpClient, url,
Utils.JSONCONSUMER),
- false,
- List.of("metrics", "solr.node:CONTAINER.zkClient"));
-
- Set<String> allKeys =
- Set.of(
- "watchesFired",
- "reads",
- "writes",
- "bytesRead",
- "bytesWritten",
- "multiOps",
- "cumulativeMultiOps",
- "childFetches",
- "cumulativeChildrenFetched",
- "existsChecks",
- "deletes");
-
- for (String k : allKeys) {
- assertNotNull(zkMmetrics.get(k));
- }
- HttpClientUtil.executeGET(
- httpClient,
- j.getBaseURLV2() + "/cluster/zookeeper/children/live_nodes",
- Utils.JSONCONSUMER);
- @SuppressWarnings("unchecked")
- Map<String, Object> zkMmetricsNew =
- (Map<String, Object>)
- Utils.getObjectByPath(
- HttpClientUtil.executeGET(httpClient, url,
Utils.JSONCONSUMER),
- false,
- List.of("metrics", "solr.node:CONTAINER.zkClient"));
-
- assertThat(
- findDelta(zkMmetrics, zkMmetricsNew, "childFetches"),
- OrderingComparison.greaterThanOrEqualTo(1L));
- assertThat(
- findDelta(zkMmetrics, zkMmetricsNew, "cumulativeChildrenFetched"),
- OrderingComparison.greaterThanOrEqualTo(3L));
- assertThat(
- findDelta(zkMmetrics, zkMmetricsNew, "existsChecks"),
- OrderingComparison.greaterThanOrEqualTo(4L));
- }
- } finally {
- cluster.shutdown();
+ JettySolrRunner j = cluster.getRandomJetty(random());
+ var builder =
+ Labels.builder().label("category",
"CONTAINER").label("otel_scope_name", "org.apache.solr");
+ var baseLabels = builder.build();
+
+ var reader =
j.getCoreContainer().getMetricManager().getPrometheusMetricReader("solr.node");
+
+ assertNotNull(
+ SolrMetricTestUtils.getCounterDatapoint(reader,
"solr_zk_watches_fired", baseLabels));
+ assertNotNull(
+ SolrMetricTestUtils.getCounterDatapoint(reader, "solr_zk_read_bytes",
baseLabels));
+ assertNotNull(
+ SolrMetricTestUtils.getCounterDatapoint(reader,
"solr_zk_written_bytes", baseLabels));
+ assertNotNull(
+ SolrMetricTestUtils.getCounterDatapoint(
+ reader, "solr_zk_cumulative_multi_ops", baseLabels));
+
+ Set<String> types = Set.of("delete", "exists", "multi", "read", "write");
+
+ for (String type : types) {
+ assertNotNull(
+ SolrMetricTestUtils.getCounterDatapoint(
+ reader, "solr_zk_ops", baseLabels.merge(Labels.of("ops",
type))));
}
+
+ try (SolrClient solrClient = j.newClient()) {
+ HttpClient httpClient = ((HttpSolrClient) solrClient).getHttpClient();
+ var initialChildrenFetched =
+ SolrMetricTestUtils.getCounterDatapoint(
+ reader, "solr_zk_cumulative_children_fetched", baseLabels)
+ .getValue();
+ var initialChildFetches =
+ SolrMetricTestUtils.getCounterDatapoint(reader,
"solr_zk_child_fetches", baseLabels)
+ .getValue();
+ var initialExistsOp =
+ SolrMetricTestUtils.getCounterDatapoint(
+ reader, "solr_zk_ops", baseLabels.merge(Labels.of("ops",
"exists")))
+ .getValue();
+
+ // Send GET request to trigger some metrics
+ HttpClientUtil.executeGET(
+ httpClient,
+ j.getBaseURLV2() + "/cluster/zookeeper/children/live_nodes",
+ Utils.JSONCONSUMER);
+
+ var childrenFetched =
+ SolrMetricTestUtils.getCounterDatapoint(
+ reader, "solr_zk_cumulative_children_fetched", baseLabels)
+ .getValue();
+ var childFetches =
+ SolrMetricTestUtils.getCounterDatapoint(reader,
"solr_zk_child_fetches", baseLabels)
+ .getValue();
+ var existsOp =
+ SolrMetricTestUtils.getCounterDatapoint(
+ reader, "solr_zk_ops", builder.label("ops",
"exists").build())
+ .getValue();
+
+ assertTrue(childrenFetched - initialChildrenFetched >= 3.0);
+ assertTrue(childFetches - initialChildFetches >= 1.0);
+ assertTrue(existsOp - initialExistsOp >= 4.0);
+ }
+
+ cluster.shutdown();
}
@Test
diff --git a/solr/core/src/test/org/apache/solr/search/TestRecovery.java
b/solr/core/src/test/org/apache/solr/search/TestRecovery.java
index 115682cbc27..60bde7493bd 100644
--- a/solr/core/src/test/org/apache/solr/search/TestRecovery.java
+++ b/solr/core/src/test/org/apache/solr/search/TestRecovery.java
@@ -248,10 +248,12 @@ public class TestRecovery extends SolrTestCaseJ4 {
.build();
// check metrics
- // NOCOMMIT: Need to decide if we are keeping this state metric or not
- // @SuppressWarnings({"unchecked"})
- // Gauge<Integer> state = (Gauge<Integer>)
metrics.get("TLOG.state");
- // assertEquals(UpdateLog.State.REPLAYING.ordinal(),
state.getValue().intValue());
+ assertEquals(
+ UpdateLog.State.REPLAYING.ordinal(),
+ SolrMetricTestUtils.getGaugeDatapoint(
+ h.getCore(), "solr_core_update_log_state", attributes)
+ .getValue(),
+ 0.0);
var actualReplayingLogs =
SolrMetricTestUtils.getGaugeDatapoint(
@@ -283,8 +285,12 @@ public class TestRecovery extends SolrTestCaseJ4 {
.getValue();
assertEquals(7.0, actualReplayOps, 0.0);
- // NOCOMMIT: Need to decide if we are keeping this state metric or not
- // assertEquals(UpdateLog.State.ACTIVE.ordinal(),
state.getValue().intValue());
+ assertEquals(
+ UpdateLog.State.ACTIVE.ordinal(),
+ SolrMetricTestUtils.getGaugeDatapoint(
+ h.getCore(), "solr_core_update_log_state", attributes)
+ .getValue(),
+ 0.0);
// make sure we can still access versions after recovery
assertJQ(req("qt", "/get", "getVersions", "" + versions.size()),
"/versions==" + versions);
@@ -662,8 +668,6 @@ public class TestRecovery extends SolrTestCaseJ4 {
clearIndex();
assertU(commit());
- Map<String, Metric> metrics = getMetrics();
-
assertEquals(UpdateLog.State.ACTIVE, ulog.getState());
ulog.bufferUpdates();
assertEquals(UpdateLog.State.BUFFERING, ulog.getState());
@@ -680,10 +684,12 @@ public class TestRecovery extends SolrTestCaseJ4 {
.label("category", "TLOG")
.build();
- // NOCOMMIT: Need to decide if we are keeping this state metric or not
- // @SuppressWarnings({"unchecked"})
- // Gauge<Integer> state = (Gauge<Integer>)
metrics.get("TLOG.state");
- // assertEquals(UpdateLog.State.BUFFERING.ordinal(),
state.getValue().intValue());
+ assertEquals(
+ UpdateLog.State.BUFFERING.ordinal(),
+ SolrMetricTestUtils.getGaugeDatapoint(
+ h.getCore(), "solr_core_update_log_state", attributes)
+ .getValue(),
+ 0.0);
String v3 = getNextVersion();
String v940_del = "-" + getNextVersion();
diff --git a/solr/core/src/test/org/apache/solr/util/TestCircuitBreakers.java
b/solr/core/src/test/org/apache/solr/util/TestCircuitBreakers.java
index 0f03a3dcb90..52998058738 100644
--- a/solr/core/src/test/org/apache/solr/util/TestCircuitBreakers.java
+++ b/solr/core/src/test/org/apache/solr/util/TestCircuitBreakers.java
@@ -56,6 +56,7 @@ public class TestCircuitBreakers extends SolrTestCaseJ4 {
System.setProperty("filterCache.enabled", "false");
System.setProperty("queryResultCache.enabled", "false");
System.setProperty("documentCache.enabled", "true");
+ System.setProperty("solr.metrics.jvm.enabled", "true");
System.clearProperty(CircuitBreaker.SYSPROP_SOLR_CIRCUITBREAKER_ERRORCODE);
initCore("solrconfig-pluggable-circuitbreaker.xml", "schema.xml");
diff --git
a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/NodesSysPropsCacher.java
b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/NodesSysPropsCacher.java
index 4e79f905972..c3b679546ec 100644
---
a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/NodesSysPropsCacher.java
+++
b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/NodesSysPropsCacher.java
@@ -74,28 +74,20 @@ public class NodesSysPropsCacher implements NodesSysProps,
AutoCloseable {
return result;
}
- // NOCOMMIT: These properties were fetched from the /admin/metrics endpoint.
These properties were
- // stored as strings instead of numeric values. This is not possible in OTEL
metrics.
- // Use /admin/info/properties to fetch system properties.
private Map<String, Object> fetchProps(String nodeName, Collection<String>
tags) {
ModifiableSolrParams msp = new ModifiableSolrParams();
msp.add(CommonParams.OMIT_HEADER, "true");
- LinkedHashMap<String, String> keys = new LinkedHashMap<>();
- for (String tag : tags) {
- String metricsKey = "solr.jvm:system.properties:" + tag;
- keys.put(tag, metricsKey);
- msp.add("key", metricsKey);
- }
- GenericSolrRequest req = new GenericSolrRequest(SolrRequest.METHOD.GET,
"/admin/metrics", msp);
+ GenericSolrRequest req =
+ new GenericSolrRequest(SolrRequest.METHOD.GET,
"/admin/info/properties", msp);
try {
LinkedHashMap<String, Object> result = new LinkedHashMap<>();
NavigableObject response =
solrClient
.requestWithBaseUrl(zkStateReader.getBaseUrlForNodeName(nodeName), null, req)
.getResponse();
- var metrics = NavigableObject.wrap(response._get("metrics"));
- keys.forEach((tag, key) -> result.put(tag, metrics._get(key)));
+ var metrics = NavigableObject.wrap(response._get("system.properties"));
+ tags.forEach((tag) -> result.put(tag, metrics._get(tag)));
return result;
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
diff --git
a/solr/solrj/src/test/org/apache/solr/common/cloud/TestNodesSysPropsCacher.java
b/solr/solrj/src/test/org/apache/solr/common/cloud/TestNodesSysPropsCacher.java
index fbdfe7c002b..489de336a89 100644
---
a/solr/solrj/src/test/org/apache/solr/common/cloud/TestNodesSysPropsCacher.java
+++
b/solr/solrj/src/test/org/apache/solr/common/cloud/TestNodesSysPropsCacher.java
@@ -28,9 +28,6 @@ import org.junit.Test;
public class TestNodesSysPropsCacher extends SolrCloudTestCase {
@Test
- // NOCOMMIT: These metrics were system properties that must be retrieved
somewhere else that isn't
- // the /admin/metrics endpoint
- @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-17458")
public void testSysProps() throws Exception {
System.setProperty("metricsEnabled", "true");
MiniSolrCloudCluster cluster =
diff --git
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractCollectionsAPIDistributedZkTestBase.java
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractCollectionsAPIDistributedZkTestBase.java
index 36f3e4317a3..349b2324911 100644
---
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractCollectionsAPIDistributedZkTestBase.java
+++
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractCollectionsAPIDistributedZkTestBase.java
@@ -21,7 +21,6 @@ import static
org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
-import java.lang.management.ManagementFactory;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
@@ -35,10 +34,6 @@ import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
-import javax.management.MBeanServer;
-import javax.management.MBeanServerFactory;
-import javax.management.ObjectName;
-import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.solr.client.api.model.CoreStatusResponse;
import org.apache.solr.client.solrj.SolrClient;
@@ -64,7 +59,6 @@ import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrInfoBean.Category;
import org.apache.solr.embedded.JettySolrRunner;
import org.apache.solr.util.TestInjection;
import org.apache.solr.util.TimeOut;
@@ -390,9 +384,7 @@ public abstract class
AbstractCollectionsAPIDistributedZkTestBase extends SolrCl
}
}
- // NOCOMMIT: Failing from JMX reporter
@Test
- @LuceneTestCase.AwaitsFix(bugUrl =
"https://issues.apache.org/jira/browse/SOLR-17458")
public void testCollectionsAPI() throws Exception {
// create new collections rapid fire
@@ -585,28 +577,17 @@ public abstract class
AbstractCollectionsAPIDistributedZkTestBase extends SolrCl
private void checkNoTwoShardsUseTheSameIndexDir() {
Map<String, Set<String>> indexDirToShardNamesMap = new HashMap<>();
- List<MBeanServer> servers = new ArrayList<>();
- servers.add(ManagementFactory.getPlatformMBeanServer());
- servers.addAll(MBeanServerFactory.findMBeanServer(null));
- for (final MBeanServer server : servers) {
- Set<ObjectName> mbeans = new HashSet<>(server.queryNames(null, null));
- for (final ObjectName mbean : mbeans) {
- try {
- Map<String, String> props = mbean.getKeyPropertyList();
- String category = props.get("category");
- String name = props.get("name");
- if ((category != null && category.equals(Category.CORE.toString()))
- && (name != null && name.equals("indexDir"))) {
- String indexDir = server.getAttribute(mbean, "Value").toString();
- String key = props.get("dom2") + "." + props.get("dom3") + "." +
props.get("dom4");
- if (!indexDirToShardNamesMap.containsKey(indexDir)) {
- indexDirToShardNamesMap.put(indexDir, new HashSet<>());
- }
- indexDirToShardNamesMap.get(indexDir).add(key);
+ for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
+ CoreContainer coreContainer = jetty.getCoreContainer();
+ List<String> coreNames = coreContainer.getAllCoreNames();
+ for (String coreName : coreNames) {
+ try (SolrCore core = coreContainer.getCore(coreName)) {
+ String indexDir = core.getIndexDir();
+ String shardKey = jetty.getNodeName() + "." + coreName;
+ if (!indexDirToShardNamesMap.containsKey(indexDir)) {
+ indexDirToShardNamesMap.put(indexDir, new HashSet<>());
}
- } catch (Exception e) {
- // ignore, just continue - probably a "Value" attribute
- // not found
+ indexDirToShardNamesMap.get(indexDir).add(shardKey);
}
}
}
@@ -617,7 +598,8 @@ public abstract class
AbstractCollectionsAPIDistributedZkTestBase extends SolrCl
+ " : "
+ indexDirToShardNamesMap,
indexDirToShardNamesMap.size() > 0);
- for (Entry<String, Set<String>> entry :
indexDirToShardNamesMap.entrySet()) {
+
+ for (Map.Entry<String, Set<String>> entry :
indexDirToShardNamesMap.entrySet()) {
if (entry.getValue().size() > 1) {
fail(
"We have shards using the same indexDir. E.g. shards "