This is an automated email from the ASF dual-hosted git repository.
mlbiscoc pushed a commit to branch branch_10x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_10x by this push:
new cced25f4e3e SOLR-17856: Solr ref-guide OpenTelemetry documentation
(#3811)
cced25f4e3e is described below
commit cced25f4e3ec9f478b8369fa9db39eafd2a3af29
Author: Matthew Biscocho <[email protected]>
AuthorDate: Mon Nov 3 14:19:03 2025 -0500
SOLR-17856: Solr ref-guide OpenTelemetry documentation (#3811)
Solr 10 ref-guide additions/changes after migrating from Dropwizard to
OpenTelemetry.
Additionally, added configurable property for changing OTLP endpoint via
gRPC or HTTP.
---
...458-switch-from-dropwizard-to-opentelemetry.yml | 16 +
solr/benchmark/src/resources/solr.xml | 2 -
.../apache/solr/handler/admin/MetricsHandler.java | 5 +-
.../solr/metrics/otel/MetricExporterFactory.java | 7 +
.../solr/opentelemetry/OtlpExporterFactory.java | 8 +-
solr/server/solr/solr.xml | 12 -
.../pages/configuring-solr-xml.adoc | 11 -
.../configuration-guide/pages/solr-plugins.adoc | 2 +-
.../modules/deployment-guide/deployment-nav.adoc | 2 -
.../deployment-guide/pages/jmx-with-solr.adoc | 11 +-
.../pages/mbean-request-handler.adoc | 84 --
.../deployment-guide/pages/metrics-reporting.adoc | 860 +++++----------------
.../monitoring-with-prometheus-and-grafana.adoc | 633 ---------------
.../pages/performance-statistics-reference.adoc | 211 +++--
.../pages/major-changes-in-solr-10.adoc | 18 +-
.../pages/major-changes-in-solr-7.adoc | 2 +-
.../pages/major-changes-in-solr-9.adoc | 2 +-
17 files changed, 321 insertions(+), 1565 deletions(-)
diff --git
a/changelog/unreleased/SOLR-17458-switch-from-dropwizard-to-opentelemetry.yml
b/changelog/unreleased/SOLR-17458-switch-from-dropwizard-to-opentelemetry.yml
new file mode 100644
index 00000000000..04bec7b73f6
--- /dev/null
+++
b/changelog/unreleased/SOLR-17458-switch-from-dropwizard-to-opentelemetry.yml
@@ -0,0 +1,16 @@
+# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
+title: Switch from Dropwizard to OpenTelemetry. This change provides native
Prometheus support on the /admin/metrics API, OTLP support, exemplar support
for tracing correlation with OpenMetrics format and native attributes and
labels on all metrics.
+type: changed
+authors:
+ - name: Matthew Biscocho
+ - name: David Smiley
+ - name: Sanjay Dutt
+ - name: Jude Muriithi
+ - name: Luke Kot-Zaniewski
+ - name: Carlos Ugarte
+ - name: Kevin Liang
+ - name: Bryan Jacobowitz
+ - name: Adam Quigley
+links:
+ - name: SOLR-17458
+ url: https://issues.apache.org/jira/browse/SOLR-17458
\ No newline at end of file
diff --git a/solr/benchmark/src/resources/solr.xml
b/solr/benchmark/src/resources/solr.xml
index a7f85ea8fdc..5f7c24d6634 100644
--- a/solr/benchmark/src/resources/solr.xml
+++ b/solr/benchmark/src/resources/solr.xml
@@ -21,8 +21,6 @@
-->
<solr>
- <metrics enabled="${metricsEnabled:true}"/>
-
<str name="shareSchema">${shareSchema:false}</str>
<str name="configSetBaseDir">${configSetBaseDir:configsets}</str>
<str name="coreRootDirectory">${coreRootDirectory:.}</str>
diff --git
a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHandler.java
b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHandler.java
index a35ededf7b0..13b4d044c7e 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHandler.java
@@ -60,15 +60,16 @@ public class MetricsHandler extends RequestHandlerBase
implements PermissionName
public static final String KEY_PARAM = "key";
public static final String EXPR_PARAM = "expr";
public static final String TYPE_PARAM = "type";
+
// Prometheus filtering parameters
public static final String CATEGORY_PARAM = "category";
public static final String CORE_PARAM = "core";
public static final String COLLECTION_PARAM = "collection";
public static final String SHARD_PARAM = "shard";
- public static final String REPLICA_PARAM = "replica";
+ public static final String REPLICA_TYPE_PARAM = "replica_type";
public static final String METRIC_NAME_PARAM = "name";
private static final Set<String> labelFilterKeys =
- Set.of(CATEGORY_PARAM, CORE_PARAM, COLLECTION_PARAM, SHARD_PARAM,
REPLICA_PARAM);
+ Set.of(CATEGORY_PARAM, CORE_PARAM, COLLECTION_PARAM, SHARD_PARAM,
REPLICA_TYPE_PARAM);
public static final String PROMETHEUS_METRICS_WT = "prometheus";
public static final String OPEN_METRICS_WT = "openmetrics";
diff --git
a/solr/core/src/java/org/apache/solr/metrics/otel/MetricExporterFactory.java
b/solr/core/src/java/org/apache/solr/metrics/otel/MetricExporterFactory.java
index 5d0ecc67427..d0ac8219aa3 100644
--- a/solr/core/src/java/org/apache/solr/metrics/otel/MetricExporterFactory.java
+++ b/solr/core/src/java/org/apache/solr/metrics/otel/MetricExporterFactory.java
@@ -30,5 +30,12 @@ public interface MetricExporterFactory {
public static final int OTLP_EXPORTER_INTERVAL =
Integer.parseInt(EnvUtils.getProperty("solr.metrics.otlpExporterInterval",
"60000"));
+ public static final String OTLP_EXPORTER_GRPC_ENDPOINT =
+ EnvUtils.getProperty("solr.metrics.otlpGrpcExporterEndpoint",
"http://localhost:4317");
+
+ public static final String OTLP_EXPORTER_HTTP_ENDPOINT =
+ EnvUtils.getProperty(
+ "solr.metrics.otlpHttpExporterEndpoint",
"http://localhost:4318/v1/metrics");
+
MetricExporter getExporter();
}
diff --git
a/solr/modules/opentelemetry/src/java/org/apache/solr/opentelemetry/OtlpExporterFactory.java
b/solr/modules/opentelemetry/src/java/org/apache/solr/opentelemetry/OtlpExporterFactory.java
index 329ff764c84..ab95b2bf89e 100644
---
a/solr/modules/opentelemetry/src/java/org/apache/solr/opentelemetry/OtlpExporterFactory.java
+++
b/solr/modules/opentelemetry/src/java/org/apache/solr/opentelemetry/OtlpExporterFactory.java
@@ -42,8 +42,12 @@ public class OtlpExporterFactory implements
MetricExporterFactory {
}
return switch (OTLP_EXPORTER_PROTOCOL) {
- case "grpc" -> OtlpGrpcMetricExporter.getDefault();
- case "http" -> OtlpHttpMetricExporter.getDefault();
+ case "grpc" -> OtlpGrpcMetricExporter.getDefault().toBuilder()
+ .setEndpoint(OTLP_EXPORTER_GRPC_ENDPOINT)
+ .build();
+ case "http" -> OtlpHttpMetricExporter.getDefault().toBuilder()
+ .setEndpoint(OTLP_EXPORTER_HTTP_ENDPOINT)
+ .build();
case "none" -> null;
default -> {
log.warn(
diff --git a/solr/server/solr/solr.xml b/solr/server/solr/solr.xml
index ff1d9708416..96dabbcee81 100644
--- a/solr/server/solr/solr.xml
+++ b/solr/server/solr/solr.xml
@@ -59,16 +59,4 @@
<int name="connTimeout">${connTimeout:60000}</int>
</shardHandlerFactory>
- <metrics enabled="${metricsEnabled:true}">
- <!-- Solr computes JVM metrics for threads. Computing these metrics,
esp. computing deadlocks etc.,
- requires potentially expensive computations, and can be avoided for every
metrics call by
- setting a high caching expiration interval (in seconds).
- <caching>
- <int name="threadsIntervalSeconds">5</int>
- </caching>
- -->
- <!--reporter name="jmx_metrics" group="core"
class="org.apache.solr.metrics.reporters.SolrJmxReporter"/-->
- </metrics>
-
-
</solr>
diff --git
a/solr/solr-ref-guide/modules/configuration-guide/pages/configuring-solr-xml.adoc
b/solr/solr-ref-guide/modules/configuration-guide/pages/configuring-solr-xml.adoc
index 4a4246efff8..bf5aaa8d15d 100644
---
a/solr/solr-ref-guide/modules/configuration-guide/pages/configuring-solr-xml.adoc
+++
b/solr/solr-ref-guide/modules/configuration-guide/pages/configuring-solr-xml.adoc
@@ -65,10 +65,6 @@ The default `solr.xml` file is found in
`$SOLR_TIP/server/solr/solr.xml` and loo
<int name="connTimeout">${connTimeout:60000}</int>
</shardHandlerFactory>
- <metrics enabled="${metricsEnabled:true}">
- <!--reporter name="jmx_metrics" group="core"
class="org.apache.solr.metrics.reporters.SolrJmxReporter"/-->
- </metrics>
-
</solr>
----
@@ -696,13 +692,6 @@ The `name` attribute is required and must be unique for
each `clusterSingleton`.
The `class` attribute should be set to the FQN (fully qualified name) of a
class that extends `ClusterSingleton`.
Sub-elements are specific to the implementation, `value1` is provided as an
example here.
-=== The <metrics> Element
-
-The `<metrics>` element in `solr.xml` allows you to customize the metrics
reported by Solr.
-You can define system properties that should not be returned, or define custom
suppliers and reporters.
-
-If you would like to customize the metrics for your installation, see the
xref:deployment-guide:metrics-reporting.adoc#metrics-configuration[Metrics
Configuration] section.
-
=== The <caches> Element
The `<caches>` element in `solr.xml` supports defining and configuring named
node-level caches.
diff --git
a/solr/solr-ref-guide/modules/configuration-guide/pages/solr-plugins.adoc
b/solr/solr-ref-guide/modules/configuration-guide/pages/solr-plugins.adoc
index 4eeae1326d3..84aaafb10e3 100644
--- a/solr/solr-ref-guide/modules/configuration-guide/pages/solr-plugins.adoc
+++ b/solr/solr-ref-guide/modules/configuration-guide/pages/solr-plugins.adoc
@@ -42,7 +42,7 @@ Examples of these are query parsers, request handlers, update
request processors
* Cluster level (or Core Container level) plugins.
These are plugins that are installed at a cluster level and every Solr node
has one instance each of these plugins.
-Examples of these are
xref:deployment-guide:authentication-and-authorization-plugins.adoc[],
xref:deployment-guide:metrics-reporting.adoc#reporters[metrics reporters],
https://issues.apache.org/jira/browse/SOLR-14404[cluster level request
handlers], etc.
+Examples of these are
xref:deployment-guide:authentication-and-authorization-plugins.adoc[],
https://issues.apache.org/jira/browse/SOLR-14404[cluster level request
handlers], etc.
== Installing Plugins ==
diff --git a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc
b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc
index 47ba93002f8..55301601e3e 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/deployment-nav.adoc
@@ -61,8 +61,6 @@
** xref:metrics-reporting.adoc[]
** xref:performance-statistics-reference.adoc[]
** xref:plugins-stats-screen.adoc[]
-** xref:mbean-request-handler.adoc[]
-** xref:monitoring-with-prometheus-and-grafana.adoc[]
** xref:jmx-with-solr.adoc[]
** xref:thread-dump.adoc[]
** xref:distributed-tracing.adoc[]
diff --git
a/solr/solr-ref-guide/modules/deployment-guide/pages/jmx-with-solr.adoc
b/solr/solr-ref-guide/modules/deployment-guide/pages/jmx-with-solr.adoc
index 8e53c7b9b45..23644c5227e 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/pages/jmx-with-solr.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/pages/jmx-with-solr.adoc
@@ -22,21 +22,14 @@ In essence, it is a standard interface by which complex
systems can be viewed an
Solr, like any other good citizen of the Java universe, can be controlled via
a JMX interface.
Once enabled, you can use a JMX client, like jconsole, to connect with Solr.
-If you are unfamiliar with JMX, you may find the following overview useful:
http://docs.oracle.com/javase/8/docs/technotes/guides/management/agent.html.
+If you are unfamiliar with JMX, you may find the following overview useful:
http://docs.oracle.com/javase/8/docs/technotes/guides/management/agent.html.
== Configuring JMX
-JMX support is configured by defining a metrics reporter, as described in the
section the section xref:metrics-reporting.adoc#jmx-reporter[JMX Reporter].
-
-If you have an existing MBean server running in Solr's JVM, or if you start
Solr with the system property `-Dcom.sun.management.jmxremote`, Solr will
automatically identify its location on startup even if you have not defined a
reporter explicitly in `solr.xml`.
-You can also define the location of the MBean server with parameters defined
in the reporter definition.
+If you have an existing MBean server running in Solr's JVM, or if you start
Solr with the system property `-Dcom.sun.management.jmxremote`, Solr will
automatically identify its location on startup.
== Configuring MBean Servers
-Versions of Solr prior to 7.0 defined JMX support in `solrconfig.xml`.
-This has been changed to the metrics reporter configuration defined above.
-Parameters for the reporter configuration allow defining the location or
address of an existing MBean server.
-
An MBean server can be started at the time of Solr's startup by passing the
system parameter `-Dcom.sun.management.jmxremote`.
See Oracle's documentation for additional settings available to start and
control an MBean server at
http://docs.oracle.com/javase/8/docs/technotes/guides/management/agent.html.
diff --git
a/solr/solr-ref-guide/modules/deployment-guide/pages/mbean-request-handler.adoc
b/solr/solr-ref-guide/modules/deployment-guide/pages/mbean-request-handler.adoc
deleted file mode 100644
index 9cdc447bcc9..00000000000
---
a/solr/solr-ref-guide/modules/deployment-guide/pages/mbean-request-handler.adoc
+++ /dev/null
@@ -1,84 +0,0 @@
-= MBean Request Handler
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-The MBean Request Handler offers programmatic access to the information
provided on the xref:plugins-stats-screen.adoc[] of the Admin UI.
-
-The MBean Request Handler accepts the following parameters:
-
-`key`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-Restricts results by object key.
-
-`cat`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-Restricts results by category name.
-
-`stats`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `false`
-|===
-+
-Specifies whether statistics are returned with results.
-You can override the `stats` parameter on a per-field basis.
-The default is `false`.
-
-`wt`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `json`
-|===
-+
-The output format.
-This operates the same as the xref:query-guide:response-writers.adoc[`wt`
parameter in a query].
-
-== MBeanRequestHandler Examples
-
-All of the examples in this section assume you are running the
xref:getting-started:tutorial-techproducts.adoc["techproducts" example].
-
-To return information about the CACHE category only:
-
-[source,text]
-http://localhost:8983/solr/techproducts/admin/mbeans?cat=CACHE
-
-To return information and statistics about the CACHE category only, formatted
in XML:
-
-[source,text]
-http://localhost:8983/solr/techproducts/admin/mbeans?stats=true&cat=CACHE&wt=xml
-
-To return information for everything, and statistics for everything except the
`fieldCache`:
-
-[source,text]
-http://localhost:8983/solr/techproducts/admin/mbeans?stats=true&f.fieldCache.stats=false
-
-To return information and statistics for the `fieldCache` only:
-
-[source,text]
-http://localhost:8983/solr/techproducts/admin/mbeans?key=fieldCache&stats=true
diff --git
a/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc
b/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc
index 268ae57f538..09590b44e72 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc
@@ -1,4 +1,4 @@
-= Metrics Reporting
+= Metrics Reporting and Monitoring
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@@ -16,82 +16,67 @@
// specific language governing permissions and limitations
// under the License.
-Solr includes a developer API and instrumentation for the collection of
detailed performance-oriented metrics throughout the life-cycle of Solr service
and its various components.
+Solr supports both a pull-based Prometheus-formatted API and an OTLP push
exporter for collecting detailed performance-oriented metrics throughout the
lifecycle of Solr services and their various components.
-Internally this feature uses the http://metrics.dropwizard.io[Dropwizard
Metrics API], which uses the following classes of meters to measure events:
+All metrics natively include attributes/labels, providing users with powerful
ways to aggregate metrics in their preferred backend, as well as descriptions
to help understand what each metric represents.
-* *counters* - simply count events.
-They provide a single long value, e.g., the number of requests.
-* *meters* - additionally compute rates of events.
-Provide a count (as above) and 1-, 5-, and 15-minute exponentially decaying
rates, similar to the Unix system load average.
-* *histograms* - calculate approximate distribution of events according to
their values.
-Provide the following approximate statistics, with a similar exponential decay
as above: mean (arithmetic average), median, maximum, minimum, standard
deviation, and 75^th^, 95^th^, 98^th^, 99^th^ and 999^th^ percentiles.
-* *timers* - measure the number and duration of events.
-They provide a count and histogram of timings.
-* *gauges* - offer instantaneous reading of a current value, e.g., current
queue depth, current number of active connections, free heap size.
+Internally this feature uses https://opentelemetry.io/[OpenTelemetry], which
uses the following instruments to measure events. For more information on
instruments see the OpenTelemetry documentation on
https://opentelemetry.io/docs/concepts/signals/metrics/#metric-instruments[Metric
Instruments].
-Some of these meters may be missing or empty for any number of valid reasons.
-In these cases, missing values of any type will be returned as `null` by
default so empty values won't impact averages or histograms.
-This is configurable for several types of missing values; see the <<The
<metrics> <missingValues> Element>> section below.
+Solr metrics provide raw data that must be aggregated and calculated by
monitoring backends (Prometheus, Grafana, etc.). Counters can be used to
calculate rates and averages over time windows. Histograms provide raw bucket
data that backends use to calculate percentiles (p50, p75, p95, p99, p999),
averages, and other statistical measures. Solr delegates these calculations to
your monitoring system for better flexibility and reduced load on Solr. See
xref:deployment-guide:metrics-reporting [...]
-Each group of related metrics with unique names is managed in a *metric
registry*.
-Solr maintains several such registries, each corresponding to a high-level
group such as: `jvm`, `jetty`, `node`, and `core` (see <<Metric Registries>>
below).
-
-For each group (and/or for each registry) there can be several *reporters*,
which are components responsible for communication of metrics from selected
registries to external systems.
-Currently implemented reporters support emitting metrics via JMX, Ganglia,
Graphite and SLF4J.
-
-There is also a dedicated `/admin/metrics` handler that can be queried to
report all or a subset of the current metrics from multiple registries.
+[NOTE]
+====
+Some metrics and/or events may be missing or empty because a metric that was
never recorded does not appear. Specifically, a metric will not appear if its
triggering event never occurs.
+====
== Metric Registries
-Solr includes multiple metric registries, which group related metrics.
+Internally, Solr categorizes metrics into registries that group related
metrics together. While users don't need to understand these registries and
should focus on the attributes attached to metrics for aggregation, being aware
of them can be helpful for knowing what metrics are available and their base
set of attributes.
-Metrics are maintained and accumulated through all lifecycles of components
from the start of the process until its shutdown - e.g., metrics for a
particular SolrCore are tracked through possibly several load, unload and/or
rename operations, and are deleted only when a core is explicitly deleted.
+Metrics are maintained and accumulated throughout all lifecycle phases of
components, from process startup until shutdown. For example, metrics for a
particular SolrCore are tracked through multiple load, unload, and rename
operations, and are only deleted when a core is explicitly deleted.
However, metrics are not persisted across process restarts; restarting Solr
will discard all collected metrics.
These are the major groups of metrics that are collected:
-=== JVM Registry
+=== Node / CoreContainer Registry
-This registry is returned at `solr.jvm` and includes the following information.
-When making requests with the <<Metrics API>>, you can specify `&group=jvm` to
limit to only these metrics.
+The `Node Registry` records metrics at the process level that are not specific
to any core. Metric names are prefixed with `solr_node_` and include the
following information:
-* direct and mapped buffer pools
-* class loading / unloading
-* OS memory, CPU time in nanoseconds, file descriptors, swap, system load
-* GC count and time
-* heap, non-heap memory and GC pools
-* number of threads, their states and deadlocks
-* System properties such as Java information, various installation directory
paths, ports, and similar information.
-You can control what appears here by modifying `solr.xml`.
+* handler requests (count, timing): collections, info, admin, configsets, etc.
+* number of cores (permanent, unloaded)
-=== Overseer Registry
+=== Core (SolrCore) Registry
-This registry is returned at `solr.overseer` when run in SolrCloud mode and
includes the following information.
-When making requests with the <<Metrics API>>, you can specify
`&group=overseer` to limit to only these metrics.
+The <<Core Level Metrics,Core (SolrCore) Registry>> includes all core-level
metrics with one registry for each core. All core metrics are prefixed with
`solr_core_` in the name. In addition to the prefix, all core metrics have the
following standard set of core attributes attached for aggregation:
-* size of the Overseer queues (collection work queue and cluster state update
queue)
+In cloud mode:
-=== Node / CoreContainer Registry
+`core`::
+Name of the core.
-This registry is returned at `solr.node` and includes the following
information.
-When making requests with the <<Metrics API>>, you can specify `&group=node`
to limit to only these metrics.
+`collection`::
+Name of the collection.
-* handler requests (count, timing): collections, info, admin, configsets, etc.
-* number of cores (loaded, lazy, unloaded)
+`replica_type`::
+The type of replica. This can be NRT/TLOG/PULL.
-=== Core (SolrCore) Registry
+`shard`::
+The name of the shard.
+
+In standalone mode, only the `core` attribute is attached to the metrics.
+
+Some examples of metrics available from the core registry:
-The <<Core Level Metrics,Core (SolrCore) Registry>> includes
`solr.core.<collection>`, one for each core.
-When making requests with the <<Metrics API>>, you can specify `&group=core`
to limit to only these metrics.
+* All common RequestHandlers report request timers/counters, timeouts, and
errors.
+Handlers that process distributed shard requests include a boolean `internal`
attribute for each type of distributed request, differentiating between
external client requests and internal requests.
+* <<Index Merge Metrics,Index-level events>>: meters for minor/major merges,
number of merged documents, number of deleted documents, and number of flushes.
+* Shard replication and transaction log replay on replicas.
-* all common RequestHandlers report: request timers / counters, timeouts,
errors.
-Handlers that support process distributed shard requests also report
`shardRequests` sub-counters for each type of distributed request.
-* <<Index Merge Metrics,index-level events>>: meters for minor / major merges,
number of merged docs, number of deleted docs, number of flushes
-* shard replication and transaction log replay on replicas,
-* open / available / pending connections for shard handler and update handler.
+RequestHandlers can be configured to roll up core-level metrics to the node
level in addition to reporting them per core. This is useful when you have a
large number of cores per node and are interested in aggregate metrics per node.
-RequestHandlers can be configured to roll up core level metrics to the node
level in addition to reporting them per core. This is useful if you have a
large number of cores per node and are interested in aggregate metrics per
node. This is configured by adding `<bool
name="aggregateNodeLevelMetricsEnabled">true</bool>` to a
xref:configuration-guide:requesthandlers-searchcomponents.adoc#configuring-request-handlers[RequestHandler
configuration] in your solrconfig.xml, for example:
+These metrics are prefixed with `solr_node`, include the `handler` attribute,
and omit the standard core attributes.
+
+This is configured by adding `<bool
name="aggregateNodeLevelMetricsEnabled">true</bool>` to a
xref:configuration-guide:requesthandlers-searchcomponents.adoc#configuring-request-handlers[RequestHandler
configuration] in your solrconfig.xml, for example:
```
<requestHandler name="/select" class="solr.SearchHandler">
@@ -107,771 +92,290 @@ RequestHandlers can be configured to roll up core level
metrics to the node leve
</requestHandler>
```
-=== Jetty Registry
-
-This registry is returned at `solr.jetty` and includes the following
information.
-When making requests with the <<Metrics API>>, you can specify `&group=jetty`
to limit to only these metrics.
+=== JVM Registry
-* threads and pools,
-* connection and request timers,
-* meters for responses by HTTP class (1xx, 2xx, etc.)
+The `JVM Registry` gathers metrics from the JVM using the OpenTelemetry
instrumentation library with JFR and JMX. See the
https://github.com/open-telemetry/opentelemetry-java-instrumentation/tree/main/instrumentation/runtime-telemetry/runtime-telemetry-java17/library[runtime-telemetry-java17]
documentation for more information on available JVM metrics.
-== Metrics Configuration
+JVM metrics are enabled by default but can be disabled by setting either the
system property `-Dsolr.metrics.jvm.enabled=false` or the environment variable
`SOLR_METRICS_JVM_ENABLED=false`.
-The metrics available in your system can be customized by modifying the
`<metrics>` element in `solr.xml`.
+=== Overseer Registry
-TIP: See also the section xref:configuration-guide:configuring-solr-xml.adoc[]
for more information about the `solr.xml` file, where to find it, and how to
edit it.
+The `Overseer Registry` is initialized when running in SolrCloud mode and
includes the following information:
-=== Disabling the Metrics Collection
-The `<metrics>` element in `solr.xml` supports one attribute `enabled`, which
takes a boolean value,
-for example `<metrics enabled="true">`.
+* Size of the Overseer queues (collection work queue and cluster state update
queue)
-The default value of this attribute is `true`, meaning that metrics are being
collected, processed and
-reported by Solr according to the configured metric reporters.
-They are also available from the
-metrics APIs.
+== Core Level Metrics
-The `false` value of this attribute (`<metrics enabled="false">`) turns off
metrics collection and processing.
-Internally, all metrics suppliers are replaced by singleton no-op
-implementations, which effectively removes nearly all overheads related to
metrics collection.
-All reporter configurations are skipped, and the metrics APIs stop reporting
any metrics and only return an `<error>`
-element in their responses.
+=== Index Merge Metrics
-=== The <metrics> <hiddenSysProps> Element
+These metrics are collected under the `INDEX` category and track flush
operations (documents being written to disk) and merge operations (segments on
disk being merged).
-This section of `solr.xml` allows you to define the system properties which
are considered system-sensitive and should not be exposed via the Metrics API.
+Merge metrics distinguish between "minor" and "major" merges (merges with
fewer documents are typically more frequent).
+This is indicated by the `merge_type` label for the metric. The threshold for
when a merge becomes large enough to be considered major is configurable, but
+defaults to 524,288 documents.
-If this section is not defined, the following default configuration is used
which hides password and authentication information:
+Metrics collection for index merges can be configured in the `<metrics>`
section of `solrconfig.xml` as shown below:
[source,xml]
----
-<metrics>
- <hiddenSysProps>
- <str>javax.net.ssl.keyStorePassword</str>
- <str>javax.net.ssl.trustStorePassword</str>
- <str>solr.security.auth.basicauth.credentials</str>
- <str>zkDigestPassword</str>
- <str>zkDigestReadonlyPassword</str>
- </hiddenSysProps>
-</metrics>
+<config>
+ ...
+ <indexConfig>
+ <metrics>
+ <long name="majorMergeDocs">524288</long>
+ </metrics>
+ ...
+ </indexConfig>
+...
+</config>
----
-[#the-metrics-reporters-element]
-=== The <metrics> <reporters> Element
-
-Reporters consume the metrics data generated by Solr.
-See the section <<Reporters>> below for more details on how to configure
custom reporters.
-
-=== The <metrics> <suppliers> Element
-
-Suppliers help Solr generate metrics data.
-The `<metrics><suppliers>` section of `solr.xml` allows you to define your own
implementations of metrics and configure parameters for them.
-
-Implementation of a custom metrics supplier is beyond the scope of this guide,
but there are other customizations possible with the default implementation,
via the elements described below.
-
-`<counter>`::
-This element defines the implementation and configuration of a `Counter`
supplier.
-The default implementation does not support any configuration.
-
-`<meter>`::
-This element defines the implementation of a `Meter` supplier.
-The default implementation supports an additional parameter:
-
-`<str name="clock">`:::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `user`
-|===
-+
-The type of clock to use for calculating EWMA rates.
-The supported values are:
-* `user`, which uses `System.nanoTime()`
-* `cpu`, which uses the current thread's CPU time
-
-`<histogram>`::
-This element defines the implementation of a `Histogram` supplier.
-This element also supports the `clock` parameter shown above with the `meter`
element, and also:
+== Metrics API
-`<str name="reservoir">`:::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `com.codahale.metrics.ExponentiallyDecayingReservoir`
-|===
-+
-The fully-qualified class name of the `Reservoir` implementation to use.
-The default is `com.codahale.metrics.ExponentiallyDecayingReservoir` but there
are other options available with the http://metrics.dropwizard.io/[Codahale
Metrics library] that Solr uses.
+The `/admin/metrics` endpoint natively provides access to all metrics in
Prometheus format by default. You can also specify `wt=prometheus` as a
parameter for Prometheus format or `wt=openmetrics` for OpenMetrics format.
More information on the data models is provided in the sections below.
-`<int name="size">`:::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `1028`
-|===
-+
-The reservoir size.
+=== Prometheus
-`<double name="alpha">`:::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `0.015`
-|===
-+
-The decay parameter.
-This is only valid for the `ExponentiallyDecayingReservoir`.
+See https://prometheus.io/docs/concepts/data_model/[Prometheus Data Model]
documentation for more information on its data model.
-`<long name="window">`:::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `300` seconds
-|===
-+
-The window size, in seconds, and only valid for the
`SlidingTimeWindowReservoir`.
+This endpoint can be used to pull/scrape metrics to a Prometheus server or any
Prometheus-compatible backend directly from Solr.
-`<timer>`::
-This element defines an implementation of a `Timer` supplier.
-The default implementation supports the `clock` and `reservoir` parameters
described above.
+==== Prometheus Setup
-As an example of a section of `solr.xml` that defines some of these custom
parameters, the following defines the default `Meter` supplier with a
non-default `clock` and the default `Timer` is used with a non-default
reservoir:
+The `prometheus-config.yml` file needs to be configured for a Prometheus
server to scrape and collect metrics. A basic configuration for SolrCloud mode
is as follows:
-[source,xml]
+[source,plain]
----
-<metrics>
- <suppliers>
- <meter>
- <str name="clock">cpu</str>
- </meter>
- <timer>
- <str
name="reservoir">com.codahale.metrics.SlidingTimeWindowReservoir</str>
- <long name="window">600</long>
- </timer>
- </suppliers>
-</metrics>
+scrape_configs:
+ - job_name: 'solr'
+ metrics_path: "/solr/admin/metrics"
+ static_configs:
+ - targets: ['localhost:8983', 'localhost:7574']
----
-=== The <metrics> <missingValues> Element
-Long-lived metrics values are still reported when the underlying value is
unavailable (e.g., "INDEX.sizeInBytes" when IndexReader is closed).
-Short-lived transient metrics (such as cache entries) that are properties of
complex gauges (internally represented as `MetricsMap`) are simply skipped when
not available, and neither their names nor values appear in registries (or in
`/admin/metrics` reports).
-
-When a missing value is encountered by default it's reported as null value,
regardless of the metrics type.
-This can be configured in the `solr.xml:/solr/metrics/missingValues` element,
which recognizes the following child elements (for string elements a JSON
payload is supported):
+=== OpenMetrics
-`nullNumber`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-The value to use when a missing (null) numeric metrics value is encountered.
+OpenMetrics format is available from the `/admin/metrics` endpoint by
providing the `wt=openmetrics` parameter or by passing the Accept header
`application/openmetrics-text;version=1.0.0`. OpenMetrics is an extension of
the Prometheus format that adds additional metadata and exemplars.
-`notANumber`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-The value to use when an invalid numeric value is encountered.
+See https://prometheus.io/docs/specs/om/open_metrics_spec/[OpenMetrics Spec]
documentation for more information.
-`nullString`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-The value to use when a missing (null) string metrics is encountered.
+OpenMetrics can be used to pull/scrape metrics to a Prometheus server or any
OpenMetrics-compatible backend directly from Solr.
-`nullObject`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-The value to use when a missing (null) complex object is encountered.
+==== Prometheus setup with exemplars
-Example configuration that returns null for missing numbers, `-1` for
-invalid numeric values, empty string for missing strings, and a Map for missing
-complex objects:
+OpenMetrics includes `exemplars`, which provide additional information and
allow users to combine metrics with Solr's
xref:deployment-guide:distributed-tracing.adoc[OpenTelemetry distributed
tracing module] in a single cohesive view for correlating traces and
metrics.
-[source,xml]
-----
-<metrics>
- <missingValues>
- <null name="nullNumber"/>
- <int name="notANumber">-1</int>
- <str name="nullString"></str>
- <str name="nullObject">{"value":"missing"}</str>
- </missingValues>
-</metrics>
-----
+Distributed tracing must be enabled to see exemplars; otherwise, exemplars
never appear in the OpenMetrics output. Once tracing is enabled, you can scrape
the OpenMetrics format into a Prometheus server or OpenMetrics-compatible backend.
-=== Caching Threads Metrics ===
-The threads metrics in the JVM group can be expensive to compute, as it
requires traversing all threads.
-This can be avoided for every call to the metrics API (group=jvm) by setting a
high caching expiration interval
-(in seconds). For example, to cache the thread metrics for 5 seconds:
+A basic `prometheus-config.yml` configuration for a Prometheus server in
SolrCloud mode that collects exemplars is as follows:
-[source,xml]
+[source,plain]
----
-<solr>
- <metrics>
- <caching>
- <int name="threadsIntervalSeconds">5</int>
- </caching>
- ...
- </metrics>
-...
-</solr>
+scrape_configs:
+ - job_name: 'solr'
+ metrics_path: "/solr/admin/metrics"
+ static_configs:
+ - targets: ['localhost:8983', 'localhost:7574']
+ params:
+ wt: ['openmetrics']
+ scrape_protocols:
+ - OpenMetricsText1.0.0
----
-== Reporters
+The Prometheus server must also be started with the command-line parameter
`--enable-feature=exemplar-storage` to collect exemplars from OpenMetrics.
-Reporter configurations are specified in `solr.xml` file in
`<metrics><reporter>` sections, for example:
+If you are using Grafana, follow the
https://grafana.com/docs/grafana/latest/fundamentals/exemplars/[Introduction to
exemplars] guide to connect your Prometheus data source and see exemplars on
Grafana panels.
-[source,xml]
-----
-<solr>
- <metrics>
- <reporter name="graphite" group="node, jvm"
class="org.apache.solr.metrics.reporters.SolrGraphiteReporter">
- <str name="host">graphite-server</str>
- <int name="port">9999</int>
- <int name="period">60</int>
- </reporter>
- <reporter name="log_metrics" group="core"
class="org.apache.solr.metrics.reporters.SolrSlf4jReporter">
- <int name="period">60</int>
- <str name="filter">QUERY./select.requestTimes</str>
- <str name="filter">QUERY./get.requestTimes</str>
- <str name="filter">UPDATE./update.requestTimes</str>
- <str name="filter">UPDATE./update.clientErrors</str>
- <str name="filter">UPDATE./update.errors</str>
- <str name="filter">SEARCHER.new.time</str>
- <str name="filter">SEARCHER.new.warmup</str>
- <str
name="logger">org.apache.solr.metrics.reporters.SolrSlf4jReporter</str>
- </reporter>
- </metrics>
-...
-</solr>
-----
+=== API Filtering
-This example configures two reporters: <<Graphite Reporter,Graphite>> and
<<SLF4J Reporter,SLF4J>>.
-See below for more details on how to configure reporters.
+A fixed set of parameters is available to filter metrics by either metric name
or base core labels. You can combine these parameters to return only the
specific metrics you need:
-=== Reporter Arguments
-
-Reporter plugins use the following arguments:
+[NOTE]
+====
+All parameters can be specified with more than one value in a request;
multiple values should be separated by a comma.
+====
`name`::
+
[%autowidth,frame=none]
|===
-s|Required |Default: none
-|===
-+
-The unique name of the reporter plugin.
-
-`class`::
-+
-[%autowidth,frame=none]
-|===
-s|Required |Default: none
-|===
-+
-The fully-qualified implementation class of the plugin, which must extend
`SolrMetricReporter`.
-
-`group`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-One or more of the predefined groups (see above).
-
-`registry`::
-+
-[%autowidth,frame=none]
-|===
|Optional |Default: none
|===
+
-One or more of valid fully-qualified registry names.
+The metric name to filter on.
-If both `group` and `registry` attributes are specified only the `group`
attribute is considered.
-If neither attribute is specified then the plugin will be used for all groups
and registries.
-Multiple group or registry names can be specified, separated by comma and/or
space.
-
-Additionally, several implementation-specific initialization arguments can be
specified in nested elements.
-There are some arguments that are common to SLF4J, Ganglia and Graphite
reporters:
-
-`period`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `60` seconds
-|===
-+
-The period in seconds between reports.
-
-`prefix`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: _empty string_
-|===
-+
-A prefix to be added to metric names, which may be helpful in logical grouping
of related Solr instances, e.g., machine name or cluster name.
-Default is empty string, i.e., just the registry name and metric name will be
used to form a fully-qualified metric name.
-
-`filter`::
+`category`::
+
[%autowidth,frame=none]
|===
|Optional |Default: none
|===
+
-If not empty then only metric names that start with this value will be
reported.
-Default is no filtering, i.e., all metrics from the selected registry will be
reported.
-
-Reporters are instantiated for every group and registry that they were
configured for, at the time when the respective components are initialized
(e.g., on JVM startup or SolrCore load).
-
-When reporters are created their configuration is validated (and e.g.,
necessary connections are established).
-Uncaught errors at this initialization stage cause the reporter to be
discarded from the running configuration.
-
-Reporters are closed when the corresponding component is being closed (e.g.,
on SolrCore close, or JVM shutdown) but metrics that they reported are still
maintained in respective registries, as explained in the previous section.
-
-The following sections provide information on implementation-specific
arguments.
-All implementation classes provided with Solr can be found under
`org.apache.solr.metrics.reporters`.
+The category label to filter on.
-=== JMX Reporter
-
-The JMX Reporter uses the `org.apache.solr.metrics.reporters.SolrJmxReporter`
class.
-
-It takes the following arguments:
-
-`domain`::
+`core`::
+
[%autowidth,frame=none]
|===
|Optional |Default: none
|===
+
-The JMX domain name.
-If not specified then the registry name will be used.
+The core name to filter on.
+More than one core can be specified in a request; multiple cores should be
separated by a comma.
-`serviceUrl`::
+`collection`::
+
[%autowidth,frame=none]
|===
|Optional |Default: none
|===
+
-The service URL for a JMX server.
-If not specified, Solr will attempt to discover if the JVM has an MBean server
and will use that address.
-See below for additional information on this.
+The collection name to filter on. This attribute is only filterable in
SolrCloud mode.
-`agentId`::
+`shard`::
+
[%autowidth,frame=none]
|===
|Optional |Default: none
|===
+
-The agent ID for a JMX server.
-Note either `serviceUrl` or `agentId` can be specified but not both.
-If both are specified then the default MBean server will be used.
-
-Object names created by this reporter are hierarchical, dot-separated but also
properly structured to form corresponding hierarchies in e.g., JConsole.
-This hierarchy consists of the following elements in the top-down order:
-
-* registry name (e.g., `solr.core.collection1.shard1.replica1`).
-Dot-separated registry names are also split into ObjectName hierarchy levels,
so that metrics for this registry will be shown under
`/solr/core/collection1/shard1/replica1` in JConsole, with each domain part
being assigned to `dom1, dom2, ... domN` property.
-* reporter name (the value of reporter's `name` attribute)
-* category, scope and name for request handlers
-* or additional `name1, name2, ... nameN` elements for metrics from other
components.
-
-The JMX Reporter replaces the JMX functionality available in Solr versions
before 7.0.
-If you have upgraded from an earlier version and have an MBean Server running
when Solr starts, Solr will automatically discover the location of the local
MBean server and use a default configuration for the SolrJmxReporter.
-
-You can start a local MBean server with a system property at startup by adding
`-Dcom.sun.management.jmxremote` to your start command.
-This will not add the reporter configuration to `solr.xml`, so if you enable
it with a system property, you must always start Solr with the system property
or JMX will not be enabled in subsequent starts.
+The shard name to filter on. This attribute is only filterable in SolrCloud
mode.
-=== SLF4J Reporter
-
-The SLF4J Reporter uses the
`org.apache.solr.metrics.reporters.SolrSlf4jReporter` class.
-
-It takes the following arguments, in addition to common arguments described
<<Reporter Arguments,above>>.
-
-`logger`::
+`replica_type`::
+
[%autowidth,frame=none]
|===
|Optional |Default: none
|===
+
-The name of the logger to use.
-Default is empty, in which case the group (or the initial part of the registry
name that identifies a metrics group) will be used if specified in the plugin
configuration.
-
-Users can specify logger name (and the corresponding logger configuration in
e.g., Log4j configuration) to output metrics-related logging to separate
file(s), which can then be processed by external applications.
-Here is an example for configuring the default `log4j2.xml` which ships in
Solr.
-This can be used in conjunction with the `solr.xml` example provided earlier
in this page to configure the SolrSlf4jReporter:
-
-[source,xml]
-----
-<Configuration>
- <Appenders>
- ...
- <RollingFile
- name="MetricsFile"
- fileName="${sys:solr.logs.dir}/solr_metrics.log"
- filePattern="${sys:solr.logs.dir}/solr_metrics.log.%i" >
- <PatternLayout>
- <Pattern>
- %d{yyyy-MM-dd HH:mm:ss.SSS} %-5p (%t) [%X{node_name} %X{collection}
%X{shard} %X{replica} %X{core} %X{trace_id}] %m%n
- </Pattern>
- </PatternLayout>
- <Policies>
- <OnStartupTriggeringPolicy />
- <SizeBasedTriggeringPolicy size="32 MB"/>
- </Policies>
- <DefaultRolloverStrategy max="10"/>
- </RollingFile>
- ...
- </Appenders>
+The replica type to filter on. Valid values are NRT, TLOG, or PULL. This
attribute is only filterable in SolrCloud mode.
- <Loggers>
- ...
- <Logger name="org.apache.solr.metrics.reporters.SolrSlf4jReporter"
level="info" additivity="false">
- <AppenderRef ref="MetricsFile"/>
- </Logger>
- ...
- </Loggers>
-</Configuration>
-----
+[[metrics_examples]]
+=== Examples
-Each log line produced by this reporter consists of configuration-specific
fields, and a message that follows this format:
+Request only metrics from the `foobar` collection:
[source,text]
-----
-type=COUNTER, name={}, count={}
-
-type=GAUGE, name={}, value={}
-
-type=TIMER, name={}, count={}, min={}, max={}, mean={}, stddev={}, median={},
p75={}, p95={}, p98={}, p99={}, p999={}, mean_rate={}, m1={}, m5={}, m15={},
rate_unit={}, duration_unit={}
-
-type=METER, name={}, count={}, mean_rate={}, m1={}, m5={}, m15={}, rate_unit={}
+http://localhost:8983/solr/admin/metrics?collection=foobar
-type=HISTOGRAM, name={}, count={}, min={}, max={}, mean={}, stddev={},
median={}, p75={}, p95={}, p98={}, p99={}, p999={}
-----
-
-(curly braces added here only as placeholders for actual values).
-
-Additionally, the following MDC context properties are passed to the logger
and can be used in log formats:
-
-`node_name`::
-Solr node name (for SolrCloud deployments, otherwise null), prefixed with `n:`.
-
-`registry`::
-Metric registry name, prefixed with `m:`.
-
-For reporters that are specific to a SolrCore also the following properties
are available:
-
-`collection`::
-Collection name, prefixed with `c:`.
-
-`shard`::
-Shard name, prefixed with `s:`.
-
-`replica`::
-Replica name (core node name), prefixed with `r:`.
-
-`core`::
-SolrCore name, prefixed with `x:`.
-
-`tag`::
-Reporter instance tag, prefixed with `t:`.
-
-=== Graphite Reporter
-
-The http://graphiteapp.org[Graphite] Reporter uses the
`org.apache.solr.metrics.reporters.SolrGraphiteReporter`) class.
-
-It takes the following attributes, in addition to the common attributes
<<Reporter Arguments,above>>.
-
-`host`::
-+
-[%autowidth,frame=none]
-|===
-s|Required |Default: none
-|===
-+
-The host name where Graphite server is running.
-
-`port`::
-+
-[%autowidth,frame=none]
-|===
-s|Required |Default: none
-|===
-+
-The port number for the server.
-
-`pickled`::
-+
-[%autowidth,frame=none]
-|===
-s|Required |Default: `false`
-|===
-+
-If `true`, use "pickled" Graphite protocol which may be more efficient.
-
-When plain-text protocol is used (`pickled==false`) it's possible to use this
reporter to integrate with systems other than Graphite, if they can accept
space-separated and line-oriented input over network in the following format:
+Request only the metrics with a category label of QUERY or UPDATE:
[source,text]
-----
-dot.separated.metric.name[.and.attribute] value epochTimestamp
-----
+http://localhost:8983/solr/admin/metrics?category=QUERY,UPDATE
-For example:
+Request only `solr_core_requests_total` metrics from the
`foobar_shard1_replica_n1` core:
-[source,plain]
-----
-example.solr.node.cores.loaded 1 1482932097
-example.solr.jetty.org.eclipse.jetty.server.handler.DefaultHandler.2xx-responses.count
21 1482932097
-example.solr.jetty.org.eclipse.jetty.server.handler.DefaultHandler.2xx-responses.m1_rate
2.5474287707930614 1482932097
-example.solr.jetty.org.eclipse.jetty.server.handler.DefaultHandler.2xx-responses.m5_rate
3.8003171557510305 1482932097
-example.solr.jetty.org.eclipse.jetty.server.handler.DefaultHandler.2xx-responses.m15_rate
4.0623076220244245 1482932097
-example.solr.jetty.org.eclipse.jetty.server.handler.DefaultHandler.2xx-responses.mean_rate
0.5698031798408144 1482932097
-----
-
-== Core Level Metrics
-
-These metrics are available only on a per-core basis.
-Metrics can be aggregated across cores using Shard and Cluster reporters.
+[source,text]
+http://localhost:8983/solr/admin/metrics?name=solr_core_requests_total&core=foobar_shard1_replica_n1
-=== Index Merge Metrics
+Request only the core index size `solr_core_index_size_bytes` metrics from
collections labeled `foo` and `bar`:
-These metrics are collected under the `INDEX` category and track flush
operations (documents being written to disk) and merge operations (segments on
disk being merged).
+[source,text]
+http://localhost:8983/solr/admin/metrics?name=solr_core_index_size_bytes&collection=foo,bar
-For merge metrics, metrics are tracked with the distinction of "minor" and
"major" merges (as merges with fewer documents will be typically more frequent).
-This is indicated by the `merge_type` label for the metric. The threshold for
when a merge becomes large enough to be considered major is configurable, but
-defaults to 524k documents.
+== OTLP
-Metrics collection for index merges can be configured in the `<metrics>`
section of `solrconfig.xml` as shown below:
+For users who do not use or support pulling metrics in Prometheus format with
the `/admin/metrics` API, Solr also supports pushing metrics natively with
https://opentelemetry.io/docs/specs/otlp/[OTLP], which is a vendor-agnostic
protocol for pushing metrics via gRPC or HTTP.
-[source,xml]
-----
-<config>
- ...
- <indexConfig>
- <metrics>
- <long name="majorMergeDocs">524288</long>
- </metrics>
- ...
- </indexConfig>
-...
-</config>
-----
+OTLP is widely supported by many tools, vendors, and pipelines. See the
OpenTelemetry https://opentelemetry.io/ecosystem/vendors/[vendors list] for
more details on available and compatible options.
+=== OTLP properties
-== Metrics API
+Solr's internal OTLP exporter is disabled by default and is packaged with the
OpenTelemetry module.
-The `admin/metrics` endpoint provides access to all the metrics for all metric
groups.
+The module can be enabled with either the system property
`-Dsolr.modules=opentelemetry` or the environment variable
`SOLR_MODULES=opentelemetry`, similar to distributed tracing.
-A few query parameters are available to limit your request to only certain
metrics:
+The OTLP exporter can be configured with the supported system properties
below. These can also be set as environment variables by following these
mapping rules:
-`group`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `all`
-|===
-+
-The metric group to retrieve.
-The value `all` retrieves all metrics for all groups.
-Other possible values are: `jvm`, `jetty`, `node`, and `core`.
-More than one group can be specified in a request; multiple group names should
be separated by a comma.
+- Replace `.` with `_`
+- Convert camelCase to UPPER_SNAKE_CASE
+- Make all letters uppercase
-`type`::
+`solr.metrics.otlpExporterEnabled`::
+
[%autowidth,frame=none]
|===
-|Optional |Default: `all`
+|Optional |Default: false
|===
+
-The type of metric to retrieve.
-The value `all` retrieves all metric types.
-Other possible values are `counter`, `gauge`, `histogram`, `meter`, and
`timer`.
-More than one type can be specified in a request; multiple types should be
separated by a comma.
+Boolean value to enable or disable the OTLP metrics exporter.
-`prefix`::
+`solr.metrics.otlpExporterProtocol`::
+
[%autowidth,frame=none]
|===
-|Optional |Default: none
+|Optional |Default: grpc
|===
+
-The first characters of metric name that will filter the metrics returned to
those starting with the provided string.
-It can be combined with `group` and/or `type` parameters.
-More than one prefix can be specified in a request; multiple prefixes should
be separated by a comma.
-Prefix matching is also case-sensitive.
+OTLP protocol to use for pushing metrics. Available options are `grpc`,
`http`, or `none` (disabled).
-`regex`::
+`solr.metrics.otlpExporterInterval`::
+
[%autowidth,frame=none]
|===
-|Optional |Default: none
+|Optional |Default: 60000
|===
+
-A regular expression matching metric names.
-Note: dot separators in metric names must be escaped, e.g.,
-`QUERY\./select\..*` is a valid regex that matches all metrics with the
`QUERY./select.` prefix.
+The interval in milliseconds for how frequently metrics are pushed via OTLP.
-`property`::
+`solr.metrics.otlpGrpcExporterEndpoint`::
+
[%autowidth,frame=none]
|===
-|Optional |Default: none
+|Optional |Default: http://localhost:4317
|===
+
-Allows requesting only this metric from any compound metric.
-Multiple `property` parameters can be combined to act as an OR request.
-For example, to only get the 99th and 999th percentile values from all metric
types and groups, you can add `&property=p99_ms&property=p999_ms` to your
request.
-This can be combined with `group`, `type`, and `prefix` as necessary.
+Endpoint to send OTLP metrics to using the gRPC protocol.
-`key`::
+`solr.metrics.otlpHttpExporterEndpoint`::
+
[%autowidth,frame=none]
|===
-|Optional |Default: none
+|Optional |Default: http://localhost:4318/v1/metrics
|===
+
-The fully-qualified metric name, which specifies one concrete metric instance
(parameter can be specified multiple times to retrieve multiple concrete
metrics).
-+
-Fully-qualified name consists of registry name, colon and metric name, with
optional colon and metric property.
-Colons in names can be escaped using backslash (`\`) character.
-Examples:
+Endpoint to send OTLP metrics to using the HTTP protocol.
-* `key=solr.node:CONTAINER.fs.totalSpace`
-* `key=solr.core.collection1:QUERY./select.requestTimes:max_ms`
-* `key=solr.jvm:system.properties:user.name`
-+
-*NOTE: when this parameter is used, any other selection methods are ignored.*
+=== OpenTelemetry Collector setup
-`expr`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-Extended notation of the `key` selection criteria, which supports regular
expressions for each of the parts supported by the `key` selector.
-This parameter can be specified multiple times to retrieve metrics that match
-any expression.
-The API guarantees that the output will consist only of unique metric names
even if multiple expressions match the same metric name.
-Note: the order of multiple `expr` parameters matters here - only the first
value of the first matching expression will be recorded, subsequent values for
the same metric name produced by matching other expressions will be skipped.
-+
-Fully-qualified expression consists of at least two and at most three regex
patterns separated by colons: a registry pattern, colon, a metric pattern, and
then an optional colon and metric property pattern.
-Colons and other regex meta-characters in names and in regular expressions
MUST be escaped using backslash (`\`) character.
-+
-Examples:
+The https://opentelemetry.io/docs/collector/[OpenTelemetry Collector] is a
powerful process that allows users to decouple their metrics pipeline and route
metrics to their preferred backend. It natively supports receiving metrics
pushed via OTLP and/or scraping the `/admin/metrics` Prometheus endpoint
exposed by Solr. You can push both metrics and traces to the collector via OTLP
as a single pipeline.
-* `expr=solr\.core\..*:QUERY\..*\.requestTimes:max_ms`
-* `expr=solr\.jvm:system\.properties:user\..*`
-
-+
-*NOTE: when this parameter is used, any other selection methods are ignored.*
+A simple setup to route metrics from Solr -> OpenTelemetry Collector ->
Prometheus can be configured with the following OpenTelemetry Collector
configuration file:
-`compact`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `true`
-|===
-+
-When `false`, a more verbose format of the response will be returned.
-Instead of a response like this:
-+
-[source,json]
-----
-{"metrics": [
- "solr.core.gettingstarted",
- {
- "CORE.aliases": {
- "value": ["gettingstarted"]
- },
- "CORE.coreName": {
- "value": "gettingstarted"
- },
- "CORE.indexDir": {
- "value": "/solr/example/schemaless/solr/gettingstarted/data/index/"
- },
- "CORE.instanceDir": {
- "value": "/solr/example/schemaless/solr/gettingstarted"
- },
- "CORE.refCount": {
- "value": 1
- },
- "CORE.startTime": {
- "value": "2017-03-14T11:43:23.822Z"
- }
- }
- ]}
-----
-+
-The response will look like this:
-+
-[source,json]
+[source,plain]
----
-{"metrics": [
- "solr.core.gettingstarted",
- {
- "CORE.aliases": [
- "gettingstarted"
- ],
- "CORE.coreName": "gettingstarted",
- "CORE.indexDir":
"/solr/example/schemaless/solr/gettingstarted/data/index/",
- "CORE.instanceDir": "/solr/example/schemaless/solr/gettingstarted",
- "CORE.refCount": 1,
- "CORE.startTime": "2017-03-14T11:43:23.822Z"
- }
- ]}
+receivers:
+ otlp:
+ protocols:
+ grpc:
+ endpoint: 0.0.0.0:4317
+ http:
+ endpoint: 0.0.0.0:4318
+
+exporters:
+ prometheus:
+ endpoint: 0.0.0.0:9464
+ send_timestamps: true
+ enable_open_metrics: true
+
+service:
+ pipelines:
+ metrics:
+ receivers: [otlp]
+ exporters: [prometheus]
----
-Like other request handlers, the Metrics API can also take the `wt` parameter
to define the output format.
-The Metrics API also provides an additional special `wt` parameter
`prometheus` that will output metrics in Prometheus format.
-This can be used for
xref:monitoring-with-prometheus-and-grafana.adoc#metrics-api-with-prometheus-format[Monitoring
with Prometheus and Grafana]
-
-*Note: The `group`, `type`, and `prefix` query parameters are still available
for filtering Prometheus metrics but `regex`, `property`, `key` and `compact`
are not supported.*
-
-[[metrics_examples]]
-=== Examples
-
-Request only "counter" type metrics in the "core" group, returned in JSON:
-
-[source,text]
-http://localhost:8983/solr/admin/metrics?type=counter&group=core
-
-Request only "core" group metrics that start with "INDEX", returned in XML:
-
-[source,text]
-http://localhost:8983/solr/admin/metrics?wt=xml&prefix=INDEX&group=core
-
-Request only "core" group metrics that end with ".requests":
+You can then request the metrics in Prometheus format from the collector:
-[source,text]
-http://localhost:8983/solr/admin/metrics?regex=.*\.requests&group=core
-
-Request only "user.name" property of "system.properties" metric from registry
"solr.jvm":
-
-[source,text]
-http://localhost:8983/solr/admin/metrics?wt=xml&key=solr.jvm:system.properties:user.name
+[source,bash]
+----
+curl 'localhost:9464/metrics'
+----
-Request query rates (but not histograms) from any core in any collection in
any QUERY handler:
+Or request OpenMetrics format to also see exemplars by passing the Accept
header:
-[source,text]
-http://localhost:8983/solr/admin/metrics?expr=solr\.core\..*:QUERY\..*\.requestTimes:.*Rate
+[source,bash]
+----
+curl 'localhost:9464/metrics' -H 'Accept: application/openmetrics-text; version=1.0.0'
+----
diff --git
a/solr/solr-ref-guide/modules/deployment-guide/pages/monitoring-with-prometheus-and-grafana.adoc
b/solr/solr-ref-guide/modules/deployment-guide/pages/monitoring-with-prometheus-and-grafana.adoc
deleted file mode 100644
index 1146f8f2a41..00000000000
---
a/solr/solr-ref-guide/modules/deployment-guide/pages/monitoring-with-prometheus-and-grafana.adoc
+++ /dev/null
@@ -1,633 +0,0 @@
-= Monitoring with Prometheus and Grafana
-:tabs-sync-option:
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-If you use https://prometheus.io[Prometheus] and https://grafana.com[Grafana]
for metrics storage and data visualization, Solr provides 2 solutions to
collect metrics and other data:
-
-* Prometheus Exporter
-* Metrics API with Prometheus format
-
-The Prometheus exporter is included with the full Solr distribution, and is
located under `prometheus-exporter/`.
-It is not included in the `slim` Solr distribution.
-
-A Prometheus exporter (`solr-exporter`) allows users to monitor not only Solr
metrics which come from the xref:metrics-reporting.adoc#metrics-api[Metrics
API], but also facet counts which come from xref:query-guide:faceting.adoc[]
and responses to xref:configuration-guide:collections-api.adoc[] commands and
xref:ping.adoc[] requests.
-
-The Metrics API provides a Prometheus Response Writer to output Solr metrics
natively to be scraped.
-It is more efficient and drops the need of running the Prometheus Exporter but
at the cost of a fixed output and not as flexible in terms of configurability.
-
-== Prometheus Exporter
-
-This graphic provides a more detailed view:
-
-.solr-exporter Diagram
-image::monitoring-with-prometheus-and-grafana/solr-exporter-diagram.png[image,width=600]
-
-There are three aspects to running `solr-exporter`:
-
-* Modify the `solr-exporter-config.xml` to define the data to collect.
-Solr has a default configuration you can use, but if you would like to modify
it before running the exporter the first time, see the section <<Exporter
Configuration>> below.
-* Start the exporter from within Solr.
-See the section below <<Starting the Exporter>>.
-* Modify your Prometheus configuration to listen on the correct port.
-See the section below <<Prometheus Configuration>>
-
-=== Starting the Exporter
-You can start `solr-exporter` by running `./bin/solr-exporter` (Linux) or
`.\bin\solr-exporter.cmd` (Windows) from the `prometheus-exporter/` directory.
-
-The metrics exposed by `solr-exporter` can be seen at the metrics endpoint:
`\http://localhost:8983/solr/admin/metrics`.
-
-See the commands below depending on your operating system and Solr operating
mode:
-
-[tabs#solr-exporter]
-======
-Linux::
-+
-====
-.User-managed / Single-node
-[source,bash]
-----
-$ cd prometheus-exporter
-$ ./bin/solr-exporter -p 9854 --solr-url http://localhost:8983/solr
--config-file ./conf/solr-exporter-config.xml --num-threads 8
-----
-
-.SolrCloud
-[source,bash]
-----
-$ cd prometheus-exporter
-$ ./bin/solr-exporter -p 9854 -z localhost:2181/solr --config-file
./conf/solr-exporter-config.xml --num-threads 16
-----
-====
-
-Windows::
-+
-====
-.User-managed / Single-node
-[source,text]
-----
-> cd prometheus-exporter
-> .\bin\solr-exporter.cmd -p 9854 --solr-url http://localhost:8983/solr
--config-file .\conf\solr-exporter-config.xml --num-threads 8
-----
-
-.SolrCloud
-[source,text]
-----
-> cd prometheus-exporter
-> .\bin\solr-exporter -p 9854 -z localhost:2181/solr --config-file
.\conf\solr-exporter-config.xml --num-threads 16
-----
-====
-======
-
-=== Command Line Parameters
-
-The list of available parameters for the Prometheus Exporter.
-All parameters can be provided via an environment variable, instead of through
the command line.
-
-`h`, `--help`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-Displays command line help and usage.
-
-`-p`, `--port`, `$PORT`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `8989`
-|===
-+
-The port where Prometheus will listen for new data.
-This port will be used to configure Prometheus.
-It can be any port not already in use on your server.
-
-`-s`, `--solr-url`, `$SOLR_URL`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: _see description_
-|===
-+
-The Solr base URL (such as `http://localhost:8983/solr`) when Solr is running
in a user-managed cluster or a single-node installation.
-If you are running SolrCloud, do not specify this parameter.
-If neither the `-s` parameter nor the `-z` parameter are defined, the default
is `--solr-url http://localhost:8983/solr`.
-
-`-z`, `--zk-host`, `$ZK_HOST`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: _see description_
-|===
-+
-The ZooKeeper connect string (such as `localhost:9983`, or
`localhost:2181/solr`) when Solr is running SolrCloud.
-If you are running a user-managed cluster or single-node installation, do not
specify this parameter.
-If neither the `--solr-url` parameter nor the `-z` parameter are defined, the
`--solr-url` parameter default is used.
-
-`--config-file`, `$CONFIG_FILE`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `prometheus-exporter/conf/solr-exporter-config.xml`
-|===
-+
-The path to the configuration file that defines the Solr metrics to read.
-
-`--num-threads`, `$NUM_THREADS`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `1`
-|===
-+
-The number of threads.
-The `solr-exporter` creates thread pools for requests to Solr.
-Request latency can be improved by increasing the number of threads.
-
-`--scrape-interval`, `$SCRAPE_INTERVAL`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `60` seconds
-|===
-+
-The number of seconds between collecting metrics from Solr.
-The `solr-exporter` collects metrics from Solr every few seconds controlled by
this setting.
-These metrics are cached and returned regardless of how frequently prometheus
is configured to pull metrics from this tool.
-The freshness of the metrics can be improved by reducing the scrape interval
but do not set it to a very low value because metrics collection can be
expensive and can execute arbitrary searches to ping Solr.
-
-`--cluster-id`, `$CLUSTER_ID`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: _see description_
-|===
-+
-A unique ID for the cluster to monitor. This ID will be added to all metrics
as a label `cluster_id` and can be used as a filter in the Grafana dashboard if
you operate multiple Solr clusters reporting to the same Prometheus instance.
If this option is omitted, a hash of the `--solr-url` or `--zk-host` will be
used as ID by default.
-
-`-u`, `--credentials`, `$CREDENTIALS`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-Specify the credentials in the format `username:password`. Example:
`--credentials solr:SolrRocks`.
-
-
-`--ssl-enabled`, `$SSL_ENABLED`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: false
-|===
-+
-
-Enable mTLS connection to Solr. Expects following env variables:
SOLR_SSL_KEY_STORE, SOLR_SSL_KEY_STORE_PASSWORD, SOLR_SSL_TRUST_STORE,
SOLR_SSL_TRUST_STORE_PASSWORD. Example: `--ssl-enabled`
-The environment variables are the same that Solr uses to enable mTLS.
-
-=== Environment Variable Options
-
-The `./bin` scripts provided with the Prometheus Exporter support the use of
custom java options through the following environment variables:
-
-`JAVA_HEAP`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `512m`
-|===
-+
-Sets the initial (`Xms`) and max (`Xmx`) Java heap size.
-
-`JAVA_MEM`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-Custom java memory settings (e.g., `-Xms1g -Xmx2g`).
-This is ignored if `JAVA_HEAP` is provided.
-
-`GC_TUNE`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: `-XX:+UseG1GC`
-|===
-+
-Custom Java garbage collection settings.
-
-`JAVA_OPTS`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-Extra JVM options.
-
-`ZK_CREDS_AND_ACLS`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-Credentials for connecting to a ZooKeeper host that is protected with ACLs.
-For more information on what to include in this variable, refer to the section
xref:zookeeper-access-control.adoc#zookeeper-acls-in-solr-cli[ZooKeeper ACLs in
Solr CLI] or the example <<getting-metrics-from-a-secured-solrcloud>> below.
-
-`CLASSPATH_PREFIX`::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-Location of extra libraries to load when starting the `solr-exporter`.
-
-All <<command-line-parameters>> are able to be provided via environment
variables when using the `./bin` scripts.
-
-=== Getting Metrics from a Secured SolrCloud
-
-Your SolrCloud security configuration can be injected into `solr-exporter`
using environment variables in a fashion similar to other clients using
xref:solrj.adoc[].
-This is possible because the main script picks up <<Environment Variable
Options>> and passes them on to the Java process.
-
-The following example assumes a SolrCloud instance secured by
xref:basic-authentication-plugin.adoc[], xref:enabling-ssl.adoc[SSL] and
xref:zookeeper-access-control.adoc[].
-
-Suppose you have a file `basicauth.properties` with the Solr Basic-Auth
credentials:
-
-----
-httpBasicAuthUser=myUser
-httpBasicAuthPassword=myPassword
-----
-
-Then you can start the Exporter as follows (Linux).
-
-[source,bash]
-----
-$ cd prometheus-exporter
-$ export JAVA_OPTS="-Djavax.net.ssl.trustStore=truststore.p12
-Djavax.net.ssl.trustStorePassword=truststorePassword
-Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory
-Dsolr.httpclient.config=basicauth.properties"
-$ export
ZK_CREDS_AND_ACLS="-DzkCredentialsProvider=org.apache.solr.common.cloud.VMParamsSingleSetCredentialsDigestZkCredentialsProvider
-DzkDigestUsername=readonly-user -DzkDigestPassword=zkUserPassword"
-$ export
CLASSPATH_PREFIX="../server/solr-webapp/webapp/WEB-INF/lib/commons-codec-1.11.jar"
-$ ./bin/solr-exporter -p 9854 -z zk1:2181,zk2:2181,zk3:2181 --config-file
./conf/solr-exporter-config.xml --num-threads 16
-----
-
-NOTE:: The Exporter needs the `commons-codec` library for SSL/BasicAuth, but
does not bring it.
-Therefore the example reuses it from the Solr web app.
-Of course, you can use a different source.
-
-=== Exporter Configuration
-The configuration for the `solr-exporter` defines the data to get from Solr.
-This includes the metrics, but can also include queries to the
PingRequestHandler, the Collections API, and a query to any query request
handler.
-
-A default example configuration is in
`prometheus-exporter/conf/solr-exporter-config.xml`.
-Below is a slightly shortened version of it:
-
-[source,xml]
-----
-<config>
-
- <rules>
-
- <ping>
- <lst name="request">
- <lst name="query">
- <str name="path">/admin/ping</str>
- </lst>
- <arr name="jsonQueries">
- <str>
- . as $object | $object |
- (if $object.status == "OK" then 1.0 else 0.0 end) as $value |
- {
- name : "solr_ping",
- type : "GAUGE",
- help : "See following URL:
https://solr.apache.org/guide/solr/latest/deployment-guide/ping.html",
- label_names : [],
- label_values : [],
- value : $value
- }
- </str>
- </arr>
- </lst>
- </ping>
-
- <metrics>
- <lst name="request">
- <lst name="query">
- <str name="path">/admin/metrics</str>
- <lst name="params">
- <str name="group">all</str>
- <str name="type">all</str>
- <str name="prefix"></str>
- <str name="property"></str>
- </lst>
- </lst>
- <arr name="jsonQueries">
- <!--
- jetty metrics
- -->
- <str>
- .metrics["solr.jetty"] | to_entries | .[] | select(.key |
startswith("org.eclipse.jetty.server.handler.DefaultHandler")) | select(.key |
endswith("xx-responses")) as $object |
- $object.key | split(".") | last | split("-") | first as $status |
- $object.value.count as $value |
- {
- name : "solr_metrics_jetty_response_total",
- type : "COUNTER",
- help : "See following URL:
https://solr.apache.org/guide/solr/latest/deployment-guide/metrics-reporting.html",
- label_names : ["status"],
- label_values : [$status],
- value : $value
- }
- </str>
-...
- </arr>
- </lst>
- </metrics>
-
- <collections>
- <lst name="request">
- <lst name="query">
- <str name="path">/admin/collections</str>
- <lst name="params">
- <str name="action">CLUSTERSTATUS</str>
- </lst>
- </lst>
- <arr name="jsonQueries">
- <str>
- .cluster.live_nodes | length as $value|
- {
- name : "solr_collections_live_nodes",
- type : "GAUGE",
- help : "See following URL:
https://solr.apache.org/guide/solr/latest/deployment-guide/cluster-node-management.html#clusterstatus",
- label_names : [],
- label_values : [],
- value : $value
- }
- </str>
-...
- </arr>
- </lst>
- </collections>
-
- <search>
- <lst name="request">
- <lst name="query">
- <str name="collection">collection1</str>
- <str name="path">/select</str>
- <lst name="params">
- <str name="q">*:*</str>
- <str name="start">0</str>
- <str name="rows">0</str>
- <str name="json.facet">
- {
- category: {
- type: terms,
- field: cat
- }
- }
- </str>
- </lst>
- </lst>
- <arr name="jsonQueries">
- <str>
- .facets.category.buckets[] as $object |
- $object.val as $term |
- $object.count as $value |
- {
- name : "solr_facets_category",
- type : "GAUGE",
- help : "Category facets",
- label_names : ["term"],
- label_values : [$term],
- value : $value
- }
- </str>
- </arr>
- </lst>
- </search>
-
- </rules>
-
-</config>
-----
-
-=== Configuration Tags and Elements
-The `solr-exporter` works by making a request to Solr according to the
definitions in the configuration file, scraping the response, and converting it
to a JSON structure Prometheus can understand.
-The configuration file defines the elements to request, how to scrape them,
and where to place the extracted data in the JSON template.
-
-The `solr-exporter` configuration file always starts and closes with two
simple elements:
-
-[source,xml]
-----
-<config>
- <rules>
-
- </rules>
-</config>
-----
-
-Between these elements, the data the `solr-exporter` should request is defined.
-There are several possible types of requests to make:
-
-[horizontal]
-`<ping>`:: Scrape the response to a xref:ping.adoc[] request.
-`<metrics>`:: Scrape the response to a
xref:metrics-reporting.adoc#metrics-api[Metrics API] request.
-`<collections>`:: Scrape the response to a
xref:configuration-guide:collections-api.adoc[] request.
-`<search>`:: Scrape the response to a
xref:query-guide:query-syntax-and-parsers.adoc[query] request.
-
-Within each of these types, we need to define the query and how to work with
the response.
-To do this, we define two additional elements:
-
-`<query>`::
-Defines the query parameter(s) used for the request.
-This section uses several additional properties to define your query:
-
-`collection`:::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-The collection to issue the query against.
-Only used with SolrCloud clusters.
-
-`core`:::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-The core to issue the query against.
-Only used with user-managed clusters or single-node installations.
-
-`path`:::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-The path to the query endpoint where the request will be sent.
-Examples include `admin/metrics` or `/select` or `admin/collections`.
-
-`params`:::
-+
-[%autowidth,frame=none]
-|===
-|Optional |Default: none
-|===
-+
-Additional query parameters.
-These will vary depending on the request type and the endpoint.
-For example, if using the Metrics endpoint, you can add parameters to limit
the query to a certain group and/or prefix.
-If you're using the Collections API, the command you want to use would be a
parameter.
-
-`<jsonQueries>`::
-This is an array that defines one or more JSON Queries in jq syntax.
-For more details about how to structure these queries, see
https://stedolan.github.io/jq/manual/[the jq user manual].
-+
-A jq query has to output JSON in the following format:
-+
-[source,json]
-----
-{
- "name": "solr_ping",
- "type": "GAUGE",
- "help": "See following URL:
https://solr.apache.org/guide/solr/latest/deployment-guide/ping.html",
- "label_names": ["base_url","core"],
- "label_values": ["http://localhost:8983/solr","collection1"],
- "value": 1.0
-}
-----
-
-See the section <<Exposition Format>> below for information about what
information should go into each property, and an example of how the above
example is translated for Prometheus.
-
-=== Exposition Format
-
-The `solr-exporter` converts the JSON to the following exposition format:
-
-[source,plain]
-----
-# TYPE <name> <type>
-# HELP <name> <help>
-<name>{<label_names[0]>=<label_values[0]>,<label_names[1]>=<labelvalues[1]>,...}
<value>
-----
-
-The following parameters should be set:
-
-`name`::
-The metric name to set.
-For more details, see https://prometheus.io/docs/practices/naming/[Prometheus
naming best practices].
-
-`type`::
-The type of the metric, can be `COUNTER`, `GAUGE`, `SUMMARY`, `HISTOGRAM` or
`UNTYPED`.
-For more details, see
https://prometheus.io/docs/concepts/metric_types/[Prometheus metric types].
-
-`help`::
-Help text for the metric.
-
-`label_names`::
-Label names for the metric.
-For more details, see https://prometheus.io/docs/practices/naming/[Prometheus
naming best practices].
-
-`label_values`::
-Label values for the metric.
-For more details, see https://prometheus.io/docs/practices/naming/[Prometheus
naming best practices].
-
-`value`::
-Value for the metric.
-Value must be set to Double type.
-
-For example, `solr-exporter` converts the JSON in the previous section to the
following:
-
-[source,plain]
-----
-# TYPE solr_ping gauge
-# HELP solr_ping See following URL:
https://solr.apache.org/guide/solr/latest/deployment-guide/ping.html
-solr_ping{base_url="http://localhost:8983/solr",core="collection1"} 1.0
-----
-
-=== Prometheus Configuration
-
-Prometheus is a separate server that you need to download and deploy.
-More information can be found at the Prometheus
https://prometheus.io/docs/prometheus/latest/getting_started/[Getting Started]
page.
-
-In order for Prometheus to know about the `solr-exporter`, the listen address
must be added to the Prometheus server's `prometheus.yml` configuration file,
as in this example:
-
-[source,plain]
-----
-scrape_configs:
- - job_name: 'solr'
- static_configs:
- - targets: ['localhost:9854']
-----
-
-If you already have a section for `scrape_configs`, you can add the `job_name`
and other values in the same section.
-
-When you apply the settings to Prometheus, it will start to pull Solr's
metrics from `solr-exporter`.
-
-You can test that the Prometheus server, `solr-exporter`, and Solr are working
together by browsing to http://localhost:9090 and
-doing a query for `solr_ping` metric in the Prometheus GUI:
-
-.Prometheus Solr Ping expression
-image::monitoring-with-prometheus-and-grafana/prometheus-solr-ping.png[image,width=800]
-
-=== Sample Grafana Dashboard
-
-To use Grafana for visualization, it must be downloaded and deployed
separately.
-More information can be found on the Grafana
https://grafana.com/docs/grafana/latest/[Documentation] site.
-Grafana consumes data from many sources, including the Prometheus server that
you previously set up.
-
-A Grafana sample dashboard is provided in the following JSON file:
`prometheus-exporter/conf/grafana-solr-dashboard.json`.
-You can place this with your other Grafana dashboard configurations and modify
it as necessary depending on any customization you've done for the
`solr-exporter` configuration.
-
-TIP: You can directly import the Solr dashboard
https://grafana.com/grafana/dashboards/12456[via grafana.com] by using the
Import function with the dashboard id `12456`.
-
-This screenshot shows what it might look like:
-
-.Grafana Dashboard
-image::monitoring-with-prometheus-and-grafana/grafana-solr-dashboard.png[image,width=800]
-
-== Metrics API with Prometheus format
-
-Prometheus Metrics are available natively directly from Solr by leveraging the
Metrics API and setting `wt` parameter to `prometheus`:
-
-[source,text]
-localhost:8983/solr/admin/metrics?wt=prometheus
-
-The Metrics API with the `prometheus` parameter does not provide any
configurability and the Prometheus output is fixed.
-Any metrics aggregations and/or filtering must be done on Grafana or the
Prometheus server.
-
-=== Prometheus Configuration
-
-Like the Prometheus Exporter, the `prometheus.yml` needs to be configured for
the Prometheus Server to ingest metrics.
-The difference is it must instead scrape the Metrics API with the
`wt=prometheus` parameter directly from each host/port Solr is running on as in
this example:
-
-[source,plain]
-----
-scrape_configs:
- - job_name: 'solr'
- metrics_path: "/solr/admin/metrics"
- static_configs:
- - targets: ['localhost:8983', 'localhost:7574']
- params:
- wt: ['prometheus']
-----
diff --git
a/solr/solr-ref-guide/modules/deployment-guide/pages/performance-statistics-reference.adoc
b/solr/solr-ref-guide/modules/deployment-guide/pages/performance-statistics-reference.adoc
index 653b3e5c757..19fe32dcdff 100644
---
a/solr/solr-ref-guide/modules/deployment-guide/pages/performance-statistics-reference.adoc
+++
b/solr/solr-ref-guide/modules/deployment-guide/pages/performance-statistics-reference.adoc
@@ -19,55 +19,87 @@
This page explains some of the statistics that Solr exposes.
There are two approaches to retrieving metrics.
-First, you can use the xref:metrics-reporting.adoc#metrics-api[Metrics API],
or you can enable JMX and get metrics from the
xref:mbean-request-handler.adoc[] or via an external tool such as JConsole.
-The below descriptions focus on retrieving the metrics using the Metrics API,
but the metric names are the same if using the MBean Request Handler or an
external tool.
+You can either query the xref:metrics-reporting.adoc#metrics-api[Metrics API] or
push metrics with OTLP to your monitoring backend.
+The descriptions below focus on retrieving metrics using the Metrics API and
Prometheus, but the metric names are the same with OTLP.
-These statistics are per core.
-When you are running in SolrCloud mode these statistics would co-relate to the
performance of an individual replica.
+These statistics are per core. When you are running in SolrCloud mode, these
statistics correspond to the performance of an individual replica.
+
+[NOTE]
+====
+*What about rates and percentiles e.g. QPS, p95 latency?*
+
+Solr exposes raw counters and histograms. You need to use PromQL (or a similar
query language) to transform these raw metrics into useful statistics. Below
are examples using PromQL. See the
https://prometheus.io/docs/prometheus/latest/querying/functions/[PromQL Query
Functions documentation] for more information.
+
+[cols="1,1,2",options="header"]
+|===
+|What you want |Raw metric type |PromQL
+
+|Queries per second (QPS)
+|Counter: `solr_core_requests_total`
+|`rate(solr_core_requests_total[5m])`
+
+|Total number of errors over 5 minutes
+|Counter: `solr_core_requests_errors_total`
+|`increase(solr_core_requests_errors_total[5m])`
+
+|95th percentile latency
+|Histogram: `_bucket` metrics
+|`histogram_quantile(0.95,
rate(solr_core_requests_times_milliseconds_bucket[5m]))`
+
+|===
+
+*Filtering by handler and excluding internal requests:*
+You can apply the same functions to aggregate on specific handlers and exclude
internal SolrCloud requests:
+
+[source,promql]
+----
+rate(solr_core_requests_total{handler="/select", internal="false"}[5m])
+----
+
+====
== Request Handler Statistics
+All handler metrics include a `handler` label that identifies the specific
handler the metric corresponds to (e.g., `/select`, `/sql`, `/export`, `/get`,
`/update`, etc.).
+This allows you to analyze statistics for individual handlers based on your
needs.
+
=== Update Request Handler
The update request handler is an endpoint to send data to Solr.
We can see how many update requests are being fired, how fast they are
being processed, and other valuable information regarding requests.
-*Registry & Path:* `solr.<core>:UPDATE./update`
-
-You can request update request handler statistics with an API request such as
`\http://localhost:8983/solr/admin/metrics?group=core&prefix=UPDATE`.
+You can request update request handler statistics with an API request such as
`\http://localhost:8983/solr/admin/metrics?category=UPDATE`.
=== Search Request Handler
Can be useful to measure and track number of search queries, response times,
etc.
-If you are not using the “select” handler then the path needs to be changed
appropriately.
-Similarly if you are using the “sql” handler or “export” handler, the realtime
handler “get”, or any other handler similar statistics can be found for that as
well.
-
-*Registry & Path*: `solr.<core>:QUERY./select`
-You can request statistics for the `/select` request handler with an API
request such as
`\http://localhost:8983/solr/admin/metrics?group=core&prefix=QUERY./select`.
+You can request statistics for the `/select` request handler with an API
request that filters by category name, such as
`\http://localhost:8983/solr/admin/metrics?category=QUERY`.
=== Commonly Used Stats for Request Handlers
All of the update and search request handlers will provide the following
statistics.
+*Request counts*
+
+[%autowidth.stretch,options="header"]
+|===
+|Metric |Description
+|`solr_core_requests_total` |Total number of HTTP requests to a core.
+|===
+
*Request Times*
To get request times, specifically, you can send an API request such as:
-*
`\http://localhost:8983/solr/admin/metrics?group=core&prefix=UPDATE./update.requestTimes`
-*
`\http://localhost:8983/solr/admin/metrics?group=core&prefix=QUERY./select.requestTimes`
+*
`\http://localhost:8983/solr/admin/metrics?name=solr_core_requests_times_milliseconds_bucket&category=UPDATE`
+*
`\http://localhost:8983/solr/admin/metrics?name=solr_core_requests_times_milliseconds_bucket&category=QUERY`
[%autowidth.stretch,options="header"]
|===
-|Attribute |Description
-|15minRate |Requests per second received over the past 15 minutes.
-|5minRate |Requests per second received over the past 5 minutes.
-|p75_ms |Request processing time for the request which belongs to the 75^th^
Percentile. E.g., if 100 requests are received, then the 75^th^ fastest request
time will be reported by this statistic.
-|p95_ms |Request processing time in milliseconds for the request which belongs
to the 95^th^ Percentile. E.g., if 100 requests are received, then the 95^th^
fastest request time will be reported in this statistic.
-|p999_ms |Request processing time in milliseconds for the request which
belongs to the 99.9^th^ Percentile. E.g., if 1000 requests are received, then
the 999^th^ fastest request time will be reported in this statistic.
-|p99_ms |Request processing time in milliseconds for the request which belongs
to the 99^th^ Percentile. E.g., if 100 requests are received, then the 99^th^
fastest request time will be reported in this statistic.
-|count |Total number of requests made since the Solr core was first created.
-|median_ms |Median of all the request processing time.
-|meanRate |Average number of requests received per second since the Solr core
was first created.
+|Metric |Description
+|`solr_core_requests_times_milliseconds_bucket` |Individual histogram buckets
containing cumulative counts of requests that completed within specific time
thresholds. Each bucket has an `le` (less than or equal) label indicating the
upper bound in milliseconds (e.g., `le="50.0"` counts all requests that took
≤50ms). The buckets are cumulative, so higher thresholds include counts from
all lower buckets. The final bucket with `le="+Inf"` contains the total count
of all requests regardless [...]
+|`solr_core_requests_times_milliseconds_count` |Total count of request
processing times recorded by the handler since the Solr core was created.
+|`solr_core_requests_times_milliseconds_sum` |Total sum of all request
processing times in milliseconds since the Solr core was created.
|===
*Errors and Other Times*
@@ -76,28 +108,16 @@ Other types of data such as errors and timeouts are also
provided.
These are available under different metric names.
For example:
-*
`\http://localhost:8983/solr/admin/metrics?group=core&prefix=UPDATE./update.errors`
-*
`\http://localhost:8983/solr/admin/metrics?group=core&prefix=QUERY./select.errors`
+*
`\http://localhost:8983/solr/admin/metrics?name=solr_core_requests_errors_total`
+*
`\http://localhost:8983/solr/admin/metrics?name=solr_core_requests_timeout_total`
The table below shows the metric names and attributes to request:
[cols="30,70",options="header"]
|===
|Metric name | Description
-|`QUERY./select.errors`
-`UPDATE./update.errors` |Number of errors encountered by handler. In addition
to a count of errors, mean, 1 minute, 5 minute, and 15 minute rates are also
available.
-|`QUERY./select.clientErrors`
-`UPDATE./update.clientErrors` |Number of syntax or parse errors made by a
client while making requests. In addition to a count of errors, mean, 1 minute,
5 minute, and 15 minute rates are also available.
-|`QUERY./select.requests`
-`UPDATE./update.requests` |Number of requests received by this handler.
-|`QUERY./select.serverErrors`
-`UPDATE./update.serverErrors` |Number of errors thrown by the server while
executing the request. In addition to a count of errors, mean, 1 minute, 5
minute, and 15 minute rates are also available.
-|`QUERY./select.timeouts`
-`UPDATE./update.timeouts` |Number of responses received with partial results.
In addition to a count of timeout events, mean, 1 minute, 5 minute, and 15
minute rates are also available.
-|`QUERY./select.totalTime`
-`UPDATE./update.totalTime` |The sum of all request processing times since the
Solr process was started in nanoseconds.
-|`QUERY./select.handlerStart`
-`UPDATE./update.handlerStart` |Epoch time when the handler was registered.
+|`solr_core_requests_errors_total` |Number of errors encountered by the handler.
Uses the `source` attribute to differentiate between client errors (bad requests,
malformed queries) and server errors (internal failures). Client errors have
`source="client"` while server errors have `source="server"`.
+|`solr_core_requests_timeout_total` |Counter for requests that were cancelled
due to timeouts.
|===
*Differentiating Internal Requests*
@@ -105,120 +125,63 @@ The table below shows the metric names and attributes to
request:
Processing of a single request in SolrCloud for a large collection requires
making additional requests to other replicas, often on other nodes.
The internal requests look much the same on the surface (same handler), but
they are performing a portion of the over-arching task.
Differentiating these requests is really important!
-Solr tracks its metrics on these handlers with a different handler name when
the request is contributing to some other request:
+Solr tracks its metrics on these handlers with an `internal` attribute when
the request is contributing to some other request:
-* Queries: `/select` query's internal requests will be tracked as
`/select[shard]`. Technically, this occurs on `SearchHandler` and its
subclasses.
+* Queries: `/select` query's internal requests will have `internal=true` in
their metric attributes. This occurs on `SearchHandler` and its subclasses.
* _(More can be instrumented some day)_
-Solr's Prometheus exporter configuration extracts this suffix on the handler
to a label named "internal". When configuring Grafana or other metrics tools,
be sure to filter these metrics in or out depending on what is being analyzed.
+When using external monitoring tools like Prometheus or Grafana, be sure to
filter metrics based on the `internal` attribute depending on what is being
analyzed.
== Update Handler
This section has information on the total number of adds and how many commits
have been fired against a Solr core.
-*Registry & Path:* `solr.<core>:UPDATE.updateHandler`
-
-You can get all update handler statistics shown in the table below with an API
request such as
`\http://localhost:8983/solr/admin/metrics?group=core&prefix=UPDATE.updateHandler`.
+You can get all update handler statistics shown in the table below with an API
request such as
`\http://localhost:8983/solr/admin/metrics?category=UPDATE,TLOG`.
The following describes the specific statistics you can get:
[%autowidth.stretch,options="header"]
|===
-|Attribute |Description
-|`UPDATE.updateHandler.adds` |Total number of “add” requests since last commit.
-|`UPDATE.updateHandler.autoCommitMaxTime` |Maximum time between two
auto-commits execution.
-|`UPDATE.updateHandler.autoCommits` |Total number of auto-commits executed.
-|`UPDATE.updateHandler.commits` | Number of total commits executed.
-
-In addition to a count of commits, mean, 1 minute, 5 minute, and 15 minute
rates are also available.
-|`UPDATE.updateHandler.cumulativeAdds` |Number of “effective” additions
executed over the lifetime. The counter is incremented when “add‘ command is
executed while decremented when “rollback” is executed.
-
-In addition to a count of adds, mean, 1 minute, 5 minute, and 15 minute rates
are also available.
-|`UPDATE.updateHandler.cumulativeDeletesById` |Number of document deletions
executed by ID over the lifetime. The counter is incremented when “delete”
command is executed and decremented when “rollback” is executed.
-
-In addition to a count of deletes, mean, 1 minute, 5 minute, and 15 minute
rates are also available.
-|`UPDATE.updateHandler.cumulativeDeletesByQuery` |Number of document deletions
executed by query over the lifetime. The counter is incremented when “delete”
command is executed and decremented when “rollback” is executed.
-
-In addition to a count of deletes, mean, 1 minute, 5 minute, and 15 minute
rates are also available.
-|`UPDATE.updateHandler.cumulativeErrors` |Number of error messages received
while performing addition/deletion actions on documents over the lifetime.
-
-In addition to a count of errors, mean, 1 minute, 5 minute, and 15 minute
rates are also available.
-|`UPDATE.updateHandler.deletesById` |Currently uncommitted deletions by ID.
-|`UPDATE.updateHandler.deletesByQuery` |Currently uncommitted deletions by
query.
-|`UPDATE.updateHandler.docsPending` |Number of documents which are pending
commit.
-|`UPDATE.updateHandler.errors` |Number of error messages received while
performing addition/deletion/commit/rollback actions on documents over the
lifetime of the core.
-|`UPDATE.updateHandler.expungeDeletes` |Number of commit commands issued with
expunge deletes.
+|Metric |Description
+|`solr_core_update_submitted_ops` |Counter for operations submitted to the
update handler.
+|`solr_core_update_committed_ops` |Counter for operations that have been
committed.
+|`solr_core_update_cumulative_ops` |Gauge showing the cumulative count of
operations over the lifetime. The count can decrease when a rollback command is executed.
+|`solr_core_update_commit_ops` |Counter for commit operations.
+|`solr_core_update_maintenance_ops` |Counter for total number of maintenance
operations such as rollback.
+|`solr_core_update_docs_pending_commit` |Gauge showing number of documents
pending commit.
+|`solr_core_update_log_buffered_ops` |Gauge for current number of buffered
operations.
+|`solr_core_update_log_replay_logs_remaining` |Gauge for the current number of tlogs
remaining to be replayed.
+|`solr_core_update_log_size_remaining` |Gauge for the total size in bytes of all tlogs
remaining to be replayed.
+|`solr_core_update_log_state` |Gauge for the current state of the update log:
replaying (0), buffering (1), applying buffered (2), or active (3).
+|`solr_core_update_log_applied_buffered_ops` |Counter for the number of buffered
operations applied.
+|===
-In addition to a count of expunge deletes, mean, 1 minute, 5 minute, and 15
minute rates are also available.
-|`UPDATE.updateHandler.merges` | Number of index merges that have occurred.
+== Cache Statistics
-In addition to a count of merges, mean, 1 minute, 5 minute, and 15 minute
rates are also available.
-|`UPDATE.updateHandler.optimizes` |Number of explicit optimize commands issued.
+You can get the statistics shown in the table below with an API request such
as `\http://localhost:8983/solr/admin/metrics?category=CACHE`. Each cache
metric has a `name` attribute attached that corresponds to the cache the metric
was recorded from.
-In addition to a count of optimizations, mean, 1 minute, 5 minute, and 15
minute rates are also available.
-|`UPDATE.updateHandler.rollbacks` |Number of rollbacks executed.
+The following statistics are available for each of the caches mentioned below:
-In addition to a count of rollbacks, mean, 1 minute, 5 minute, and 15 minute
rates are also available.
-|`UPDATE.updateHandler.softAutoCommitMaxTime` |Maximum document ‘adds’ between
two soft auto-commits.
-|`UPDATE.updateHandler.softAutoCommits` |Number of soft commits executed.
+[%autowidth.stretch,options="header"]
+|===
+|Metric Name |Description
+|`solr_caffeine_cache_ops` |Number of cumulative cache operations (inserts and
evictions).
+|`solr_caffeine_cache_lookups` |Number of cumulative cache lookup results
(hits and misses).
+|`solr_caffeine_cache_size` |Current number of cache entries.
+|`solr_caffeine_cache_ram_used` |RAM bytes used by cache.
|===
-
-== Cache Statistics
=== Document Cache
This cache holds Lucene Document objects (the stored fields for each document).
Since Lucene internal document IDs are transient, this cache cannot be
auto-warmed.
-*Registry and Path:* `solr.<core>:CACHE.searcher.documentCache`
-
-You can get the statistics shown in the table below with an API request such
as
`\http://localhost:8983/solr/admin/metrics?group=core&prefix=CACHE.searcher.documentCache`.
-
=== Query Result Cache
This cache holds the results of previous searches: ordered lists of document
IDs based on a query, a sort, and the range of documents requested
-*Registry and Path:* `solr.<core>:CACHE.searcher.queryResultCache`
-
-You can get the statistics shown in the table below with an API request such
as
`\http://localhost:8983/solr/admin/metrics?group=core&prefix=CACHE.searcher.queryResultCache`.
-
=== Filter Cache
This cache is used for filters for unordered sets of all documents that match
a query.
-*Registry and Path:* `solr.<core>:CACHE.searcher.filterCache`
-
-You can get the statistics shown in the table below with an API request such
as
`\http://localhost:8983/solr/admin/metrics?group=core&prefix=CACHE.searcher.filterCache`.
-
-=== Statistics for Caches
-
-The following statistics are available for each of the caches mentioned above:
-
-[%autowidth.stretch,options="header"]
-|===
-|Attribute |Description
-|cumulative_evictions |Number of cache evictions across all caches since this
node has been running.
-|cumulative_hitratio |Ratio of cache hits to lookups across all the caches
since this node has been running.
-|cumulative_hits |Number of cache hits across all the caches since this node
has been running.
-|cumulative_inserts |Number of cache insertions across all the caches since
this node has been running.
-|cumulative_lookups |Number of cache lookups across all the caches since this
node has been running.
-|evictions |Number of cache evictions for the current index searcher.
-|hitratio |Ratio of cache hits to lookups for the current index searcher.
-|hits |Number of hits for the current index searcher.
-|inserts |Number of inserts into the cache.
-|lookups |Number of lookups against the cache.
-|size |Number of entries in the cache at that particular instance.
-|warmupTime |Warm-up time for the registered index searcher in milliseconds.
This time is taken in account for the “auto-warming” of caches.
-|===
-
-When eviction by heap usage is enabled, the following additional statistics
are available for the Query Result Cache:
-
-[cols="25,75",options="header"]
-|===
-|Attribute |Description
-|maxRamMB |Maximum heap that should be used by the cache beyond which keys
will be evicted.
-|ramBytesUsed| Actual heap usage of the cache at that particular instance.
-|evictionsRamUsage| Number of cache evictions for the current index searcher
because heap usage exceeded maxRamMB.
-|===
-
More information on Solr caches is available in the section
xref:configuration-guide:caches-warming.adoc[].
diff --git
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc
index a2902b67753..fc05af4d7ef 100644
---
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc
+++
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc
@@ -179,6 +179,10 @@ Nowadays, the HTTP request is available via internal APIs:
`SolrQueryRequest.get
+
+NOTE: The previous parse-context-based configuration (`parseContext.config`)
is no longer supported. Tika parser-specific properties must now be configured
directly on the Tika Server itself, rather than through Solr configuration.
Please refer to the Tika Server documentation for details on how to set these
properties.
+* The Prometheus exporter, JMX, SLF4J and Graphite metric reporters have been
removed. Users should migrate to using OTLP or the `/admin/metrics` endpoint
with external tools to get metrics to their preferred backend such as the
link:https://opentelemetry.io/docs/collector/[OTEL Collector].
+
+* SolrInfoMBeanHandler and PluginInfoHandler have been removed.
+
=== Security
* There is no longer a distinction between trusted and untrusted configSets;
all configSets are now considered trusted. To ensure security, Solr should be
properly protected using authentication and authorization mechanisms, allowing
only authorized users with administrative privileges to publish them.
@@ -187,11 +191,19 @@ Nowadays, the HTTP request is available via internal
APIs: `SolrQueryRequest.get
Solr upgraded to Jetty 12.x from 10.x as Jetty 10 and 11 have reached
end-of-life support. Jetty 12.x requires Java 17 or newer and is fully
compatible with Solr's new minimum requirement of Java 21. This upgrade brings
support for modern HTTP protocols and adopts the Jakarta EE 10 namespace. For
more details, see https://webtide.com/jetty-12-has-arrived/.
-=== Open Telemetry
+=== OpenTelemetry
+
+Solr 10 has migrated from Dropwizard metrics to OpenTelemetry (OTEL) for
observability. This migration provides native Prometheus support, OTLP support,
exemplar support for tracing correlation, and native attributes and labels on
all metrics.
+
+* All metrics have been migrated to snake-case metric names instead of
dot-delimited format and now natively include attributes/labels.
+
+* The `/admin/metrics` API now defaults to Prometheus exposition format and no
longer supports XML/JSON/javabin. You can specify `wt=prometheus` as a
parameter for Prometheus format or `wt=openmetrics` for OpenMetrics exposition
format with exemplars support (distributed tracing must be enabled to view
exemplars).
+
+* The metrics API supports filtering by metric name and attributes. See
xref:deployment-guide:metrics-reporting.adoc#api-filtering[Metrics API Filter]
for more info.
-* The Prometheus exporter, JMX, SLF4J and Graphite metric reporters have been
removed. Users should migrate to using OTLP or the /admin/metrics endpoint with
external tools to get metrics to their preferred backend such as the
link:https://opentelemetry.io/docs/collector/[OTEL Collector].
+* OTLP metrics exporter via gRPC or HTTP is now supported with the
OpenTelemetry module. Users can enable the module to push metrics to their
preferred OTLP-supported backend.
-* Core renaming and swapping will reset the state of all the corresponding
cores metrics
+* Core renaming and swapping will reset the state of all corresponding core
metrics.
=== Docker
diff --git
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-7.adoc
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-7.adoc
index 9dedd41ae7e..a9f776d6dc4 100644
---
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-7.adoc
+++
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-7.adoc
@@ -187,7 +187,7 @@ This default instance exports all Solr metrics from all
registries as hierarchic
This behavior can be also disabled by specifying a `SolrJmxReporter`
configuration with a boolean init argument `enabled` set to `false`.
For a more fine-grained control users should explicitly specify at least one
`SolrJmxReporter` configuration.
+
-See also the section
xref:deployment-guide:metrics-reporting.adoc#the-metrics-reporters-element[The
<metrics><reporters> Element], which describes how to set up Metrics Reporters
in `solr.xml`.
+See also the section
https://solr.apache.org/guide/7_7/metrics-reporting.html#the-metrics-reporters-element[The
<metrics><reporters> Element], which describes how to set up Metrics Reporters
in `solr.xml`.
Note that back-compatibility support may be removed in Solr 8.
* MBean names and attributes now follow the hierarchical names used in
metrics. This is reflected also in `/admin/mbeans` and `/admin/plugins` output,
and can be observed in the UI Plugins tab, because now all these APIs get their
data from the metrics API.
diff --git
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
index d07e8d8081d..037c7339fe1 100644
---
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
+++
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
@@ -659,7 +659,7 @@ For examples using this feature, see the section
xref:query-guide:learning-to-ra
*Prometheus Exporter*
* The `./bin` scripts included with the Prometheus Exporter now allow use of
custom java options with environment variables.
-See the section
xref:deployment-guide:monitoring-with-prometheus-and-grafana.adoc#environment-variable-options[Environment
Variable Options] for more details.
+See the section
https://solr.apache.org/guide/solr/9_9/deployment-guide/monitoring-with-prometheus-and-grafana.html#environment-variable-options[Environment
Variable Options] for more details.
* The default Grafana dashboards now include panels for query performance
monitoring.
The default Prometheus Exporter configuration includes metrics like
queries-per-second (QPS) and 95th percentiles (P95) to populate the new panels.
* The default Prometheus Exporter configuration also includes the two new
metrics mentioned in the Metrics above.