This is an automated email from the ASF dual-hosted git repository.
dsmiley pushed a commit to branch feature/SOLR-17458
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/feature/SOLR-17458 by this
push:
new f3e1c8fcdc4 SOLR-17799: Revamp index merge & flush metrics. Use OTEL.
f3e1c8fcdc4 is described below
commit f3e1c8fcdc42539ed1f66340f3febccc6b8bc408
Author: Kevin Liang <[email protected]>
AuthorDate: Mon Oct 13 21:32:16 2025 -0400
SOLR-17799: Revamp index merge & flush metrics. Use OTEL.
Replace existing segment merge metrics with consistent counters in OTEL
format
* Enabled segment merge metrics by default (configurable threshold for
major merges behavior is unchanged)
* Removed the index merge running gauge metrics
---
.../org/apache/solr/update/SolrIndexWriter.java | 308 +++++++++------------
.../solr/metrics/SolrMetricsIntegrationTest.java | 2 +-
.../apache/solr/update/SolrIndexMetricsTest.java | 197 +++++--------
.../deployment-guide/pages/metrics-reporting.adoc | 50 +---
4 files changed, 216 insertions(+), 341 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java
b/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java
index 75bd8ddc311..5b3f2b5ccd5 100644
--- a/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java
+++ b/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java
@@ -17,17 +17,16 @@
package org.apache.solr.update;
import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR;
-import static org.apache.solr.metrics.SolrMetricProducer.TYPE_ATTR;
import io.opentelemetry.api.common.AttributeKey;
-import io.opentelemetry.api.metrics.ObservableLongGauge;
+import io.opentelemetry.api.common.Attributes;
+import io.opentelemetry.api.metrics.LongCounter;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.IndexDeletionPolicy;
@@ -45,7 +44,6 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.metrics.SolrMetricsContext;
import org.apache.solr.metrics.otel.OtelUnit;
-import org.apache.solr.metrics.otel.instruments.AttributedLongCounter;
import org.apache.solr.metrics.otel.instruments.AttributedLongTimer;
import org.apache.solr.schema.IndexSchema;
import org.slf4j.Logger;
@@ -69,7 +67,14 @@ public class SolrIndexWriter extends IndexWriter {
public static final String COMMIT_COMMAND_VERSION = "commitCommandVer";
+ // TODO: we should eventually explore moving to a histogram distribution
style of classifying
+ // merges instead of just setting an (arbitrary) document count threshold
for major/minor (see
+ // discussion on SOLR-17799). This has its own considerations as well, given
that the most
+ // commonly used tiered merge policy causes merges to get exponentially
larger.
public static final AttributeKey<String> MERGE_TYPE_ATTR =
AttributeKey.stringKey("merge_type");
+ public static final AttributeKey<String> MERGE_STATE_ATTR =
AttributeKey.stringKey("merge_state");
+ public static final AttributeKey<String> MERGE_OP_ATTR =
AttributeKey.stringKey("merge_op");
+ public static final AttributeKey<String> RESULT_ATTR =
AttributeKey.stringKey("result");
private final Object CLOSE_LOCK = new Object();
@@ -80,23 +85,17 @@ public class SolrIndexWriter extends IndexWriter {
// metrics
private long majorMergeDocs = 512 * 1024;
- private AttributedLongTimer majorMerge;
- private AttributedLongTimer minorMerge;
- private AttributedLongCounter majorMergedDocs;
- private AttributedLongCounter majorDeletedDocs;
- private AttributedLongCounter mergeErrors;
- private AttributedLongCounter flushes; // original counter is
package-private in IndexWriter
- private boolean mergeTotals = false;
- private boolean mergeDetails = false;
- private final AtomicInteger runningMajorMerges = new AtomicInteger();
- private final AtomicInteger runningMinorMerges = new AtomicInteger();
- private final AtomicInteger runningMajorMergesSegments = new AtomicInteger();
- private final AtomicInteger runningMinorMergesSegments = new AtomicInteger();
- private final AtomicLong runningMajorMergesDocs = new AtomicLong();
- private final AtomicLong runningMinorMergesDocs = new AtomicLong();
- private ObservableLongGauge mergeStats;
-
- private final SolrMetricsContext solrMetricsContext;
+ private LongCounter mergesCounter;
+ private LongCounter mergeDocsCounter;
+ private LongCounter mergeSegmentsCounter;
+ private LongCounter flushesCounter;
+
+ private AttributedLongTimer majorMergeTimer;
+ private AttributedLongTimer minorMergeTimer;
+
+ private SolrMetricsContext solrMetricsContext;
+ private Attributes baseAttributes;
+
// merge diagnostics.
private final Map<String, Long> runningMerges = new ConcurrentHashMap<>();
@@ -134,8 +133,6 @@ public class SolrIndexWriter extends IndexWriter {
numOpens.incrementAndGet();
log.debug("Opened Writer {}", name);
// no metrics
- mergeTotals = false;
- mergeDetails = false;
solrMetricsContext = null;
}
@@ -173,118 +170,8 @@ public class SolrIndexWriter extends IndexWriter {
log.warn("Invalid 'majorMergeDocs' argument, using default 512k", e);
}
}
- Boolean Totals = config.metricsInfo.initArgs.getBooleanArg("merge");
- Boolean Details =
config.metricsInfo.initArgs.getBooleanArg("mergeDetails");
- if (Details != null) {
- mergeDetails = Details;
- } else {
- mergeDetails = false;
- }
- if (Totals != null) {
- mergeTotals = Totals;
- } else {
- mergeTotals = false;
- }
- var baseAttributes =
- core.getCoreAttributes().toBuilder()
- .put(CATEGORY_ATTR, SolrInfoBean.Category.INDEX.toString())
- .build();
- if (mergeDetails) {
- mergeTotals = true; // override
- majorMergedDocs =
- new AttributedLongCounter(
- solrMetricsContext.longCounter(
- "solr_indexwriter_major_merged_docs",
- "Number of documents merged while merging segments above
the majorMergeDocs threshold ("
- + majorMergeDocs
- + ")"),
- baseAttributes);
- majorDeletedDocs =
- new AttributedLongCounter(
- solrMetricsContext.longCounter(
- "solr_indexwriter_major_deleted_docs",
- "Number of deleted documents that were expunged while
merging segments above the majorMergeDocs threshold ("
- + majorMergeDocs
- + ")"),
- baseAttributes);
- }
- if (mergeTotals) {
- minorMerge =
- new AttributedLongTimer(
- solrMetricsContext.longHistogram(
- "solr_indexwriter_merge",
- "Time spent merging segments below or equal to the
majorMergeDocs threshold ("
- + majorMergeDocs
- + ")",
- OtelUnit.MILLISECONDS),
- baseAttributes.toBuilder().put(MERGE_TYPE_ATTR,
"minor").build());
- majorMerge =
- new AttributedLongTimer(
- solrMetricsContext.longHistogram(
- "solr_indexwriter_merge",
- "Time spent merging segments above the majorMergeDocs
threshold ("
- + majorMergeDocs
- + ")",
- OtelUnit.MILLISECONDS),
- baseAttributes.toBuilder().put(MERGE_TYPE_ATTR,
"major").build());
- mergeErrors =
- new AttributedLongCounter(
- solrMetricsContext.longCounter(
- "solr_indexwriter_merge_errors", "Number of merge errors"),
- baseAttributes);
- String tag = core.getMetricTag();
- mergeStats =
- solrMetricsContext.observableLongGauge(
- "solr_indexwriter_merge_stats",
- "Metrics around currently running segment merges; major :=
above the majorMergeDocs threshold ("
- + majorMergeDocs
- + "), minor := below or equal to the threshold",
- (observableLongMeasurement -> {
- observableLongMeasurement.record(
- runningMajorMerges.get(),
- baseAttributes.toBuilder()
- .put(TYPE_ATTR, "running")
- .put(MERGE_TYPE_ATTR, "major")
- .build());
- observableLongMeasurement.record(
- runningMajorMergesDocs.get(),
- baseAttributes.toBuilder()
- .put(TYPE_ATTR, "running_docs")
- .put(MERGE_TYPE_ATTR, "major")
- .build());
- observableLongMeasurement.record(
- runningMajorMergesSegments.get(),
- baseAttributes.toBuilder()
- .put(TYPE_ATTR, "running_segments")
- .put(MERGE_TYPE_ATTR, "major")
- .build());
- observableLongMeasurement.record(
- runningMinorMerges.get(),
- baseAttributes.toBuilder()
- .put(TYPE_ATTR, "running")
- .put(MERGE_TYPE_ATTR, "minor")
- .build());
- observableLongMeasurement.record(
- runningMinorMergesDocs.get(),
- baseAttributes.toBuilder()
- .put(TYPE_ATTR, "running_docs")
- .put(MERGE_TYPE_ATTR, "minor")
- .build());
- observableLongMeasurement.record(
- runningMinorMergesSegments.get(),
- baseAttributes.toBuilder()
- .put(TYPE_ATTR, "running_segments")
- .put(MERGE_TYPE_ATTR, "minor")
- .build());
- }));
- flushes =
- new AttributedLongCounter(
- solrMetricsContext.longCounter(
- "solr_indexwriter_flush",
- "Number of times added/deleted documents have been flushed
to the Directory"),
- baseAttributes);
- }
}
+ initMetrics(core);
}
@SuppressForbidden(
@@ -309,60 +196,36 @@ public class SolrIndexWriter extends IndexWriter {
this.directoryFactory = factory;
}
+ // for testing
+ public void setMajorMergeDocs(long majorMergeDocs) {
+ this.majorMergeDocs = majorMergeDocs;
+ }
+
// we override this method to collect metrics for merges.
@Override
protected void merge(MergePolicy.OneMerge merge) throws IOException {
String segString = merge.segString();
long totalNumDocs = merge.totalNumDocs();
runningMerges.put(segString, totalNumDocs);
- if (!mergeTotals) {
- try {
- super.merge(merge);
- } finally {
- runningMerges.remove(segString);
- }
- return;
- }
long deletedDocs = 0;
for (SegmentCommitInfo info : merge.segments) {
totalNumDocs -= info.getDelCount();
deletedDocs += info.getDelCount();
}
- boolean major = totalNumDocs > majorMergeDocs;
int segmentsCount = merge.segments.size();
- AttributedLongTimer.MetricTimer context;
- if (major) {
- runningMajorMerges.incrementAndGet();
- runningMajorMergesDocs.addAndGet(totalNumDocs);
- runningMajorMergesSegments.addAndGet(segmentsCount);
- if (mergeDetails) {
- majorMergedDocs.add(totalNumDocs);
- majorDeletedDocs.add(deletedDocs);
- }
- context = majorMerge.start();
- } else {
- runningMinorMerges.incrementAndGet();
- runningMinorMergesDocs.addAndGet(totalNumDocs);
- runningMinorMergesSegments.addAndGet(segmentsCount);
- context = minorMerge.start();
- }
+ AttributedLongTimer.MetricTimer timer =
+ updateMergeMetrics(totalNumDocs, deletedDocs, segmentsCount, false,
false, null);
try {
super.merge(merge);
+ updateMergeMetrics(totalNumDocs, deletedDocs, segmentsCount, true,
false, timer);
} catch (Throwable t) {
- mergeErrors.inc();
+ if (timer != null) {
+ timer.stop();
+ }
+ updateMergeMetrics(totalNumDocs, deletedDocs, segmentsCount, true, true,
timer);
throw t;
} finally {
runningMerges.remove(segString);
- context.stop();
- if (major) {
- runningMajorMerges.decrementAndGet();
- runningMajorMergesDocs.addAndGet(-totalNumDocs);
- runningMajorMergesSegments.addAndGet(-segmentsCount);
- } else {
- runningMinorMerges.decrementAndGet();
- runningMinorMergesDocs.addAndGet(-totalNumDocs);
- runningMinorMergesSegments.addAndGet(-segmentsCount);
- }
}
}
@@ -372,12 +235,111 @@ public class SolrIndexWriter extends IndexWriter {
@Override
protected void doAfterFlush() throws IOException {
- if (flushes != null) { // this is null when writer is used only for
snapshot cleanup
- flushes.inc(); // or if mergeTotals == false
+ if (flushesCounter != null) { // this is null when writer is used only for
snapshot cleanup
+ flushesCounter.add(1L, baseAttributes);
}
super.doAfterFlush();
}
+ private void initMetrics(final SolrCore core) {
+ if (solrMetricsContext == null) {
+ solrMetricsContext = core.getSolrMetricsContext().getChildContext(this);
+ }
+
+ var baseAttributesBuilder =
+ Attributes.builder().put(CATEGORY_ATTR,
SolrInfoBean.Category.INDEX.toString());
+ baseAttributes = baseAttributesBuilder.build();
+
+ mergesCounter =
+ solrMetricsContext.longCounter(
+ "solr_indexwriter_merges",
+ "Number of total merge operations, "
+ + " where \"major\" merges involve more than "
+ + majorMergeDocs
+ + " documents, otherwise merge classified as minor.");
+ mergeDocsCounter =
+ solrMetricsContext.longCounter(
+ "solr_indexwriter_merge_docs",
+ "Number of documents involved in merge, "
+ + " where \"major\" merges involve more than "
+ + majorMergeDocs
+ + " documents, otherwise merge classified as minor.");
+ mergeSegmentsCounter =
+ solrMetricsContext.longCounter(
+ "solr_indexwriter_merge_segments",
+ "Number of segments involved in merge, "
+ + " where \"major\" merges involve more than "
+ + majorMergeDocs
+ + " documents, otherwise merge classified as minor.");
+ flushesCounter =
+ solrMetricsContext.longCounter(
+ "solr_indexwriter_flushes", "Number of flush to disk operations
triggered");
+
+ var mergesTimerBase =
+ solrMetricsContext.longHistogram(
+ "solr_indexwriter_merge_time",
+ "Time spent merging segments, "
+ + " where \"major\" merges involve more than "
+ + majorMergeDocs
+ + " documents, otherwise merge classified as minor.",
+ OtelUnit.MILLISECONDS);
+ majorMergeTimer =
+ new AttributedLongTimer(
+ mergesTimerBase, baseAttributes.toBuilder().put(MERGE_TYPE_ATTR,
"major").build());
+ minorMergeTimer =
+ new AttributedLongTimer(
+ mergesTimerBase, baseAttributes.toBuilder().put(MERGE_TYPE_ATTR,
"minor").build());
+ }
+
+ /**
+ * Updates relevant metrics related to segment merging
+ *
+ * @param numDocs number of documents in merge op
+ * @param numDeletedDocs number of deleted docs in merge op
+ * @param numSegments number of segments in merge op
+ * @param mergeCompleted true if being called after the merge has ended
(successfully or with an error), else false to signify a
+ * merge is about to start
+ * @param mergeFailed true if merge entered an unrecoverable error state,
else false
+ * @param metricTimer an existing timer context for actively running merge
+ * @return timer context for current merge operation
+ */
+ private AttributedLongTimer.MetricTimer updateMergeMetrics(
+ long numDocs,
+ long numDeletedDocs,
+ long numSegments,
+ boolean mergeCompleted,
+ boolean mergeFailed,
+ AttributedLongTimer.MetricTimer metricTimer) {
+ if (solrMetricsContext == null) {
+ return null;
+ }
+ boolean isMajorMerge = numDocs > majorMergeDocs;
+ var attributes = baseAttributes.toBuilder();
+ attributes.put(MERGE_TYPE_ATTR, isMajorMerge ? "major" : "minor");
+ Attributes mergeAttr;
+ if (mergeCompleted) { // merge operation terminating
+ if (metricTimer != null) {
+ metricTimer.stop();
+ }
+ attributes.put(MERGE_STATE_ATTR, "completed");
+ attributes.put(RESULT_ATTR, mergeFailed ? "error" : "success");
+
+ } else { // merge operation starting
+ metricTimer = isMajorMerge ? majorMergeTimer.start() :
minorMergeTimer.start();
+ attributes.put(MERGE_STATE_ATTR, "started");
+ }
+ mergeAttr = attributes.build();
+ mergesCounter.add(1L, mergeAttr);
+ mergeSegmentsCounter.add(numSegments, mergeAttr);
+
+ mergeDocsCounter.add(
+ numDocs, mergeAttr.toBuilder().put(MERGE_OP_ATTR, "merge").build());
// docs merged
+ mergeDocsCounter.add(
+ numDeletedDocs, mergeAttr.toBuilder().put(MERGE_OP_ATTR,
"delete").build());
+
+ return metricTimer;
+ }
+
// use DocumentBuilder now...
// private final void addField(Document doc, String name, String val) {
// SchemaField ftype = schema.getField(name);
@@ -462,7 +424,7 @@ public class SolrIndexWriter extends IndexWriter {
if (directoryFactory != null) {
directoryFactory.release(directory);
}
- IOUtils.closeQuietly(mergeStats);
+
if (solrMetricsContext != null) {
solrMetricsContext.unregister();
}
diff --git
a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
index 8c994f6d9f8..dcb75938abe 100644
--- a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
@@ -85,7 +85,7 @@ public class SolrMetricsIntegrationTest extends
SolrTestCaseJ4 {
public void testCoreContainerMetrics() {
MetricSnapshots metrics =
new MetricSnapshots(
- metricManager.getPrometheusMetricReaders().entrySet().stream()
+ metricManager.getPrometheusMetricReaders().entrySet().stream()
.flatMap(
entry ->
entry.getValue().collect().stream()
diff --git
a/solr/core/src/test/org/apache/solr/update/SolrIndexMetricsTest.java
b/solr/core/src/test/org/apache/solr/update/SolrIndexMetricsTest.java
index 2a40735ef40..f7d0470c88d 100644
--- a/solr/core/src/test/org/apache/solr/update/SolrIndexMetricsTest.java
+++ b/solr/core/src/test/org/apache/solr/update/SolrIndexMetricsTest.java
@@ -17,7 +17,10 @@
package org.apache.solr.update;
import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR;
+import static org.apache.solr.update.SolrIndexWriter.MERGE_OP_ATTR;
+import static org.apache.solr.update.SolrIndexWriter.MERGE_STATE_ATTR;
import static org.apache.solr.update.SolrIndexWriter.MERGE_TYPE_ATTR;
+import static org.apache.solr.update.SolrIndexWriter.RESULT_ATTR;
import io.prometheus.metrics.model.snapshots.MetricSnapshots;
import org.apache.solr.SolrTestCaseJ4;
@@ -41,7 +44,7 @@ public class SolrIndexMetricsTest extends SolrTestCaseJ4 {
SolrQueryRequest req = lrf.makeRequest();
UpdateHandler uh = req.getCore().getUpdateHandler();
AddUpdateCommand add = new AddUpdateCommand(req);
- for (int i = 0; i < 1000; i++) {
+ for (int i = 0; i < 800; i++) {
add.clear();
add.solrDoc = new SolrInputDocument();
add.solrDoc.addField("id", "" + i);
@@ -53,63 +56,8 @@ public class SolrIndexMetricsTest extends SolrTestCaseJ4 {
h.reload();
}
- @Test
- public void testIndexMetricsNoDetails() throws Exception {
- System.setProperty("solr.tests.metrics.merge", "true");
- System.setProperty("solr.tests.metrics.mergeDetails", "false");
- initCore("solrconfig-indexmetrics.xml", "schema.xml");
-
- addDocs();
-
- try (SolrCore core = h.getCoreContainer().getCore("collection1")) {
- // check basic index meters
- var minorMergeTimer =
- SolrMetricTestUtils.getHistogramDatapoint(
- core,
- "solr_indexwriter_merge_milliseconds",
- SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
- .label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
- .label(MERGE_TYPE_ATTR.toString(), "minor")
- .build());
- assertTrue("minorMerge: " + minorMergeTimer.getCount(),
minorMergeTimer.getCount() >= 3);
- var majorMergeTimer =
- SolrMetricTestUtils.getHistogramDatapoint(
- core,
- "solr_indexwriter_merge_milliseconds",
- SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
- .label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
- .label(MERGE_TYPE_ATTR.toString(), "major")
- .build());
- // major merge timer should have a value of 0, and because 0 values are
not reported, no
- // datapoint is available
- assertNull("majorMergeTimer", majorMergeTimer);
-
- // check detailed meters
- var majorMergeDocs =
- SolrMetricTestUtils.getCounterDatapoint(
- core,
- "solr_indexwriter_major_merged_docs",
- SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
- .label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
- .build());
- // major merge docs should be null because mergeDetails is false
- assertNull("majorMergeDocs", majorMergeDocs);
-
- var flushCounter =
- SolrMetricTestUtils.getCounterDatapoint(
- core,
- "solr_indexwriter_flush",
- SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
- .label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
- .build());
- assertTrue("flush: " + flushCounter.getValue(), flushCounter.getValue()
> 10);
- }
- }
-
@Test
public void testIndexNoMetrics() throws Exception {
- System.setProperty("solr.tests.metrics.merge", "false");
- System.setProperty("solr.tests.metrics.mergeDetails", "false");
initCore("solrconfig-indexmetrics.xml", "schema.xml");
addDocs();
try (SolrCore core = h.getCoreContainer().getCore("collection1")) {
@@ -133,9 +81,8 @@ public class SolrIndexMetricsTest extends SolrTestCaseJ4 {
}
@Test
- public void testIndexMetricsWithDetails() throws Exception {
- System.setProperty("solr.tests.metrics.merge", "false"); // test
mergeDetails override too
- System.setProperty("solr.tests.metrics.mergeDetails", "true");
+ public void testIndexMetricsMajorAndMinorMerges() throws Exception {
+ System.setProperty("solr.tests.metrics.majorMergeDocs", "450");
initCore("solrconfig-indexmetrics.xml", "schema.xml");
addDocs();
@@ -144,116 +91,118 @@ public class SolrIndexMetricsTest extends SolrTestCaseJ4
{
var prometheusMetricReader =
SolrMetricTestUtils.getPrometheusMetricReader(core);
assertNotNull(prometheusMetricReader);
MetricSnapshots otelMetrics = prometheusMetricReader.collect();
- assertTrue("Metrics count: " + otelMetrics.size(), otelMetrics.size() >=
19);
+ assertTrue("Metrics count: " + otelMetrics.size(), otelMetrics.size() >=
18);
+
+ // addDocs() adds 800 documents and then sends a commit.
maxBufferedDocs==100,
+ // segmentsPerTier==3,
+ // maxMergeAtOnce==3 and majorMergeDocs==450. Thus, new documents
form segments with 100
+ // docs, merges are
+ // called for when there are 3 segments at the lowest tier, and the
merges are as follows:
+ // 1. 100 + 100 + 100 ==> new 300 doc segment, below the 450
threshold ==> minor merge
+ // 2. 100 + 100 + 100 ==> new 300 doc segment, below the 450
threshold ==> minor merge
+ // 3. 300 + 100 + 100 ==> new 500 doc segment, above the 450
threshold ==> major merge
// check basic index meters
var minorMergeTimer =
SolrMetricTestUtils.getHistogramDatapoint(
core,
- "solr_indexwriter_merge_milliseconds",
+ "solr_indexwriter_merge_time_milliseconds",
SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
.label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
.label(MERGE_TYPE_ATTR.toString(), "minor")
.build());
- assertTrue("minorMergeTimer: " + minorMergeTimer.getCount(),
minorMergeTimer.getCount() >= 3);
+ assertEquals(
+ "minorMergeTimer instances should be at least 2, got: " +
minorMergeTimer.getCount(),
+ 2,
+ minorMergeTimer.getCount());
var majorMergeTimer =
SolrMetricTestUtils.getHistogramDatapoint(
core,
- "solr_indexwriter_merge_milliseconds",
+ "solr_indexwriter_merge_time_milliseconds",
SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
.label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
.label(MERGE_TYPE_ATTR.toString(), "major")
.build());
- // major merge timer should have a value of 0, and because 0 values are
not reported, no
- // datapoint is available
- assertNull("majorMergeTimer", majorMergeTimer);
+ assertEquals(
+ "majorMergeTimer instances should be at least 1, got: " +
majorMergeTimer.getCount(),
+ 1,
+ majorMergeTimer.getCount());
- // check detailed meters
- var majorMergeDocs =
+ var minorMergeDocs =
SolrMetricTestUtils.getCounterDatapoint(
core,
- "solr_indexwriter_major_merged_docs",
+ "solr_indexwriter_merge_docs",
SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
.label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
+ .label(MERGE_TYPE_ATTR.toString(), "minor")
+ .label(MERGE_OP_ATTR.toString(), "merge")
+ .label(MERGE_STATE_ATTR.toString(), "completed")
+ .label(RESULT_ATTR.toString(), "success")
.build());
- // major merge docs should have a value of 0, and because 0 values are
not reported, no
- // datapoint is available
- assertNull("majorMergeDocs", majorMergeDocs);
-
- var flushCounter =
+ assertEquals(
+ "minorMergeDocs should be 600, got: " + minorMergeDocs.getValue(),
+ 600,
+ (long) minorMergeDocs.getValue());
+ var majorMergeDocs =
SolrMetricTestUtils.getCounterDatapoint(
core,
- "solr_indexwriter_flush",
+ "solr_indexwriter_merge_docs",
SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
.label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
+ .label(MERGE_TYPE_ATTR.toString(), "major")
+ .label(MERGE_OP_ATTR.toString(), "merge")
+ .label(MERGE_STATE_ATTR.toString(), "completed")
+ .label(RESULT_ATTR.toString(), "success")
.build());
- assertTrue("flush: " + flushCounter.getValue(), flushCounter.getValue()
> 10);
- }
- }
-
- public void testIndexMetricsMajorAndMinorMergesWithDetails() throws
Exception {
- System.setProperty("solr.tests.metrics.merge", "false"); // test
mergeDetails override too
- System.setProperty("solr.tests.metrics.mergeDetails", "true");
- System.setProperty("solr.tests.metrics.majorMergeDocs", "450");
- initCore("solrconfig-indexmetrics.xml", "schema.xml");
-
- addDocs();
-
- try (SolrCore core = h.getCoreContainer().getCore("collection1")) {
- var prometheusMetricReader =
SolrMetricTestUtils.getPrometheusMetricReader(core);
- assertNotNull(prometheusMetricReader);
- MetricSnapshots otelMetrics = prometheusMetricReader.collect();
- assertTrue("Metrics count: " + otelMetrics.size(), otelMetrics.size() >=
18);
+ assertEquals(
+ "majorMergeDocs should be 500, got: " + majorMergeDocs.getValue(),
+ 500,
+ (long) majorMergeDocs.getValue());
- // addDocs() adds 1000 documents and then sends a commit.
maxBufferedDocs==100,
- // segmentsPerTier==3,
- // maxMergeAtOnce==3 and majorMergeDocs==450. Thus, new documents
form segments with 100
- // docs, merges are
- // called for when there are 3 segments at the lowest tier, and the
merges are as follows:
- // 1. 100 + 100 + 100 ==> new 300 doc segment, below the 450
threshold ==> minor merge
- // 2. 100 + 100 + 100 ==> new 300 doc segment, below the 450
threshold ==> minor merge
- // 3. 300 + 100 + 100 ==> new 500 doc segment, above the 450
threshold ==> major merge
- // 4. 300 + 100 + 100 ==> new 500 doc segment, above the 450
threshold ==> major merge
-
- // check basic index meters
- var minorMergeTimer =
- SolrMetricTestUtils.getHistogramDatapoint(
+ // segments metrics
+ var minorSegmentsMergeMetric =
+ SolrMetricTestUtils.getCounterDatapoint(
core,
- "solr_indexwriter_merge_milliseconds",
+ "solr_indexwriter_merge_segments",
SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
.label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
.label(MERGE_TYPE_ATTR.toString(), "minor")
+ .label(MERGE_STATE_ATTR.toString(), "completed")
+ .label(RESULT_ATTR.toString(), "success")
.build());
- assertTrue("minorMergeTimer: " + minorMergeTimer.getCount(),
minorMergeTimer.getCount() == 2);
- var majorMergeTimer =
- SolrMetricTestUtils.getHistogramDatapoint(
- core,
- "solr_indexwriter_merge_milliseconds",
- SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
- .label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
- .label(MERGE_TYPE_ATTR.toString(), "major")
- .build());
- assertTrue("majorMergeTimer: " + majorMergeTimer.getCount(),
majorMergeTimer.getCount() == 2);
-
- // check detailed meters
- var majorMergeDocs =
+ assertNotNull("minor segment merges metric should exist",
minorSegmentsMergeMetric);
+ assertEquals(
+ "number of minor segments merged should be 6, got: "
+ + minorSegmentsMergeMetric.getValue(),
+ 6,
+ (long) minorSegmentsMergeMetric.getValue());
+ var majorSegmentsMergeMetric =
SolrMetricTestUtils.getCounterDatapoint(
core,
- "solr_indexwriter_major_merged_docs",
+ "solr_indexwriter_merge_segments",
SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
.label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
+ .label(MERGE_TYPE_ATTR.toString(), "major")
+ .label(MERGE_STATE_ATTR.toString(), "completed")
+ .label(RESULT_ATTR.toString(), "success")
.build());
- // majorMergeDocs is the total number of docs merged during major merge
operations
- assertTrue("majorMergeDocs: " + majorMergeDocs.getValue(),
majorMergeDocs.getValue() == 1000);
+ assertNotNull("major segment merges metric should exist",
majorSegmentsMergeMetric);
+ assertEquals(
+ "number of major segments merged should be 3, got: "
+ + majorSegmentsMergeMetric.getValue(),
+ 3,
+ (long) majorSegmentsMergeMetric.getValue());
var flushCounter =
SolrMetricTestUtils.getCounterDatapoint(
core,
- "solr_indexwriter_flush",
+ "solr_indexwriter_flushes",
SolrMetricTestUtils.newStandaloneLabelsBuilder(core)
.label(CATEGORY_ATTR.toString(),
SolrInfoBean.Category.INDEX.toString())
.build());
- assertTrue("flush: " + flushCounter.getValue(), flushCounter.getValue()
>= 10);
+ assertTrue(
+ "should be at greater than 10 flushes: " + flushCounter.getValue(),
+ flushCounter.getValue() >= 10);
}
}
}
diff --git
a/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc
b/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc
index ef02fcd5042..0f8b5c37534 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/pages/metrics-reporting.adoc
@@ -87,7 +87,7 @@ When making requests with the <<Metrics API>>, you can
specify `&group=core` to
* all common RequestHandlers report: request timers / counters, timeouts,
errors.
Handlers that support process distributed shard requests also report
`shardRequests` sub-counters for each type of distributed request.
-* <<Index Merge Metrics,index-level events>>: meters for minor / major merges,
number of merged docs, number of deleted docs, gauges for currently running
merges and their size.
+* <<Index Merge Metrics,index-level events>>: meters for minor / major merges,
number of merged docs, number of deleted docs, number of flushes.
* shard replication and transaction log replay on replicas,
* open / available / pending connections for shard handler and update handler.
@@ -312,7 +312,7 @@ complex objects:
----
=== Caching Threads Metrics ===
-The threads metrics in the JVM group can be expensive to compute, as it
requires traversing all threads.
+The threads metrics in the JVM group can be expensive to compute, as it
requires traversing all threads.
This can be avoided for every call to the metrics API (group=jvm) by setting a
high caching expiration interval
(in seconds). For example, to cache the thread metrics for 5 seconds:
@@ -655,11 +655,13 @@ Metrics can be aggregated across cores using Shard and
Cluster reporters.
=== Index Merge Metrics
-These metrics are collected in respective registries for each core (e.g.,
`solr.core.collection1....`), under the `INDEX` category.
+These metrics are collected under the `INDEX` category and track flush
operations (documents being written to disk) and merge operations (segments on
disk being merged).
-Metrics collection is controlled by boolean parameters in the `<metrics>`
section of `solrconfig.xml`:
+Merge metrics distinguish between "minor" and "major" merges (merges
involving fewer documents are typically more frequent).
+This is indicated by the `merge_type` label for the metric. The threshold for
when a merge becomes large enough to be considered major is configurable, but
+defaults to 524,288 (512 * 1024) documents.
-Basic metrics:
+Metrics collection for index merges can be configured in the `<metrics>`
section of `solrconfig.xml` as shown below:
[source,xml]
----
@@ -668,7 +670,6 @@ Basic metrics:
<indexConfig>
<metrics>
<long name="majorMergeDocs">524288</long>
- <bool name="merge">true</bool>
</metrics>
...
</indexConfig>
@@ -676,43 +677,6 @@ Basic metrics:
</config>
----
-Detailed metrics:
-
-[source,xml]
-----
-<config>
- ...
- <indexConfig>
- <metrics>
- <long name="majorMergeDocs">524288</long>
- <bool name="mergeDetails">true</bool>
- </metrics>
- ...
- </indexConfig>
-...
-</config>
-----
-
-The following metrics are collected:
-
-* `INDEX.merge.major` - timer for merge operations that include at least
"majorMergeDocs" (default value for this parameter is 512k documents).
-* `INDEX.merge.minor` - timer for merge operations that include less than
"majorMergeDocs".
-* `INDEX.merge.errors` - counter for merge errors.
-* `INDEX.flush` - meter for index flush operations.
-
-Additionally, the following gauges are reported, which help to monitor the
momentary state of index merge operations:
-
-* `INDEX.merge.major.running` - number of running major merge operations
(depending on the implementation of `MergeScheduler` that is used there can be
several concurrently running merge operations).
-* `INDEX.merge.minor.running` - as above, for minor merge operations.
-* `INDEX.merge.major.running.docs` - total number of documents in the segments
being currently merged in major merge operations.
-* `INDEX.merge.minor.running.docs` - as above, for minor merge operations.
-* `INDEX.merge.major.running.segments` - number of segments being currently
merged in major merge operations.
-* `INDEX.merge.minor.running.segments` - as above, for minor merge operations.
-
-If the boolean flag `mergeDetails` is true then the following additional
metrics are collected:
-
-* `INDEX.merge.major.docs` - meter for the number of documents merged in major
merge operations
-* `INDEX.merge.major.deletedDocs` - meter for the number of deleted documents
expunged in major merge operations
== Metrics API