kotman12 commented on code in PR #3514:
URL: https://github.com/apache/solr/pull/3514#discussion_r2302264076
##########
solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java:
##########
@@ -841,83 +851,139 @@ private CommitVersionInfo getIndexVersion() {
@Override
public void initializeMetrics(
SolrMetricsContext parentContext, Attributes attributes, String scope) {
- super.initializeMetrics(parentContext, attributes, scope);
- solrMetricsContext.gauge(
- () ->
- (core != null && !core.isClosed()
- ? NumberUtils.readableSize(core.getIndexSize())
- : parentContext.nullString()),
- true,
- "indexSize",
- getCategory().toString(),
- scope);
- solrMetricsContext.gauge(
- () ->
- (core != null && !core.isClosed()
- ? getIndexVersion().toString()
- : parentContext.nullString()),
- true,
- "indexVersion",
- getCategory().toString(),
- scope);
- solrMetricsContext.gauge(
- () ->
- (core != null && !core.isClosed()
- ? getIndexVersion().generation
- : parentContext.nullNumber()),
- true,
- GENERATION,
- getCategory().toString(),
- scope);
- solrMetricsContext.gauge(
- () -> (core != null && !core.isClosed() ? core.getIndexDir() :
parentContext.nullString()),
- true,
- "indexPath",
- getCategory().toString(),
- scope);
- solrMetricsContext.gauge(() -> isLeader, true, "isLeader",
getCategory().toString(), scope);
- solrMetricsContext.gauge(() -> isFollower, true, "isFollower",
getCategory().toString(), scope);
- final MetricsMap fetcherMap =
- new MetricsMap(
- map -> {
+ Attributes replicationAttributes =
+ Attributes.builder()
+ .putAll(attributes)
+ .put(CATEGORY_ATTR, Category.REPLICATION.toString())
+ .build();
+ super.initializeMetrics(parentContext, replicationAttributes, scope);
+
+ indexSizeGauge =
+ solrMetricsContext.observableLongGauge(
+ "solr_replication_index_size",
+ "Size of the index in bytes",
+ gauge -> {
+ if (core != null && !core.isClosed()) {
+ gauge.record(core.getIndexSize(), replicationAttributes);
+ }
+ },
+ OtelUnit.BYTES);
+
+ indexVersionGauge =
+ solrMetricsContext.observableLongGauge(
+ "solr_replication_index_version",
+ "Current index version",
+ gauge -> {
+ if (core != null && !core.isClosed()) {
+ gauge.record(getIndexVersion().version, replicationAttributes);
+ }
+ });
+
+ generationGauge =
+ solrMetricsContext.observableLongGauge(
+ "solr_replication_generation",
+ "Current index generation",
+ gauge -> {
+ if (core != null && !core.isClosed()) {
+ gauge.record(getIndexVersion().generation,
replicationAttributes);
+ }
+ });
+
+ isLeaderGauge =
+ solrMetricsContext.observableLongGauge(
+ "solr_replication_is_leader",
+ "Whether this node is a leader (1) or not (0)",
+ gauge -> gauge.record(isLeader ? 1 : 0, replicationAttributes));
+
+ isFollowerGauge =
+ solrMetricsContext.observableLongGauge(
+ "solr_replication_is_follower",
+ "Whether this node is a follower (1) or not (0)",
+ gauge -> gauge.record(isFollower ? 1 : 0, replicationAttributes));
+
+ replicationEnabledGauge =
+ solrMetricsContext.observableLongGauge(
+ "solr_replication_is_enabled",
+ "Whether replication is enabled (1) or not (0)",
+ gauge ->
+ gauge.record(
+ (isLeader && replicationEnabled.get()) ? 1 : 0,
replicationAttributes));
+
+ // Create measurements for fetcher metrics in a batch to ensure consistent
fetcher reference
+ ObservableLongMeasurement isPollingDisabled =
+ solrMetricsContext.longMeasurement(
+ "solr_replication_is_polling_disabled", "Whether polling is
disabled (1) or not (0)");
+
+ ObservableLongMeasurement isReplicating =
+ solrMetricsContext.longMeasurement(
+ "solr_replication_is_replicating", "Whether replication is in
progress (1) or not (0)");
+
+ ObservableLongMeasurement timeElapsed =
+ solrMetricsContext.longMeasurement(
+ "solr_replication_time_elapsed",
+ "Time elapsed during replication in seconds",
+ OtelUnit.SECONDS);
+
+ ObservableLongMeasurement bytesDownloaded =
+ solrMetricsContext.longMeasurement(
+ "solr_replication_bytes_downloaded",
+ "Total bytes downloaded during replication",
+ OtelUnit.BYTES);
+
+ ObservableLongMeasurement downloadSpeed =
+ solrMetricsContext.longMeasurement(
+ "solr_replication_download_speed", "Download speed in bytes per
second");
+
+ // Use batch callback to ensure consistent fetcher reference
Review Comment:
The two metrics not _directly_ related to the indexFetcher instance can
probably be pulled out. I left them in the batch because that is more in line
with the way it was done before and I didn't see anything wrong with it. The
main reason I'm using a batch callback is for the other metrics. If the
volatile instance can change, then separate callbacks could query different
indexFetcher instances for the same snapshot/point-in-time. I don't know at
what granularity these gauges publish metrics, but I imagine it would be
confusing to publish `timeElapsed` and `bytesDownloaded` for two different
fetchers at roughly the same sample/point-in-time. I imagine that is why it
was done this way before: to cache the indexFetcher instance and ensure
atomicity. Am I missing something?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]