This is an automated email from the ASF dual-hosted git repository. mlbiscoc pushed a commit to branch feature/SOLR-17458-rebased in repository https://gitbox.apache.org/repos/asf/solr.git
commit 5c87f8d8597f0174dada2898a3f5e2db7f707bd6 Author: Luke Kot-Zaniewski <[email protected]> AuthorDate: Fri Sep 5 11:45:27 2025 -0400 SOLR-17806: switch ReplicationHandler metrics to OTEL (#3514) * switch ReplicationHandler metrics to OTEL * switch ReplicationHandler metrics to OTEL * rename some gauges and IOUtils::closeQuietly * capture all metrics in the callback --- .../apache/solr/handler/ReplicationHandler.java | 173 ++++++++++++--------- .../org/apache/solr/metrics/SolrMetricManager.java | 50 +++--- .../apache/solr/metrics/SolrMetricsContext.java | 18 +++ 3 files changed, 152 insertions(+), 89 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java index d3aa17b7b12..b900dc22ddb 100644 --- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java @@ -31,6 +31,8 @@ import static org.apache.solr.handler.admin.api.ReplicationAPIBase.STATUS; import static org.apache.solr.handler.admin.api.ReplicationAPIBase.TLOG_FILE; import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.BatchCallback; +import io.opentelemetry.api.metrics.ObservableLongMeasurement; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -79,6 +81,7 @@ import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ExecutorUtil; +import org.apache.solr.common.util.IOUtils; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SolrNamedThreadFactory; @@ -100,8 +103,8 @@ import org.apache.solr.handler.admin.api.ReplicationAPIBase; import org.apache.solr.handler.admin.api.SnapshotBackupAPI; import org.apache.solr.handler.api.V2ApiUtils; import org.apache.solr.jersey.APIConfigProvider; -import org.apache.solr.metrics.MetricsMap; import org.apache.solr.metrics.SolrMetricsContext; +import org.apache.solr.metrics.otel.OtelUnit; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.search.SolrIndexSearcher; @@ -151,6 +154,7 @@ public class ReplicationHandler extends RequestHandlerBase private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); SolrCore core; + private BatchCallback metricsCallback; @Override public Name getPermissionName(AuthorizationContext request) { @@ -841,83 +845,110 @@ public class ReplicationHandler extends RequestHandlerBase @Override public void initializeMetrics( SolrMetricsContext parentContext, Attributes attributes, String scope) { - super.initializeMetrics(parentContext, attributes, scope); - solrMetricsContext.gauge( - () -> - (core != null && !core.isClosed() - ? NumberUtils.readableSize(core.getIndexSize()) - : parentContext.nullString()), - true, - "indexSize", - getCategory().toString(), - scope); - solrMetricsContext.gauge( - () -> - (core != null && !core.isClosed() - ? getIndexVersion().toString() - : parentContext.nullString()), - true, - "indexVersion", - getCategory().toString(), - scope); - solrMetricsContext.gauge( - () -> - (core != null && !core.isClosed() - ? getIndexVersion().generation - : parentContext.nullNumber()), - true, - GENERATION, - getCategory().toString(), - scope); - solrMetricsContext.gauge( - () -> (core != null && !core.isClosed() ? core.getIndexDir() : parentContext.nullString()), - true, - "indexPath", - getCategory().toString(), - scope); - solrMetricsContext.gauge(() -> isLeader, true, "isLeader", getCategory().toString(), scope); - solrMetricsContext.gauge(() -> isFollower, true, "isFollower", getCategory().toString(), scope); - final MetricsMap fetcherMap = - new MetricsMap( - map -> { + Attributes replicationAttributes = + Attributes.builder() + .putAll(attributes) + .put(CATEGORY_ATTR, Category.REPLICATION.toString()) + .build(); + super.initializeMetrics(parentContext, replicationAttributes, scope); + + ObservableLongMeasurement indexSizeMetric = + solrMetricsContext.longMeasurement( + "solr_replication_index_size", "Size of the index in bytes", OtelUnit.BYTES); + + ObservableLongMeasurement indexVersionMetric = + solrMetricsContext.longMeasurement( + "solr_replication_index_version", "Current index version"); + + ObservableLongMeasurement indexGenerationMetric = + solrMetricsContext.longMeasurement( + "solr_replication_index_generation", "Current index generation"); + + ObservableLongMeasurement isLeaderMetric = + solrMetricsContext.longMeasurement( + "solr_replication_is_leader", "Whether this node is a leader (1) or not (0)"); + + ObservableLongMeasurement isFollowerMetric = + solrMetricsContext.longMeasurement( + "solr_replication_is_follower", "Whether this node is a follower (1) or not (0)"); + + ObservableLongMeasurement replicationEnabledMetric = + solrMetricsContext.longMeasurement( + "solr_replication_is_enabled", "Whether replication is enabled (1) or not (0)"); + + ObservableLongMeasurement isPollingDisabledMetric = + solrMetricsContext.longMeasurement( + "solr_replication_is_polling_disabled", "Whether polling is disabled (1) or not (0)"); + + ObservableLongMeasurement isReplicatingMetric = + solrMetricsContext.longMeasurement( + "solr_replication_is_replicating", "Whether replication is in progress (1) or not (0)"); + + ObservableLongMeasurement timeElapsedMetric = + solrMetricsContext.longMeasurement( + "solr_replication_time_elapsed", + "Time elapsed during replication in seconds", + OtelUnit.SECONDS); + + ObservableLongMeasurement bytesDownloadedMetric = + solrMetricsContext.longMeasurement( + "solr_replication_downloaded_size", + "Total bytes downloaded during replication", + OtelUnit.BYTES); + + ObservableLongMeasurement downloadSpeedMetric = + solrMetricsContext.longMeasurement( + "solr_replication_download_speed", "Download speed in bytes per second"); + + metricsCallback = + solrMetricsContext.batchCallback( + () -> { + if (core != null && !core.isClosed()) { + indexSizeMetric.record(core.getIndexSize(), replicationAttributes); + + CommitVersionInfo vInfo = getIndexVersion(); + if (vInfo != null) { + indexVersionMetric.record(vInfo.version, replicationAttributes); + indexGenerationMetric.record(vInfo.generation, replicationAttributes); + } + } + + isLeaderMetric.record(isLeader ? 1 : 0, replicationAttributes); + isFollowerMetric.record(isFollower ? 1 : 0, replicationAttributes); + replicationEnabledMetric.record( + (isLeader && replicationEnabled.get()) ? 1 : 0, replicationAttributes); + IndexFetcher fetcher = currentIndexFetcher; if (fetcher != null) { - map.put(LEADER_URL, fetcher.getLeaderCoreUrl()); - if (getPollInterval() != null) { - map.put(ReplicationAPIBase.POLL_INTERVAL, getPollInterval()); - } - map.put("isPollingDisabled", isPollingDisabled()); - map.put("isReplicating", isReplicating()); + isPollingDisabledMetric.record(isPollingDisabled() ? 1 : 0, replicationAttributes); + isReplicatingMetric.record(isReplicating() ? 1 : 0, replicationAttributes); + long elapsed = fetcher.getReplicationTimeElapsed(); long val = fetcher.getTotalBytesDownloaded(); if (elapsed > 0) { - map.put("timeElapsed", elapsed); - map.put("bytesDownloaded", val); - map.put("downloadSpeed", val / elapsed); + timeElapsedMetric.record(elapsed, replicationAttributes); + bytesDownloadedMetric.record(val, replicationAttributes); + downloadSpeedMetric.record(val / elapsed, replicationAttributes); } - Properties props = loadReplicationProperties(); - addReplicationProperties(map::putNoEx, props); } - }); - solrMetricsContext.gauge(fetcherMap, true, "fetcher", getCategory().toString(), scope); - solrMetricsContext.gauge( - () -> isLeader && includeConfFiles != null ? includeConfFiles : "", - true, - "confFilesToReplicate", - getCategory().toString(), - scope); - solrMetricsContext.gauge( - () -> isLeader ? getReplicateAfterStrings() : Collections.<String>emptyList(), - true, - REPLICATE_AFTER, - getCategory().toString(), - scope); - solrMetricsContext.gauge( - () -> isLeader && replicationEnabled.get(), - true, - "replicationEnabled", - getCategory().toString(), - scope); + }, + indexSizeMetric, + indexVersionMetric, + indexGenerationMetric, + isLeaderMetric, + isFollowerMetric, + replicationEnabledMetric, + isPollingDisabledMetric, + isReplicatingMetric, + timeElapsedMetric, + bytesDownloadedMetric, + downloadSpeedMetric); + } + + @Override + public void close() throws IOException { + IOUtils.closeQuietly(metricsCallback); + super.close(); } // TODO Should a failure retrieving any piece of info mark the overall request as a failure? Is diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java index e2dd4f75158..bda5fa4af39 100644 --- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java +++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java @@ -26,6 +26,7 @@ import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.MetricSet; import com.codahale.metrics.SharedMetricRegistries; import com.codahale.metrics.Timer; +import io.opentelemetry.api.metrics.BatchCallback; import io.opentelemetry.api.metrics.DoubleCounter; import io.opentelemetry.api.metrics.DoubleCounterBuilder; import io.opentelemetry.api.metrics.DoubleGauge; @@ -50,6 +51,7 @@ import io.opentelemetry.api.metrics.ObservableLongCounter; import io.opentelemetry.api.metrics.ObservableLongGauge; import io.opentelemetry.api.metrics.ObservableLongMeasurement; import io.opentelemetry.api.metrics.ObservableLongUpDownCounter; +import io.opentelemetry.api.metrics.ObservableMeasurement; import io.opentelemetry.exporter.prometheus.PrometheusMetricReader; import io.opentelemetry.sdk.metrics.SdkMeterProvider; import io.opentelemetry.sdk.metrics.internal.SdkMeterProviderUtil; @@ -261,15 +263,7 @@ public class SolrMetricManager { } public LongGauge longGauge(String registry, String gaugeName, String description, OtelUnit unit) { - LongGaugeBuilder builder = - meterProvider(registry) - .get(OTEL_SCOPE_NAME) - .gaugeBuilder(gaugeName) - .setDescription(description) - .ofLongs(); - if (unit != null) builder.setUnit(unit.getSymbol()); - - return builder.build(); + return longGaugeBuilder(registry, gaugeName, description, unit).build(); } public ObservableLongCounter observableLongCounter( @@ -311,15 +305,7 @@ public class SolrMetricManager { String description, Consumer<ObservableLongMeasurement> callback, OtelUnit unit) { - LongGaugeBuilder builder = - meterProvider(registry) - .get(OTEL_SCOPE_NAME) - .gaugeBuilder(gaugeName) - .setDescription(description) - .ofLongs(); - if (unit != null) builder.setUnit(unit.getSymbol()); - - return builder.buildWithCallback(callback); + return longGaugeBuilder(registry, gaugeName, description, unit).buildWithCallback(callback); } public ObservableDoubleGauge observableDoubleGauge( @@ -372,6 +358,34 @@ public class SolrMetricManager { return builder.buildWithCallback(callback); } + ObservableLongMeasurement longMeasurement( + String registry, String gaugeName, String description, OtelUnit unit) { + return longGaugeBuilder(registry, gaugeName, description, unit).buildObserver(); + } + + BatchCallback batchCallback( + String registry, + Runnable callback, + ObservableMeasurement measurement, + ObservableMeasurement... additionalMeasurements) { + return meterProvider(registry) + .get(OTEL_SCOPE_NAME) + .batchCallback(callback, measurement, additionalMeasurements); + } + + private LongGaugeBuilder longGaugeBuilder( + String registry, String gaugeName, String description, OtelUnit unit) { + LongGaugeBuilder builder = + meterProvider(registry) + .get(OTEL_SCOPE_NAME) + .gaugeBuilder(gaugeName) + .setDescription(description) + .ofLongs(); + if (unit != null) builder.setUnit(unit.getSymbol()); + + return builder; + } + // for unit tests public MetricRegistry.MetricSupplier<Counter> getCounterSupplier() { return counterSupplier; diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricsContext.java b/solr/core/src/java/org/apache/solr/metrics/SolrMetricsContext.java index f7f87303936..14b6affa475 100644 --- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricsContext.java +++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricsContext.java @@ -23,6 +23,7 @@ import com.codahale.metrics.Histogram; import com.codahale.metrics.Meter; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.Timer; +import io.opentelemetry.api.metrics.BatchCallback; import io.opentelemetry.api.metrics.DoubleCounter; import io.opentelemetry.api.metrics.DoubleGauge; import io.opentelemetry.api.metrics.DoubleHistogram; @@ -37,6 +38,7 @@ import io.opentelemetry.api.metrics.ObservableDoubleMeasurement; import io.opentelemetry.api.metrics.ObservableLongCounter; import io.opentelemetry.api.metrics.ObservableLongGauge; import io.opentelemetry.api.metrics.ObservableLongMeasurement; +import io.opentelemetry.api.metrics.ObservableMeasurement; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -274,6 +276,22 @@ public class SolrMetricsContext { registryName, metricName, description, callback, unit); } + public ObservableLongMeasurement longMeasurement(String metricName, String description) { + return longMeasurement(metricName, description, null); + } + + public ObservableLongMeasurement longMeasurement( + String metricName, String description, OtelUnit unit) { + return metricManager.longMeasurement(registryName, metricName, description, unit); + } + + public BatchCallback batchCallback( + Runnable callback, + ObservableMeasurement measurement, + ObservableMeasurement... additionalMeasurements) { + return metricManager.batchCallback(registryName, callback, measurement, additionalMeasurements); + } + /** * Convenience method for {@link SolrMetricManager#meter(SolrMetricsContext, String, String, * String...)}.
