This is an automated email from the ASF dual-hosted git repository. mlbiscoc pushed a commit to branch feature/SOLR-17458-rebased in repository https://gitbox.apache.org/repos/asf/solr.git
commit 1bb61d17e949e51847801d81b3403b9b25a9bb22 Author: Adam Quigley <[email protected]> AuthorDate: Thu Sep 25 13:33:21 2025 -0400 SOLR-17806: Migrate AuditLoggerPlugin to OTEL (#3665) * SOLR-17806: Migrate AuditLoggerPlugin to OTEL * Address PR comment on InterruptedException in test * Fix AuditLoggerPlugin description for solr_auditlogger_request_times metric * Register Nanosecond Histogram view in SolrMetricManager * Run gradlew tidy * Refactor plugin_name as a shared metric category attribute * Address part of PR feedback * Improve description for solr_auditlogger_lost OTEL metric * Add TODO note about making histogram buckets configurable * Use synchronous gauge for solr_auditlogger_async_enabled metric * Run gradlew tidy --- .../org/apache/solr/metrics/SolrMetricManager.java | 35 +++++- .../apache/solr/metrics/SolrMetricProducer.java | 1 + .../apache/solr/security/AuditLoggerPlugin.java | 126 +++++++++++++-------- .../apache/solr/security/AuthenticationPlugin.java | 4 +- .../solr/security/MultiDestinationAuditLogger.java | 3 +- .../solr/security/AuditLoggerIntegrationTest.java | 126 ++++++++++++++++++--- .../solr/security/AuditLoggerPluginTest.java | 4 + .../security/MockSolrMetricsContextFactory.java | 49 ++++++++ .../security/MultiDestinationAuditLoggerTest.java | 5 + .../security/SolrLogAuditLoggerPluginTest.java | 4 + .../deployment-guide/pages/audit-logging.adoc | 25 ++-- .../apache/solr/cloud/SolrCloudAuthTestCase.java | 40 ------- 12 files changed, 299 insertions(+), 123 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java index e179306eaae..98d18cea32f 100644 --- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java +++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java @@ -52,7 +52,11 @@ import io.opentelemetry.api.metrics.ObservableLongGauge; import io.opentelemetry.api.metrics.ObservableLongMeasurement; import io.opentelemetry.api.metrics.ObservableLongUpDownCounter; import io.opentelemetry.api.metrics.ObservableMeasurement; +import io.opentelemetry.sdk.metrics.Aggregation; +import io.opentelemetry.sdk.metrics.InstrumentSelector; +import io.opentelemetry.sdk.metrics.InstrumentType; import io.opentelemetry.sdk.metrics.SdkMeterProvider; +import io.opentelemetry.sdk.metrics.View; import io.opentelemetry.sdk.metrics.internal.SdkMeterProviderUtil; import io.opentelemetry.sdk.metrics.internal.exemplar.ExemplarFilter; import java.io.IOException; @@ -157,6 +161,23 @@ public class SolrMetricManager { private final ConcurrentMap<String, MeterProviderAndReaders> meterProviderAndReaders = new ConcurrentHashMap<>(); + private static final List<Double> SOLR_NANOSECOND_HISTOGRAM_BOUNDARIES = + List.of( + 0.0, + 5_000.0, + 10_000.0, + 25_000.0, + 50_000.0, + 100_000.0, + 250_000.0, + 500_000.0, + 1_000_000.0, + 2_500_000.0, + 5_000_000.0, + 25_000_000.0, + 100_000_000.0, + 1_000_000_000.0); + public SolrMetricManager() { metricsConfig = new MetricsConfig.MetricsConfigBuilder().build(); counterSupplier = MetricSuppliers.counterSupplier(null, null); @@ -773,7 +794,19 @@ public class SolrMetricManager { var reader = new FilterablePrometheusMetricReader(true, null); // NOCOMMIT: We need to add a Periodic Metric Reader here if we want to push with OTLP // with an exporter - var provider = SdkMeterProvider.builder().registerMetricReader(reader); + var provider = + SdkMeterProvider.builder() + .registerMetricReader(reader) + .registerView( + InstrumentSelector.builder() + .setType(InstrumentType.HISTOGRAM) + .setUnit(OtelUnit.NANOSECONDS.getSymbol()) + .build(), + View.builder() + .setAggregation( + Aggregation.explicitBucketHistogram( + SOLR_NANOSECOND_HISTOGRAM_BOUNDARIES)) + .build()); // TODO: Make histogram bucket boundaries configurable SdkMeterProviderUtil.setExemplarFilter(provider, ExemplarFilter.traceBased()); return new MeterProviderAndReaders(provider.build(), reader); }) diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricProducer.java b/solr/core/src/java/org/apache/solr/metrics/SolrMetricProducer.java index 94b740540af..435b7c0051c 100644 --- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricProducer.java +++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricProducer.java @@ -27,6 +27,7 @@ public interface SolrMetricProducer extends AutoCloseable { public static final AttributeKey<String> CATEGORY_ATTR = AttributeKey.stringKey("category"); public static final AttributeKey<String> HANDLER_ATTR = AttributeKey.stringKey("handler"); public static final AttributeKey<String> OPERATION_ATTR = AttributeKey.stringKey("ops"); + public static final AttributeKey<String> PLUGIN_NAME_ATTR = AttributeKey.stringKey("plugin_name"); /** * Unique metric tag identifies components with the same life-cycle, which should be registered / diff --git a/solr/core/src/java/org/apache/solr/security/AuditLoggerPlugin.java b/solr/core/src/java/org/apache/solr/security/AuditLoggerPlugin.java index 78dfdd4d572..89a6c923f7a 100644 --- a/solr/core/src/java/org/apache/solr/security/AuditLoggerPlugin.java +++ b/solr/core/src/java/org/apache/solr/security/AuditLoggerPlugin.java @@ -16,9 +16,6 @@ */ package org.apache.solr.security; -import com.codahale.metrics.Counter; -import com.codahale.metrics.Meter; -import com.codahale.metrics.Timer; import com.fasterxml.jackson.annotation.JsonInclude.Include; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; @@ -33,10 +30,8 @@ import java.util.Collections; import java.util.Date; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; @@ -46,6 +41,10 @@ import org.apache.solr.common.util.SolrNamedThreadFactory; import org.apache.solr.common.util.TimeSource; import org.apache.solr.core.SolrInfoBean; import org.apache.solr.metrics.SolrMetricsContext; +import org.apache.solr.metrics.otel.OtelUnit; +import org.apache.solr.metrics.otel.instruments.AttributedLongCounter; +import org.apache.solr.metrics.otel.instruments.AttributedLongGauge; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer; import org.apache.solr.security.AuditEvent.EventType; import org.apache.solr.util.TimeOut; import org.slf4j.Logger; @@ -77,18 +76,16 @@ public abstract class AuditLoggerPlugin implements Closeable, Runnable, SolrInfo int blockingQueueSize; protected AuditEventFormatter formatter; - private Set<String> metricNames = ConcurrentHashMap.newKeySet(); private ExecutorService executorService; private boolean closed; private MuteRules muteRules; protected SolrMetricsContext solrMetricsContext; - protected Meter numErrors = new Meter(); - protected Meter numLost = new Meter(); - protected Meter numLogged = new Meter(); - protected Timer requestTimes = new Timer(); - protected Timer queuedTime = new Timer(); - protected Counter totalTime = new Counter(); + protected AttributedLongCounter numLogged; + protected AttributedLongCounter numErrors; + protected AttributedLongCounter numLost; + protected AttributedLongTimer requestTimes; + private AttributedLongTimer queuedTime; // Event types to be logged by default protected List<String> eventTypes = @@ -163,15 +160,16 @@ public abstract class AuditLoggerPlugin implements Closeable, Runnable, SolrInfo if (async) { auditAsync(event); } else { - Timer.Context timer = requestTimes.time(); - numLogged.mark(); + AttributedLongTimer.MetricTimer timer = requestTimes.start(TimeUnit.NANOSECONDS); + numLogged.inc(); try { audit(event); } catch (Exception e) { - numErrors.mark(); + numErrors.inc(); log.error("Exception when attempting to audit log", e); } finally { - totalTime.inc(timer.stop()); + // Record the timing metric + timer.stop(); } } } @@ -207,7 +205,7 @@ public abstract class AuditLoggerPlugin implements Closeable, Runnable, SolrInfo "Audit log async queue is full (size={}), not blocking since {}==false", blockingQueueSize, PARAM_BLOCKASYNC); - numLost.mark(); + numLost.inc(); } } } @@ -222,19 +220,21 @@ public abstract class AuditLoggerPlugin implements Closeable, Runnable, SolrInfo auditsInFlight.incrementAndGet(); if (event == null) continue; if (event.getDate() != null) { - queuedTime.update( - new Date().getTime() - event.getDate().getTime(), TimeUnit.MILLISECONDS); + long queueTimeNanos = + TimeUnit.MILLISECONDS.toNanos(new Date().getTime() - event.getDate().getTime()); + queuedTime.record(queueTimeNanos); } - Timer.Context timer = requestTimes.time(); + AttributedLongTimer.MetricTimer timer = requestTimes.start(TimeUnit.NANOSECONDS); audit(event); - numLogged.mark(); - totalTime.inc(timer.stop()); + numLogged.inc(); + // Record the timing metric + timer.stop(); } catch (InterruptedException e) { log.warn("Interrupted while waiting for next audit log event"); Thread.currentThread().interrupt(); } catch (Exception ex) { log.error("Exception when attempting to audit log asynchronously", ex); - numErrors.mark(); + numErrors.inc(); } finally { auditsInFlight.decrementAndGet(); } @@ -261,39 +261,73 @@ public abstract class AuditLoggerPlugin implements Closeable, Runnable, SolrInfo this.formatter = formatter; } - // TODO SOLR-17458: Migrate to Otel @Override public void initializeMetrics( SolrMetricsContext parentContext, Attributes attributes, final String scope) { solrMetricsContext = parentContext.getChildContext(this); String className = this.getClass().getSimpleName(); log.debug("Initializing metrics for {}", className); - numErrors = solrMetricsContext.meter("errors", getCategory().toString(), scope, className); - numLost = solrMetricsContext.meter("lost", getCategory().toString(), scope, className); - numLogged = solrMetricsContext.meter("count", getCategory().toString(), scope, className); + + Attributes attrsWithCategory = + Attributes.builder() + .putAll(attributes) + .put(CATEGORY_ATTR, Category.SECURITY.toString()) + .put(PLUGIN_NAME_ATTR, this.getClass().getSimpleName()) + .build(); + + numLogged = + new AttributedLongCounter( + solrMetricsContext.longCounter( + "solr_auditlogger_count", + "The number of audit events that were successfully logged."), + attrsWithCategory); + numErrors = + new AttributedLongCounter( + solrMetricsContext.longCounter( + "solr_auditlogger_errors", "The number of audit events that resulted in errors."), + attrsWithCategory); + numLost = + new AttributedLongCounter( + solrMetricsContext.longCounter( + "solr_auditlogger_lost", + "The number of audit events that were lost due to async queue being full."), + attrsWithCategory); requestTimes = - solrMetricsContext.timer("requestTimes", getCategory().toString(), scope, className); - totalTime = solrMetricsContext.counter("totalTime", getCategory().toString(), scope, className); + new AttributedLongTimer( + this.solrMetricsContext.longHistogram( + "solr_auditlogger_request_times", + "Distribution of audit event request durations", + OtelUnit.NANOSECONDS), + attrsWithCategory); + if (async) { - solrMetricsContext.gauge( - () -> blockingQueueSize, - true, - "queueCapacity", - getCategory().toString(), - scope, - className); - solrMetricsContext.gauge( - () -> blockingQueueSize - queue.remainingCapacity(), - true, - "queueSize", - getCategory().toString(), - scope, - className); + solrMetricsContext.observableLongGauge( + "solr_auditlogger_queue", + "Metrics around the audit logger queue when running in async mode", + (observableLongMeasurement -> { + observableLongMeasurement.record( + blockingQueueSize, + attrsWithCategory.toBuilder().put(TYPE_ATTR, "capacity").build()); + observableLongMeasurement.record( + blockingQueueSize - queue.remainingCapacity(), + attrsWithCategory.toBuilder().put(TYPE_ATTR, "size").build()); + })); queuedTime = - solrMetricsContext.timer("queuedTime", getCategory().toString(), scope, className); + new AttributedLongTimer( + solrMetricsContext.longHistogram( + "solr_auditlogger_queued_time", + "Distribution of time events spend queued before processing", + OtelUnit.NANOSECONDS), + attrsWithCategory); } - solrMetricsContext.gauge( - () -> async, true, "async", getCategory().toString(), scope, className); + + AttributedLongGauge asyncEnabledGauge = + new AttributedLongGauge( + solrMetricsContext.longGauge( + "solr_auditlogger_async_enabled", + "Whether the audit logger is running in async mode (1) or not (0)"), + attrsWithCategory); + asyncEnabledGauge.set(async ? 1L : 0L); } @Override diff --git a/solr/core/src/java/org/apache/solr/security/AuthenticationPlugin.java b/solr/core/src/java/org/apache/solr/security/AuthenticationPlugin.java index ed7646ad7b4..f68c0dc0397 100644 --- a/solr/core/src/java/org/apache/solr/security/AuthenticationPlugin.java +++ b/solr/core/src/java/org/apache/solr/security/AuthenticationPlugin.java @@ -150,8 +150,8 @@ public abstract class AuthenticationPlugin implements SolrInfoBean { Attributes attrsWithCategory = Attributes.builder() .putAll(attributes) - .put("category", getCategory().toString()) - .put("plugin_name", this.getClass().getSimpleName()) + .put(CATEGORY_ATTR, getCategory().toString()) + .put(PLUGIN_NAME_ATTR, this.getClass().getSimpleName()) .build(); // Metrics numErrors = diff --git a/solr/core/src/java/org/apache/solr/security/MultiDestinationAuditLogger.java b/solr/core/src/java/org/apache/solr/security/MultiDestinationAuditLogger.java index d834d3b3c3b..13d3ea48cd2 100644 --- a/solr/core/src/java/org/apache/solr/security/MultiDestinationAuditLogger.java +++ b/solr/core/src/java/org/apache/solr/security/MultiDestinationAuditLogger.java @@ -133,8 +133,7 @@ public class MultiDestinationAuditLogger extends AuditLoggerPlugin implements Re public void initializeMetrics( SolrMetricsContext parentContext, Attributes attributes, String scope) { super.initializeMetrics(parentContext, attributes, scope); - // TODO SOLR-17458: Add Otel - plugins.forEach(p -> p.initializeMetrics(solrMetricsContext, Attributes.empty(), scope)); + plugins.forEach(p -> p.initializeMetrics(parentContext, Attributes.empty(), scope)); } @Override diff --git a/solr/core/src/test/org/apache/solr/security/AuditLoggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/security/AuditLoggerIntegrationTest.java index 6f800b34f54..f774abbbd58 100644 --- a/solr/core/src/test/org/apache/solr/security/AuditLoggerIntegrationTest.java +++ b/solr/core/src/test/org/apache/solr/security/AuditLoggerIntegrationTest.java @@ -28,9 +28,13 @@ import static org.apache.solr.security.AuditEvent.RequestType.SEARCH; import static org.apache.solr.security.Sha256AuthenticationProvider.getSaltedHashedValue; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import io.opentelemetry.exporter.prometheus.PrometheusMetricReader; +import io.prometheus.metrics.model.snapshots.CounterSnapshot; +import io.prometheus.metrics.model.snapshots.GaugeSnapshot; +import io.prometheus.metrics.model.snapshots.HistogramSnapshot; +import io.prometheus.metrics.model.snapshots.Labels; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; @@ -61,9 +65,11 @@ import org.apache.solr.cloud.SolrCloudAuthTestCase; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.util.SolrNamedThreadFactory; +import org.apache.solr.core.CoreContainer; import org.apache.solr.security.AuditEvent.EventType; import org.apache.solr.security.AuditEvent.RequestType; import org.apache.solr.security.AuditLoggerPlugin.JSONAuditEventFormatter; +import org.apache.solr.util.SolrMetricTestUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -81,6 +87,8 @@ public class AuditLoggerIntegrationTest extends SolrCloudAuthTestCase { // Use a harness per thread to be able to beast this test private ThreadLocal<AuditTestHarness> testHarness = new ThreadLocal<>(); + private PrometheusMetricReader metricsReader; + @Override @Before public void setUp() throws Exception { @@ -103,8 +111,16 @@ public class AuditLoggerIntegrationTest extends SolrCloudAuthTestCase { setupCluster(false, null, false); runThreeTestAdminCommands(); assertThreeTestAdminEvents(); - assertAuditMetricsMinimums( - testHarness.get().cluster, CallbackAuditLoggerPlugin.class.getSimpleName(), 3, 0); + assertAuditMetricsMinimums(3, 0); + Labels labels = getDefaultAuditLoggerMetricsLabelsBuilder().build(); + + assertGaugeMetricValue("solr_auditlogger_async_enabled", labels, 0); + + var snapshot = + SolrMetricTestUtils.getHistogramDatapoint( + metricsReader, "solr_auditlogger_request_times_nanoseconds", labels); + assertNotNull(snapshot); + assertTrue("Expected at least 3 measurements", snapshot.getCount() >= 3); } @Test @@ -112,8 +128,26 @@ public class AuditLoggerIntegrationTest extends SolrCloudAuthTestCase { setupCluster(true, null, false); runThreeTestAdminCommands(); assertThreeTestAdminEvents(); - assertAuditMetricsMinimums( - testHarness.get().cluster, CallbackAuditLoggerPlugin.class.getSimpleName(), 3, 0); + assertAuditMetricsMinimums(3, 0); + Labels labels = getDefaultAuditLoggerMetricsLabelsBuilder().build(); + + assertGaugeMetricValue("solr_auditlogger_async_enabled", labels, 1); + + // Verify queue capacity matches default size + Labels capacityLabels = + getDefaultAuditLoggerMetricsLabelsBuilder().label("type", "capacity").build(); + assertGaugeMetricValue("solr_auditlogger_queue", capacityLabels, 4096); + + // Verify queue is empty after processing events + Labels sizeLabels = getDefaultAuditLoggerMetricsLabelsBuilder().label("type", "size").build(); + assertGaugeMetricValue("solr_auditlogger_queue", sizeLabels, 0); + + var snapshot = + SolrMetricTestUtils.getHistogramDatapoint( + metricsReader, "solr_auditlogger_request_times_nanoseconds", labels); + assertNotNull(snapshot); + assertTrue("Expected at least 3 measurements", snapshot.getCount() >= 3); + assertTrue("Expected a positive sum for request times", snapshot.getSum() >= 0); } @Test @@ -133,20 +167,21 @@ public class AuditLoggerIntegrationTest extends SolrCloudAuthTestCase { gate.release(3); assertThreeTestAdminEvents(); - assertAuditMetricsMinimums( - testHarness.get().cluster, CallbackAuditLoggerPlugin.class.getSimpleName(), 3, 0); - ArrayList<MetricRegistry> registries = getMetricsRegistries(testHarness.get().cluster); - Timer timer = - ((Timer) - registries - .get(0) - .getMetrics() - .get("SECURITY./auditlogging.CallbackAuditLoggerPlugin.queuedTime")); - double meanTimeOnQueue = timer.getSnapshot().getMean(); - double meanTimeExpected = (start - end) / 3.0D; + assertAuditMetricsMinimums(3, 0); + + Labels labels = getDefaultAuditLoggerMetricsLabelsBuilder().build(); + HistogramSnapshot.HistogramDataPointSnapshot snapshot = + SolrMetricTestUtils.getHistogramDatapoint( + metricsReader, "solr_auditlogger_queued_time_nanoseconds", labels); + assertNotNull(snapshot); + assertTrue("Expected at least 3 measurements", snapshot.getCount() >= 3); + + // Calculate mean from sum and count + double mean = snapshot.getSum() / snapshot.getCount(); + double minExpectedTimeNanos = (end - start) / 3.0; assertTrue( - "Expecting mean time on queue > " + meanTimeExpected + ", got " + meanTimeOnQueue, - meanTimeOnQueue > meanTimeExpected); + "Expecting mean time on queue > " + minExpectedTimeNanos + " ns, got " + mean + " ns", + mean > minExpectedTimeNanos); } @Test @@ -542,6 +577,61 @@ public class AuditLoggerIntegrationTest extends SolrCloudAuthTestCase { myCluster.waitForAllNodes(10); testHarness.get().setCluster(myCluster); + + CoreContainer coreContainer = myCluster.getJettySolrRunner(0).getCoreContainer(); + assertNotNull(coreContainer); + metricsReader = SolrMetricTestUtils.getPrometheusMetricReader(coreContainer, "solr.node"); + } + + private Labels.Builder getDefaultAuditLoggerMetricsLabelsBuilder() { + return Labels.builder() + .label("category", "SECURITY") + .label("plugin_name", "CallbackAuditLoggerPlugin") + .label("handler", "/auditlogging") + .label("otel_scope_name", "org.apache.solr"); + } + + private Labels getDefaultAuditLoggerMetricsLabels() { + return getDefaultAuditLoggerMetricsLabelsBuilder().build(); + } + + private void assertGaugeMetricValue(String metricName, Labels labels, long expectedValue) { + GaugeSnapshot.GaugeDataPointSnapshot dataPointSnapshot = + (GaugeSnapshot.GaugeDataPointSnapshot) + SolrMetricTestUtils.getDataPointSnapshot(metricsReader, metricName, labels); + assertNotNull(dataPointSnapshot); + assertEquals(expectedValue, dataPointSnapshot.getValue(), 0.0); + } + + private void assertAuditMetricsMinimums(int count, int errors) throws InterruptedException { + Labels labels = getDefaultAuditLoggerMetricsLabels(); + assertCounterMinimumWithRetry("solr_auditlogger_count", labels, count); + + if (errors > 0) { + assertCounterMinimumWithRetry("solr_auditlogger_errors", labels, errors); + } + } + + private void assertCounterMinimumWithRetry(String metricName, Labels labels, int expectedMinimum) + throws InterruptedException { + boolean success = checkCounterMinimum(metricName, labels, expectedMinimum); + + if (!success && expectedMinimum > 0) { + log.info("First {} metric check failed, pausing 2s before re-attempt", metricName); + Thread.sleep(2000); + success = checkCounterMinimum(metricName, labels, expectedMinimum); + } + + assertTrue( + String.format( + "Counter metric '%s' did not meet minimum %d after retry", metricName, expectedMinimum), + success); + } + + private boolean checkCounterMinimum(String metricName, Labels labels, int expectedMinimum) { + CounterSnapshot.CounterDataPointSnapshot snapshot = + SolrMetricTestUtils.getCounterDatapoint(metricsReader, metricName, labels); + return snapshot != null && snapshot.getValue() >= expectedMinimum; } /** diff --git a/solr/core/src/test/org/apache/solr/security/AuditLoggerPluginTest.java b/solr/core/src/test/org/apache/solr/security/AuditLoggerPluginTest.java index 9b82e6d0585..1de35430800 100644 --- a/solr/core/src/test/org/apache/solr/security/AuditLoggerPluginTest.java +++ b/solr/core/src/test/org/apache/solr/security/AuditLoggerPluginTest.java @@ -17,6 +17,7 @@ package org.apache.solr.security; +import io.opentelemetry.api.common.Attributes; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -26,6 +27,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; +import org.apache.solr.metrics.SolrMetricsContext; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -133,6 +135,8 @@ public class AuditLoggerPluginTest extends SolrTestCaseJ4 { config = new HashMap<>(); config.put("async", false); plugin.init(config); + SolrMetricsContext mockSolrMetricsContext = MockSolrMetricsContextFactory.create(); + plugin.initializeMetrics(mockSolrMetricsContext, Attributes.empty(), "solr.test"); } @Override diff --git a/solr/core/src/test/org/apache/solr/security/MockSolrMetricsContextFactory.java b/solr/core/src/test/org/apache/solr/security/MockSolrMetricsContextFactory.java new file mode 100644 index 00000000000..679e14af67d --- /dev/null +++ b/solr/core/src/test/org/apache/solr/security/MockSolrMetricsContextFactory.java @@ -0,0 +1,49 @@ +package org.apache.solr.security; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Timer; +import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.api.metrics.LongGauge; +import io.opentelemetry.api.metrics.LongHistogram; +import org.apache.solr.metrics.SolrMetricsContext; +import org.apache.solr.metrics.otel.OtelUnit; + +public final class MockSolrMetricsContextFactory { + + public static SolrMetricsContext create() { + SolrMetricsContext mockParentContext = mock(SolrMetricsContext.class); + SolrMetricsContext mockChildContext = mock(SolrMetricsContext.class); + + when(mockParentContext.getChildContext(any())).thenReturn(mockChildContext); + + LongCounter mockOtelLongCounter = mock(LongCounter.class); + when(mockChildContext.longCounter(anyString(), any())).thenReturn(mockOtelLongCounter); + + Timer mockTimer = mock(Timer.class); + Timer.Context mockTimerContext = mock(Timer.Context.class); + when(mockTimer.time()).thenReturn(mockTimerContext); + when(mockChildContext.timer(anyString(), anyString(), anyString(), anyString())) + .thenReturn(mockTimer); + + Counter mockCounter = mock(Counter.class); + when(mockChildContext.counter(anyString(), anyString(), anyString(), anyString())) + .thenReturn(mockCounter); + + LongHistogram mockLongHistogram = mock(LongHistogram.class); + when(mockChildContext.longHistogram(anyString(), anyString(), any(OtelUnit.class))) + .thenReturn(mockLongHistogram); + + when(mockChildContext.observableLongGauge(anyString(), anyString(), any())).thenReturn(null); + when(mockChildContext.observableLongCounter(anyString(), anyString(), any())).thenReturn(null); + + LongGauge mockLongGauge = mock(LongGauge.class); + when(mockChildContext.longGauge(anyString(), anyString())).thenReturn(mockLongGauge); + + return mockParentContext; + } +} diff --git a/solr/core/src/test/org/apache/solr/security/MultiDestinationAuditLoggerTest.java b/solr/core/src/test/org/apache/solr/security/MultiDestinationAuditLoggerTest.java index 3b1ba2c4827..fe6c30070b0 100644 --- a/solr/core/src/test/org/apache/solr/security/MultiDestinationAuditLoggerTest.java +++ b/solr/core/src/test/org/apache/solr/security/MultiDestinationAuditLoggerTest.java @@ -16,6 +16,7 @@ */ package org.apache.solr.security; +import io.opentelemetry.api.common.Attributes; import java.io.IOException; import java.nio.file.Path; import java.util.ArrayList; @@ -24,6 +25,7 @@ import java.util.HashMap; import java.util.Map; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.metrics.SolrMetricsContext; import org.junit.Test; public class MultiDestinationAuditLoggerTest extends SolrTestCaseJ4 { @@ -52,6 +54,9 @@ public class MultiDestinationAuditLoggerTest extends SolrTestCaseJ4 { al.inform(loader); al.init(config); + SolrMetricsContext mockSolrMetricsContext = MockSolrMetricsContextFactory.create(); + al.initializeMetrics(mockSolrMetricsContext, Attributes.empty(), "solr.test"); + al.doAudit(new AuditEvent(AuditEvent.EventType.ANONYMOUS).setUsername("me")); assertEquals( 0, diff --git a/solr/core/src/test/org/apache/solr/security/SolrLogAuditLoggerPluginTest.java b/solr/core/src/test/org/apache/solr/security/SolrLogAuditLoggerPluginTest.java index 6685b25f340..15242a166ec 100644 --- a/solr/core/src/test/org/apache/solr/security/SolrLogAuditLoggerPluginTest.java +++ b/solr/core/src/test/org/apache/solr/security/SolrLogAuditLoggerPluginTest.java @@ -20,9 +20,11 @@ package org.apache.solr.security; import static org.apache.solr.security.AuditLoggerPluginTest.EVENT_ANONYMOUS; import static org.apache.solr.security.AuditLoggerPluginTest.EVENT_AUTHENTICATED; +import io.opentelemetry.api.common.Attributes; import java.util.HashMap; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; +import org.apache.solr.metrics.SolrMetricsContext; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -38,6 +40,8 @@ public class SolrLogAuditLoggerPluginTest extends SolrTestCaseJ4 { plugin = new SolrLogAuditLoggerPlugin(); config = new HashMap<>(); config.put("async", false); + SolrMetricsContext mockSolrMetricsContext = MockSolrMetricsContextFactory.create(); + plugin.initializeMetrics(mockSolrMetricsContext, Attributes.empty(), "solr.test"); } @Test(expected = SolrException.class) diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/audit-logging.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/audit-logging.adoc index 44efcd7f4c5..20c94bb71a1 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/audit-logging.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/audit-logging.adoc @@ -230,20 +230,17 @@ See the javadocs for the base class at {solr-javadocs}/core/org/apache/solr/secu == Metrics -Audit logging plugins record metrics about count and timing of log requests, as well as queue size for async loggers. -The metrics keys are all recorded on the `SECURITY` category, and each metric name are prefixed with a scope of `/auditlogging` and the class name of the logger, e.g., `SolrLogAuditLoggerPlugin`. +The metrics keys are all recorded on the `SECURITY` category, and each metric includes attributes for the handler (e.g., `/auditlogging`) and the class name of the logger (e.g., `SolrLogAuditLoggerPlugin`). The individual metrics are: -* `count`: (_meter_) Records number and rate of audit logs written. -* `errors`: (_meter_) Records number and rate of errors. -* `lost`: (_meter_) Records number and rate of events lost if the queue is full and `blockAsync=false`. -* `requestTimes`: (_timer_) Records latency and percentiles for audit logging performance. -* `totalTime`: (_counter_) Records total time spent logging. -* `queueCapacity`: (_gauge_) Records the maximum size of the async logging queue. -* `queueSize`: (_gauge_) Records the number of events currently waiting in the queue. -* `queuedTime`: (_timer_) Records the amount of time events waited in queue. -Adding this with the `requestTimes` metric will show the total time from event to logging complete. -* `async`: (_gauge_) Tells whether this logger is in async mode. - -TIP: If you experience a very high request rate and have a slow audit logger plugin, you may see the `queueSize` and `queuedTime` metrics increase, and possibly start dropping events (shown by an increase in `lost` count). +* `solr_auditlogger_count_total`: (_counter_) Records number of audit logs written. +* `solr_auditlogger_errors_total`: (_counter_) Records number of errors. +* `solr_auditlogger_lost_total`: (_counter_) Records number of events lost if the queue is full and `blockAsync=false`. +* `solr_auditlogger_request_times_nanoseconds`: (_histogram_) Records latency and percentiles for audit logging performance. +* `solr_auditlogger_queue`: (_gauge_) Records the maximum size (`type=capacity`) and current size (`type=size`) of the async logging queue. +* `solr_auditlogger_queued_time_nanoseconds`: (_histogram_) Records the amount of time events waited in queue. +Adding this with the `solr_auditlogger_request_times_nanoseconds` metric will show the total time from event to logging complete. +* `solr_auditlogger_async_enabled`: (_gauge_) Tells whether this logger is in async mode (1) or not (0). + +TIP: If you experience a very high request rate and have a slow audit logger plugin, you may see the `solr_auditlogger_queue` size metric increase, and possibly start dropping events (shown by an increase in `solr_auditlogger_lost_total` count). In this case you may want to increase the `numThreads` setting. diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudAuthTestCase.java b/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudAuthTestCase.java index c9ed378aa27..7fbf26ab5cf 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudAuthTestCase.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudAuthTestCase.java @@ -337,46 +337,6 @@ public class SolrCloudAuthTestCase extends SolrCloudTestCase { return metrics; } - /** - * Common test method to be able to check audit metrics - * - * @param className the class name to be used for composing prefix, e.g. - * "SECURITY./auditlogging/SolrLogAuditLoggerPlugin" - */ - protected void assertAuditMetricsMinimums( - MiniSolrCloudCluster cluster, String className, int count, int errors) - throws InterruptedException { - String prefix = "SECURITY./auditlogging." + className + "."; - Map<String, Long> expectedCounts = new HashMap<>(); - expectedCounts.put("count", (long) count); - - Map<String, Long> counts = countSecurityMetrics(cluster, prefix, AUDIT_METRICS_KEYS); - boolean success = isMetricsEqualOrLarger(AUDIT_METRICS_TO_COMPARE, expectedCounts, counts); - if (!success) { - log.info("First metrics count assert failed, pausing 2s before re-attempt"); - Thread.sleep(2000); - counts = countSecurityMetrics(cluster, prefix, AUDIT_METRICS_KEYS); - success = isMetricsEqualOrLarger(AUDIT_METRICS_TO_COMPARE, expectedCounts, counts); - } - - assertTrue( - "Expected metric minimums for prefix " - + prefix - + ": " - + expectedCounts - + ", but got: " - + counts, - success); - } - - private boolean isMetricsEqualOrLarger( - List<String> metricsToCompare, - Map<String, Long> expectedCounts, - Map<String, Long> actualCounts) { - return metricsToCompare.stream() - .allMatch(k -> actualCounts.get(k).intValue() >= expectedCounts.get(k).intValue()); - } - // Have to sum the metrics from all three shards/nodes private long sumCount(String prefix, String key, List<Map<String, Metric>> metrics) { assertTrue(
