This is an automated email from the ASF dual-hosted git repository.
gerlowskija pushed a commit to branch branch_10x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_10x by this push:
new 26c7e78e85f SOLR-18146: Fix race in CircuitBreakerRegistry (#4189)
26c7e78e85f is described below
commit 26c7e78e85fbdee69e7e765359beac289f464526
Author: Jason Gerlowski <[email protected]>
AuthorDate: Mon Mar 9 15:12:41 2026 -0400
SOLR-18146: Fix race in CircuitBreakerRegistry (#4189)
Global CB loading is now only done by a single thread, and skipped by
whichever threads "lose" that race on startup.
---
.../SOLR-18146-fix-global-cb-race-condition.yml | 7 ++
.../circuitbreaker/CircuitBreakerRegistry.java | 31 +++--
.../apache/solr/util/TestGlobalCircuitBreaker.java | 3 +
.../TestGlobalCircuitBreakerRegistration.java | 137 +++++++++++++++++++++
.../solr/util/circuitbreaker/package-info.java | 22 ++++
5 files changed, 187 insertions(+), 13 deletions(-)
diff --git a/changelog/unreleased/SOLR-18146-fix-global-cb-race-condition.yml
b/changelog/unreleased/SOLR-18146-fix-global-cb-race-condition.yml
new file mode 100644
index 00000000000..f9056b84814
--- /dev/null
+++ b/changelog/unreleased/SOLR-18146-fix-global-cb-race-condition.yml
@@ -0,0 +1,7 @@
+title: Fix race conditions in "global" CircuitBreaker registration
+type: fixed
+authors:
+ - name: Jason Gerlowski
+links:
+ - name: SOLR-18146
+ url: https://issues.apache.org/jira/browse/SOLR-18146
diff --git
a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
index 91e284b823f..075aefc23dd 100644
---
a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
+++
b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
@@ -28,6 +28,7 @@ import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -50,9 +51,6 @@ import org.slf4j.LoggerFactory;
*/
public class CircuitBreakerRegistry implements Closeable {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- private final Map<SolrRequestType, List<CircuitBreaker>> circuitBreakerMap =
new HashMap<>();
- private static final Map<SolrRequestType, List<CircuitBreaker>>
globalCircuitBreakerMap =
- new ConcurrentHashMap<>();
private static final Pattern SYSPROP_REGEX =
Pattern.compile("solr.circuitbreaker\\.(update|query)\\.(cpu|mem|loadavg)");
public static final String SYSPROP_PREFIX = "solr.circuitbreaker.";
@@ -64,6 +62,12 @@ public class CircuitBreakerRegistry implements Closeable {
public static final String SYSPROP_QUERY_LOADAVG = SYSPROP_PREFIX +
"query.loadavg";
public static final String SYSPROP_WARN_ONLY_SUFFIX = ".warnonly";
+ private static boolean globalsInitialized = false;
+ private static final Map<SolrRequestType, List<CircuitBreaker>>
globalCircuitBreakerMap =
+ new ConcurrentHashMap<>();
+
+ private final Map<SolrRequestType, List<CircuitBreaker>> circuitBreakerMap =
new HashMap<>();
+
public CircuitBreakerRegistry(CoreContainer coreContainer) {
initGlobal(coreContainer);
}
@@ -118,7 +122,8 @@ public class CircuitBreakerRegistry implements Closeable {
return metricType + ":" + System.getProperty(thresholdProp) + ":" +
warnOnly;
}
- private static void initGlobal(CoreContainer coreContainer) {
+ private static synchronized void initGlobal(CoreContainer coreContainer) {
+ if (globalsInitialized) return;
final var parsedBreakers =
parseCircuitBreakersFromProperties(coreContainer);
for (CircuitBreaker breaker : parsedBreakers) {
registerGlobal(breaker);
@@ -129,6 +134,7 @@ public class CircuitBreakerRegistry implements Closeable {
breaker.getRequestTypes());
}
}
+ globalsInitialized = true;
}
/** List all registered circuit breakers for global context */
@@ -165,7 +171,7 @@ public class CircuitBreakerRegistry implements Closeable {
.forEach(
r -> {
List<CircuitBreaker> list =
- globalCircuitBreakerMap.computeIfAbsent(r, k -> new
ArrayList<>());
+ globalCircuitBreakerMap.computeIfAbsent(r, k -> new
CopyOnWriteArrayList<>());
list.add(circuitBreaker);
});
}
@@ -235,14 +241,13 @@ public class CircuitBreakerRegistry implements Closeable {
}
}
- private static void closeGlobal() {
- synchronized (globalCircuitBreakerMap) {
- closeCircuitBreakers(
- globalCircuitBreakerMap.values().stream()
- .flatMap(List::stream)
- .collect(Collectors.toList()));
- globalCircuitBreakerMap.clear();
- }
+ private static synchronized void closeGlobal() {
+ closeCircuitBreakers(
+ globalCircuitBreakerMap.values().stream()
+ .flatMap(List::stream)
+ .collect(Collectors.toList()));
+ globalCircuitBreakerMap.clear();
+ globalsInitialized = false;
}
/**
diff --git
a/solr/core/src/test/org/apache/solr/util/TestGlobalCircuitBreaker.java
b/solr/core/src/test/org/apache/solr/util/TestGlobalCircuitBreaker.java
index db1a3b4380d..f1c4e110d28 100644
--- a/solr/core/src/test/org/apache/solr/util/TestGlobalCircuitBreaker.java
+++ b/solr/core/src/test/org/apache/solr/util/TestGlobalCircuitBreaker.java
@@ -34,6 +34,9 @@ public class TestGlobalCircuitBreaker extends SolrTestCaseJ4 {
System.setProperty("queryResultCache.enabled", "false");
System.setProperty("documentCache.enabled", "true");
+ // Deregister existing CBs so that they're re-populated on the next core
load.
+ CircuitBreakerRegistry.deregisterGlobal();
+
// Set a global update breaker for a low CPU, which will trip during
indexing
System.setProperty(CircuitBreakerRegistry.SYSPROP_UPDATE_LOADAVG, "0.1");
diff --git
a/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestGlobalCircuitBreakerRegistration.java
b/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestGlobalCircuitBreakerRegistration.java
new file mode 100644
index 00000000000..fe04fa34170
--- /dev/null
+++
b/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestGlobalCircuitBreakerRegistration.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util.circuitbreaker;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.BrokenBarrierException;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.util.ExecutorUtil;
+import org.apache.solr.common.util.SolrNamedThreadFactory;
+import org.junit.After;
+import org.junit.Test;
+
+/**
+ * Attempts to reproduce the race condition described in SOLR-18146
+ *
+ * <p>The race: {@link CircuitBreakerRegistry} relies on a static map of
global (i.e.
+ * non-core-specific) CircuitBreakers. If Solr was loading multiple cores on
startup, these
+ * concurrent core-loads could race when initializing this static state
leading to
+ * ConcurrentModificationExceptions, ArrayIndexOutOfBoundsExceptions, dropped
circuit breaker
+ * entries, etc.
+ *
+ * <p>These issues should be resolved by SOLR-18146, but the test remains to
catch them if they
+ * recur. Often useful to run in larger iterations with {@code
-Ptests.iters=20}.
+ */
+public class TestGlobalCircuitBreakerRegistration extends SolrTestCaseJ4 {
+
+ @After
+ public void cleanup() {
+ CircuitBreakerRegistry.deregisterGlobal();
+ }
+
+ @Test
+ public void testConcurrentGlobalRegistrationDoesNotThrow() throws Exception {
+ final int threadCount = 50;
+ final int itersPerThread = 20;
+ final int total = threadCount * itersPerThread;
+ final CyclicBarrier barrier = new CyclicBarrier(threadCount);
+ final List<Throwable> failures = Collections.synchronizedList(new
ArrayList<>());
+
+ // Pre-create all circuit breakers before the barrier to make the race as
"hot" as possible
+ List<List<CircuitBreaker>> breakersPerThread = new
ArrayList<>(threadCount);
+ for (int t = 0; t < threadCount; t++) {
+ List<CircuitBreaker> threadBreakers = new ArrayList<>(itersPerThread);
+ for (int i = 0; i < itersPerThread; i++) {
+ CircuitBreaker b =
+ new CircuitBreaker() {
+ @Override
+ public boolean isTripped() {
+ return false;
+ }
+
+ @Override
+ public String getErrorMessage() {
+ return "test";
+ }
+ };
+ b.setRequestTypes(List.of("query"));
+ threadBreakers.add(b);
+ }
+ breakersPerThread.add(threadBreakers);
+ }
+
+ ExecutorService executor =
+ ExecutorUtil.newMDCAwareFixedThreadPool(
+ threadCount, new
SolrNamedThreadFactory("test-concurrent-cb-registration"));
+ try {
+ List<Future<?>> futures = new ArrayList<>();
+ for (int t = 0; t < threadCount; t++) {
+ final List<CircuitBreaker> myBreakers = breakersPerThread.get(t);
+ futures.add(
+ executor.submit(
+ () -> {
+ try {
+ // Release all threads simultaneously to maximize
contention on
+ // the shared ArrayList for the QUERY key in
globalCircuitBreakerMap.
+ barrier.await();
+ for (CircuitBreaker b : myBreakers) {
+ CircuitBreakerRegistry.registerGlobal(b);
+ }
+ } catch (BrokenBarrierException | InterruptedException e) {
+ Thread.currentThread().interrupt();
+ } catch (Throwable ex) {
+ // Captures ArrayIndexOutOfBoundsException (or similar)
from
+ // concurrent ArrayList.add() when the race is triggered.
+ failures.add(ex);
+ }
+ }));
+ }
+ for (Future<?> f : futures) {
+ f.get();
+ }
+ } finally {
+ ExecutorUtil.shutdownAndAwaitTermination(executor);
+ }
+
+ assertTrue(
+ "Exceptions thrown during concurrent registerGlobal: " + failures,
failures.isEmpty());
+
+ // Even without a thrown exception the backing ArrayList can be silently
+ // corrupted: concurrent adds may produce null slots or overwrite each
other,
+ // losing registrations. Verify the invariant that all registered
breakers are
+ // non-null and that none were lost.
+ Set<CircuitBreaker> registered = CircuitBreakerRegistry.listGlobal();
+ assertFalse(
+ "globalCircuitBreakerMap contains null entries — ArrayList corrupted
by concurrent add()",
+ registered.contains(null));
+ assertEquals(
+ "Expected "
+ + total
+ + " registered circuit breakers but found "
+ + registered.size()
+ + " — data was lost due to concurrent ArrayList.add() corruption",
+ total,
+ registered.size());
+ }
+}
diff --git
a/solr/core/src/test/org/apache/solr/util/circuitbreaker/package-info.java
b/solr/core/src/test/org/apache/solr/util/circuitbreaker/package-info.java
new file mode 100644
index 00000000000..f093cf77bfa
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/util/circuitbreaker/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for {@link
org.apache.solr.util.circuitbreaker.CircuitBreakerRegistry} and other CB
+ * functionality.
+ */
+package org.apache.solr.util.circuitbreaker;