This is an automated email from the ASF dual-hosted git repository.

gerlowskija pushed a commit to branch branch_10x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_10x by this push:
     new 26c7e78e85f SOLR-18146: Fix race in CircuitBreakerRegistry (#4189)
26c7e78e85f is described below

commit 26c7e78e85fbdee69e7e765359beac289f464526
Author: Jason Gerlowski <[email protected]>
AuthorDate: Mon Mar 9 15:12:41 2026 -0400

    SOLR-18146: Fix race in CircuitBreakerRegistry (#4189)
    
    Global CB loading is now only done by a single thread, and skipped by
    whichever threads "lose" that race on startup.
---
 .../SOLR-18146-fix-global-cb-race-condition.yml    |   7 ++
 .../circuitbreaker/CircuitBreakerRegistry.java     |  31 +++--
 .../apache/solr/util/TestGlobalCircuitBreaker.java |   3 +
 .../TestGlobalCircuitBreakerRegistration.java      | 137 +++++++++++++++++++++
 .../solr/util/circuitbreaker/package-info.java     |  22 ++++
 5 files changed, 187 insertions(+), 13 deletions(-)

diff --git a/changelog/unreleased/SOLR-18146-fix-global-cb-race-condition.yml 
b/changelog/unreleased/SOLR-18146-fix-global-cb-race-condition.yml
new file mode 100644
index 00000000000..f9056b84814
--- /dev/null
+++ b/changelog/unreleased/SOLR-18146-fix-global-cb-race-condition.yml
@@ -0,0 +1,7 @@
+title: Fix race conditions in "global" CircuitBreaker registration
+type: fixed
+authors:
+  - name: Jason Gerlowski
+links:
+  - name: SOLR-18146
+    url: https://issues.apache.org/jira/browse/SOLR-18146
diff --git 
a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
 
b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
index 91e284b823f..075aefc23dd 100644
--- 
a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
+++ 
b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
@@ -28,6 +28,7 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -50,9 +51,6 @@ import org.slf4j.LoggerFactory;
  */
 public class CircuitBreakerRegistry implements Closeable {
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-  private final Map<SolrRequestType, List<CircuitBreaker>> circuitBreakerMap = 
new HashMap<>();
-  private static final Map<SolrRequestType, List<CircuitBreaker>> 
globalCircuitBreakerMap =
-      new ConcurrentHashMap<>();
   private static final Pattern SYSPROP_REGEX =
       
Pattern.compile("solr.circuitbreaker\\.(update|query)\\.(cpu|mem|loadavg)");
   public static final String SYSPROP_PREFIX = "solr.circuitbreaker.";
@@ -64,6 +62,12 @@ public class CircuitBreakerRegistry implements Closeable {
   public static final String SYSPROP_QUERY_LOADAVG = SYSPROP_PREFIX + 
"query.loadavg";
   public static final String SYSPROP_WARN_ONLY_SUFFIX = ".warnonly";
 
+  private static boolean globalsInitialized = false;
+  private static final Map<SolrRequestType, List<CircuitBreaker>> 
globalCircuitBreakerMap =
+      new ConcurrentHashMap<>();
+
+  private final Map<SolrRequestType, List<CircuitBreaker>> circuitBreakerMap = 
new HashMap<>();
+
   public CircuitBreakerRegistry(CoreContainer coreContainer) {
     initGlobal(coreContainer);
   }
@@ -118,7 +122,8 @@ public class CircuitBreakerRegistry implements Closeable {
     return metricType + ":" + System.getProperty(thresholdProp) + ":" + 
warnOnly;
   }
 
-  private static void initGlobal(CoreContainer coreContainer) {
+  private static synchronized void initGlobal(CoreContainer coreContainer) {
+    if (globalsInitialized) return;
     final var parsedBreakers = 
parseCircuitBreakersFromProperties(coreContainer);
     for (CircuitBreaker breaker : parsedBreakers) {
       registerGlobal(breaker);
@@ -129,6 +134,7 @@ public class CircuitBreakerRegistry implements Closeable {
             breaker.getRequestTypes());
       }
     }
+    globalsInitialized = true;
   }
 
   /** List all registered circuit breakers for global context */
@@ -165,7 +171,7 @@ public class CircuitBreakerRegistry implements Closeable {
         .forEach(
             r -> {
               List<CircuitBreaker> list =
-                  globalCircuitBreakerMap.computeIfAbsent(r, k -> new 
ArrayList<>());
+                  globalCircuitBreakerMap.computeIfAbsent(r, k -> new 
CopyOnWriteArrayList<>());
               list.add(circuitBreaker);
             });
   }
@@ -235,14 +241,13 @@ public class CircuitBreakerRegistry implements Closeable {
     }
   }
 
-  private static void closeGlobal() {
-    synchronized (globalCircuitBreakerMap) {
-      closeCircuitBreakers(
-          globalCircuitBreakerMap.values().stream()
-              .flatMap(List::stream)
-              .collect(Collectors.toList()));
-      globalCircuitBreakerMap.clear();
-    }
+  private static synchronized void closeGlobal() {
+    closeCircuitBreakers(
+        globalCircuitBreakerMap.values().stream()
+            .flatMap(List::stream)
+            .collect(Collectors.toList()));
+    globalCircuitBreakerMap.clear();
+    globalsInitialized = false;
   }
 
   /**
diff --git 
a/solr/core/src/test/org/apache/solr/util/TestGlobalCircuitBreaker.java 
b/solr/core/src/test/org/apache/solr/util/TestGlobalCircuitBreaker.java
index db1a3b4380d..f1c4e110d28 100644
--- a/solr/core/src/test/org/apache/solr/util/TestGlobalCircuitBreaker.java
+++ b/solr/core/src/test/org/apache/solr/util/TestGlobalCircuitBreaker.java
@@ -34,6 +34,9 @@ public class TestGlobalCircuitBreaker extends SolrTestCaseJ4 {
     System.setProperty("queryResultCache.enabled", "false");
     System.setProperty("documentCache.enabled", "true");
 
+    // Deregister existing CBs so that they're re-populated on the next core 
load.
+    CircuitBreakerRegistry.deregisterGlobal();
+
     // Set a global update breaker for a low CPU, which will trip during 
indexing
     System.setProperty(CircuitBreakerRegistry.SYSPROP_UPDATE_LOADAVG, "0.1");
 
diff --git 
a/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestGlobalCircuitBreakerRegistration.java
 
b/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestGlobalCircuitBreakerRegistration.java
new file mode 100644
index 00000000000..fe04fa34170
--- /dev/null
+++ 
b/solr/core/src/test/org/apache/solr/util/circuitbreaker/TestGlobalCircuitBreakerRegistration.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util.circuitbreaker;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.BrokenBarrierException;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.util.ExecutorUtil;
+import org.apache.solr.common.util.SolrNamedThreadFactory;
+import org.junit.After;
+import org.junit.Test;
+
+/**
+ * Attempts to reproduce the race condition described in SOLR-18146
+ *
+ * <p>The race: {@link CircuitBreakerRegistry} relies on a static map of 
global (i.e.
+ * non-core-specific) CircuitBreakers. If Solr was loading multiple cores on 
startup, these
+ * concurrent core-loads could race when initializing this static state 
leading to
+ * ConcurrentModificationExceptions, ArrayIndexOutOfBoundsExceptions, dropped 
circuit breaker
+ * entries, etc.
+ *
+ * <p>These issues should be resolved by SOLR-18146, but the test remains to 
catch them if they
+ * recur. Often useful to run in larger iterations with {@code 
-Ptests.iters=20}.
+ */
+public class TestGlobalCircuitBreakerRegistration extends SolrTestCaseJ4 {
+
+  @After
+  public void cleanup() {
+    CircuitBreakerRegistry.deregisterGlobal();
+  }
+
+  @Test
+  public void testConcurrentGlobalRegistrationDoesNotThrow() throws Exception {
+    final int threadCount = 50;
+    final int itersPerThread = 20;
+    final int total = threadCount * itersPerThread;
+    final CyclicBarrier barrier = new CyclicBarrier(threadCount);
+    final List<Throwable> failures = Collections.synchronizedList(new 
ArrayList<>());
+
+    // Pre-create all circuit breakers before the barrier to make the race as 
"hot" as possible
+    List<List<CircuitBreaker>> breakersPerThread = new 
ArrayList<>(threadCount);
+    for (int t = 0; t < threadCount; t++) {
+      List<CircuitBreaker> threadBreakers = new ArrayList<>(itersPerThread);
+      for (int i = 0; i < itersPerThread; i++) {
+        CircuitBreaker b =
+            new CircuitBreaker() {
+              @Override
+              public boolean isTripped() {
+                return false;
+              }
+
+              @Override
+              public String getErrorMessage() {
+                return "test";
+              }
+            };
+        b.setRequestTypes(List.of("query"));
+        threadBreakers.add(b);
+      }
+      breakersPerThread.add(threadBreakers);
+    }
+
+    ExecutorService executor =
+        ExecutorUtil.newMDCAwareFixedThreadPool(
+            threadCount, new 
SolrNamedThreadFactory("test-concurrent-cb-registration"));
+    try {
+      List<Future<?>> futures = new ArrayList<>();
+      for (int t = 0; t < threadCount; t++) {
+        final List<CircuitBreaker> myBreakers = breakersPerThread.get(t);
+        futures.add(
+            executor.submit(
+                () -> {
+                  try {
+                    // Release all threads simultaneously to maximize 
contention on
+                    // the shared ArrayList for the QUERY key in 
globalCircuitBreakerMap.
+                    barrier.await();
+                    for (CircuitBreaker b : myBreakers) {
+                      CircuitBreakerRegistry.registerGlobal(b);
+                    }
+                  } catch (BrokenBarrierException | InterruptedException e) {
+                    Thread.currentThread().interrupt();
+                  } catch (Throwable ex) {
+                    // Captures ArrayIndexOutOfBoundsException (or similar) 
from
+                    // concurrent ArrayList.add() when the race is triggered.
+                    failures.add(ex);
+                  }
+                }));
+      }
+      for (Future<?> f : futures) {
+        f.get();
+      }
+    } finally {
+      ExecutorUtil.shutdownAndAwaitTermination(executor);
+    }
+
+    assertTrue(
+        "Exceptions thrown during concurrent registerGlobal: " + failures, 
failures.isEmpty());
+
+    // Even without a thrown exception the backing ArrayList can be silently
+    // corrupted: concurrent adds may produce null slots or overwrite each 
other,
+    // losing registrations.  Verify the invariant that all registered 
breakers are
+    // non-null and that none were lost.
+    Set<CircuitBreaker> registered = CircuitBreakerRegistry.listGlobal();
+    assertFalse(
+        "globalCircuitBreakerMap contains null entries — ArrayList corrupted 
by concurrent add()",
+        registered.contains(null));
+    assertEquals(
+        "Expected "
+            + total
+            + " registered circuit breakers but found "
+            + registered.size()
+            + " — data was lost due to concurrent ArrayList.add() corruption",
+        total,
+        registered.size());
+  }
+}
diff --git 
a/solr/core/src/test/org/apache/solr/util/circuitbreaker/package-info.java 
b/solr/core/src/test/org/apache/solr/util/circuitbreaker/package-info.java
new file mode 100644
index 00000000000..f093cf77bfa
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/util/circuitbreaker/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for {@link 
org.apache.solr.util.circuitbreaker.CircuitBreakerRegistry} and other CB
+ * functionality.
+ */
+package org.apache.solr.util.circuitbreaker;

Reply via email to