This is an automated email from the ASF dual-hosted git repository.

psalagnac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 8873cf1242b SOLR-18155: Abort shard leader election if container 
shutdown has started (#4224)
8873cf1242b is described below

commit 8873cf1242be0787503530079be23f30c55916a0
Author: Pierre Salagnac <[email protected]>
AuthorDate: Wed Mar 18 10:27:55 2026 +0100

    SOLR-18155: Abort shard leader election if container shutdown has started 
(#4224)
    
    This is mostly for tests. It ensures a replica cannot be briefly elected
    leader while all nodes are shutting down.
---
 changelog/unreleased/SOLR-18155-election-leak.yml      |  7 +++++++
 .../apache/solr/cloud/ShardLeaderElectionContext.java  | 18 +++++++++++++-----
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/changelog/unreleased/SOLR-18155-election-leak.yml 
b/changelog/unreleased/SOLR-18155-election-leak.yml
new file mode 100644
index 00000000000..a43436d0980
--- /dev/null
+++ b/changelog/unreleased/SOLR-18155-election-leak.yml
@@ -0,0 +1,7 @@
+title: Abort shard leader election if container shutdown sequence has started, 
so we don't have leaders elected very late and not properly closed.
+type: fixed
+authors:
+  - name: Pierre Salagnac
+links:
+  - name: SOLR-18155
+    url: https://issues.apache.org/jira/browse/SOLR-18155
diff --git 
a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java 
b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
index 96401345503..fc77cee2e20 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ShardLeaderElectionContext.java
@@ -77,6 +77,14 @@ final class ShardLeaderElectionContext extends 
ShardLeaderElectionContextBase {
     syncStrategy.close();
   }
 
+  /**
+   * Internally check whether we should abort the election process. This returns true if either this
+   * context was explicitly closed or the Solr server is being shut down.
+   */
+  private boolean shouldAbort() {
+    return isClosed || cc.isShutDown();
+  }
+
   @Override
   public void cancelElection() throws InterruptedException, KeeperException {
     String coreName = leaderProps.getStr(ZkStateReader.CORE_NAME_PROP);
@@ -154,7 +162,7 @@ final class ShardLeaderElectionContext extends 
ShardLeaderElectionContextBase {
         waitForReplicasToComeUp(leaderVoteWait);
       }
 
-      if (isClosed) {
+      if (shouldAbort()) {
         // Solr is shutting down or the ZooKeeper session expired while 
waiting for replicas. If the
         // later, we cannot be sure we are still the leader, so we should bail 
out. The OnReconnect
         // handler will re-register the cores and handle a new leadership 
election.
@@ -185,7 +193,7 @@ final class ShardLeaderElectionContext extends 
ShardLeaderElectionContextBase {
           }
         }
 
-        if (isClosed) {
+        if (shouldAbort()) {
           return;
         }
 
@@ -267,7 +275,7 @@ final class ShardLeaderElectionContext extends 
ShardLeaderElectionContextBase {
         }
       }
 
-      if (!isClosed) {
+      if (!shouldAbort()) {
         try {
           if (replicaType.replicateFromLeader) {
             // stop replicate from old leader
@@ -361,7 +369,7 @@ final class ShardLeaderElectionContext extends 
ShardLeaderElectionContextBase {
       ZkShardTerms zkShardTerms, String coreNodeName, int timeout) throws 
InterruptedException {
     long timeoutAt =
         System.nanoTime() + TimeUnit.NANOSECONDS.convert(timeout, 
TimeUnit.MILLISECONDS);
-    while (!isClosed && !cc.isShutDown()) {
+    while (!shouldAbort()) {
       if (System.nanoTime() > timeoutAt) {
         log.warn(
             "After waiting for {}ms, no other potential leader was found, {} 
try to become leader anyway (core_term:{}, highest_term:{})",
@@ -446,7 +454,7 @@ final class ShardLeaderElectionContext extends 
ShardLeaderElectionContextBase {
     DocCollection docCollection = 
zkController.getClusterState().getCollectionOrNull(collection);
     Slice slices = (docCollection == null) ? null : 
docCollection.getSlice(shardId);
     int cnt = 0;
-    while (!isClosed && !cc.isShutDown()) {
+    while (!shouldAbort()) {
       // wait for everyone to be up
       if (slices != null) {
         int found = 0;

Reply via email to