This is an automated email from the ASF dual-hosted git repository.

gerlowskija pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git

commit 244a520c29db0d922ade6d886ff674f915abcbae
Author: Jason Gerlowski <[email protected]>
AuthorDate: Wed Apr 2 08:55:32 2025 -0400

    SOLR-17692: Abort ongoing fetches on core close (#3292)
    
    RecoveryStrategy.close aims to stop replication when the surrounding
    core is closed, but doesn't quite manage in all cases.  In particular,
    the 'closed' flag isn't able to preempt replication once the
    IndexFetcher has started pulling files.
    
    This commit aims to fix this by having RecoveryStrategy.close invoke
    ReplicationHandler.abortFetch, which sets a flag that *is* noticed by
    IndexFetcher.  This should ensure that DELETEREPLICA calls and other
    core-shutdown paths don't block on long-running recovery operations.
---
 solr/CHANGES.txt                                   |  3 +++
 .../org/apache/solr/cloud/RecoveryStrategy.java    | 23 ++++++++++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 49422376cc4..8c07505f98f 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -66,6 +66,9 @@ Bug Fixes
 
 * SOLR-17709: Fix race condition when checking distrib async cmd status (Houston Putman)
 
+* SOLR-17692: Core unload/deletion now preempts all forms of ongoing "recovery", rather than inadvertently waiting for
+  completion in some cases. (Jason Gerlowski)
+
 Dependency Upgrades
 ---------------------
 * SOLR-17471: Upgrade Lucene to 9.12.1. (Pierre Salagnac, Christine Poerschke)
diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
index be7c4b7c1ab..edcab845fd6 100644
--- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
@@ -106,6 +106,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
       Integer.getInteger("solr.cloud.wait-for-updates-with-stale-state-pause", 
2500);
   private int maxRetries = 500;
   private int startingRecoveryDelayMilliSeconds = 2000;
+  private ReplicationHandler replicationHandlerDoingFetch;
 
   public static interface RecoveryListener {
     public void recovered();
@@ -188,6 +189,15 @@ public class RecoveryStrategy implements Runnable, Closeable {
     close = true;
     cancelPrepRecoveryCmd();
     log.warn("Stopping recovery for core=[{}] coreNodeName=[{}]", coreName, 
coreZkNodeName);
+    abortIndexFetchingIfNecessary(replicationHandlerDoingFetch);
+  }
+
+  private void abortIndexFetchingIfNecessary(ReplicationHandler fetcher) {
+    // a 'null' ReplicationHandler indicates that no 
full-recovery/index-fetching is ongoing to
+    // abort.
+    if (fetcher != null) {
+      fetcher.abortFetch();
+    }
   }
 
   private final void recoveryFailed(final ZkController zkController, final 
CoreDescriptor cd)
@@ -240,10 +250,15 @@ public class RecoveryStrategy implements Runnable, Closeable {
         ReplicationHandler.SKIP_COMMIT_ON_LEADER_VERSION_ZERO, replicaType == 
Replica.Type.TLOG);
 
     if (isClosed()) return; // we check closed on return
-    boolean success = replicationHandler.doFetch(solrParams, 
false).getSuccessful();
-
-    if (!success) {
-      throw new SolrException(ErrorCode.SERVER_ERROR, "Replication for 
recovery failed.");
+    try {
+      // Stash the RH so the fetch can be aborted if RecoveryStrategy is 
closed mid-fetch
+      replicationHandlerDoingFetch = replicationHandler;
+      boolean success = replicationHandler.doFetch(solrParams, 
false).getSuccessful();
+      if (!success) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Replication for 
recovery failed.");
+      }
+    } finally {
+      replicationHandlerDoingFetch = null;
     }
 
     // solrcloud_debug

Reply via email to