This is an automated email from the ASF dual-hosted git repository.

houston pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new b73841db19e SOLR-17645: Gracefully handle exceptions in executor threads (#3150)
b73841db19e is described below

commit b73841db19ebf75e0b0ef752c708337340f7bfb7
Author: Houston Putman <[email protected]>
AuthorDate: Fri Feb 14 17:15:09 2025 -0600

    SOLR-17645: Gracefully handle exceptions in executor threads (#3150)
    
    * Gracefully handle exceptions in executor threads
    * Use better map methods, catch exception when refreshing terms
---
 .../org/apache/solr/cloud/ZkCollectionTerms.java   |  4 +---
 .../java/org/apache/solr/cloud/ZkController.java   |  5 ++--
 .../java/org/apache/solr/cloud/ZkShardTerms.java   | 10 +++++++-
 .../apache/solr/common/cloud/ZkStateReader.java    | 28 ++++++++++++++++++----
 4 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java b/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java
index 56726c6f506..a6c6a857a9e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkCollectionTerms.java
@@ -38,9 +38,7 @@ class ZkCollectionTerms implements AutoCloseable {
 
   public ZkShardTerms getShard(String shardId) {
     synchronized (terms) {
-      if (!terms.containsKey(shardId))
-        terms.put(shardId, new ZkShardTerms(collection, shardId, zkClient));
-      return terms.get(shardId);
+      return terms.computeIfAbsent(shardId, shard -> new ZkShardTerms(collection, shard, zkClient));
     }
   }
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index c8119d7a9bc..10b1457a211 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -1863,9 +1863,8 @@ public class ZkController implements Closeable {
 
   private ZkCollectionTerms getCollectionTerms(String collection) {
     synchronized (collectionToTerms) {
-      if (!collectionToTerms.containsKey(collection))
-        collectionToTerms.put(collection, new ZkCollectionTerms(collection, zkClient));
-      return collectionToTerms.get(collection);
+      return collectionToTerms.computeIfAbsent(
+          collection, col -> new ZkCollectionTerms(col, zkClient));
     }
   }
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java 
b/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
index e58e0e5ec32..5c0f5186e91 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkShardTerms.java
@@ -419,7 +419,15 @@ public class ZkShardTerms implements AutoCloseable {
           // Only refresh the data if the node was created or its data changed.
           if (Watcher.Event.EventType.NodeCreated == event.getType()
               || Watcher.Event.EventType.NodeDataChanged == event.getType()) {
-            refreshTerms();
+            try {
+              refreshTerms();
+            } catch (SolrException e) {
+              log.warn(
+                  "Error refreshing shard terms for collection: {}, shard: {}",
+                  collection,
+                  shard,
+                  e);
+            }
           }
         };
     try {
diff --git a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java
index 4f11b43a150..948050acf9c 100644
--- a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java
+++ b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java
@@ -1619,12 +1619,25 @@ public class ZkStateReader implements SolrCloseable {
 
     final CountDownLatch latch = new CountDownLatch(1);
     waitLatches.add(latch);
-    AtomicReference<DocCollection> docCollection = new AtomicReference<>();
+    final AtomicReference<DocCollection> docCollection = new AtomicReference<>();
+    final AtomicReference<SolrException> thrownException = new AtomicReference<>();
     CollectionStateWatcher watcher =
         (n, c) -> {
           docCollection.set(c);
-          boolean matches = predicate.matches(n, c);
-          if (matches) latch.countDown();
+          boolean matches = false;
+          try {
+            matches = predicate.matches(n, c);
+            if (matches) {
+              latch.countDown();
+              thrownException.set(null);
+            }
+          } catch (SolrException e) {
+            if (thrownException.getAndSet(e) != null) {
+              // Return if we have seen an exception twice
+              latch.countDown();
+              matches = true;
+            }
+          }
 
           return matches;
         };
@@ -1632,13 +1645,18 @@ public class ZkStateReader implements SolrCloseable {
     try {
       registerCollectionStateWatcher(collection, watcher);
       // wait for the watcher predicate to return true, or time out
-      if (!latch.await(wait, unit))
+      if (!latch.await(wait, unit)) {
         throw new TimeoutException(
             "Timeout waiting to see state for collection="
                 + collection
                 + " :"
                 + docCollection.get());
-
+      } else if (thrownException.get() != null) {
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR,
+            "Error occurred while checking state",
+            thrownException.get());
+      }
     } finally {
       removeCollectionStateWatcher(collection, watcher);
       waitLatches.remove(latch);

Reply via email to