This is an automated email from the ASF dual-hosted git repository.

psalagnac pushed a commit to branch delete-zk-nodes
in repository https://gitbox.apache.org/repos/asf/solr.git

commit 8d1601156745853c5f89f056f10d71d7bad29722
Author: Pierre Salagnac <[email protected]>
AuthorDate: Tue Apr 8 11:34:24 2025 +0200

    SOLR-14240: Clean up ZK nodes after shard deletion is invoked
---
 solr/CHANGES.txt                                   |  3 ++
 .../solr/cloud/api/collections/DeleteShardCmd.java | 32 +++++++++++++++++++++-
 .../org/apache/solr/cloud/DeleteShardTest.java     | 21 ++++++++++++++
 3 files changed, 55 insertions(+), 1 deletion(-)
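
For context, the patch below removes the per-shard znodes that sit alongside the collection state
in ZooKeeper (ZkStateReader.COLLECTIONS_ZKNODE resolves to "/collections"): the leader_elect,
leaders and terms children of the deleted slice. Below is a minimal sketch of how leftovers could
be detected using the same SolrZkClient calls the patch and its test rely on; the class and
method names (ShardMetadataCheck, hasLeftoverShardMetadata) are illustrative only and not part of
this commit.

    import org.apache.solr.common.cloud.SolrZkClient;
    import org.apache.solr.common.cloud.ZkStateReader;
    import org.apache.zookeeper.KeeperException;

    class ShardMetadataCheck {
      /** Returns true if any per-shard metadata znode is still present (illustrative helper). */
      static boolean hasLeftoverShardMetadata(SolrZkClient client, String collection, String sliceId)
          throws KeeperException, InterruptedException {
        String base = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
        String[] shardPaths = {
          base + "/leader_elect/" + sliceId, base + "/leaders/" + sliceId, base + "/terms/" + sliceId
        };
        for (String path : shardPaths) {
          // exists(path, retryOnConnLoss) reports whether the znode is still in ZooKeeper
          if (Boolean.TRUE.equals(client.exists(path, true))) {
            return true;
          }
        }
        return false;
      }
    }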

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 61a1d98e47a..1a4c70dbc16 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -243,6 +243,9 @@ Bug Fixes
 * SOLR-17692: Core unload/deletion now preempts all forms of ongoing "recovery", rather than inadvertently waiting for
   completion in some cases. (Jason Gerlowski)
 
+* SOLR-14240: Clean up shard metadata in ZooKeeper after shard deletion is invoked. This ensures the ZooKeeper
+  nodes for leader election and terms are not left behind. (Andy Vuong, Pierre Salagnac)
+
 Dependency Upgrades
 ---------------------
 * SOLR-17471: Upgrade Lucene to 9.12.1. (Pierre Salagnac, Christine Poerschke)
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteShardCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteShardCmd.java
index 72c93c02c62..d32b80d4b39 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteShardCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteShardCmd.java
@@ -38,6 +38,7 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CoreAdminParams;
@@ -201,7 +202,7 @@ public class DeleteShardCmd implements CollApiCmds.CollectionApiCommand {
       } else {
         ccc.offerStateUpdate(m);
       }
-
+      cleanupZooKeeperShardMetadata(collectionName, sliceId);
       zkStateReader.waitForState(
           collectionName, 45, TimeUnit.SECONDS, (c) -> c.getSlice(sliceId) == null);
 
@@ -238,4 +239,33 @@ public class DeleteShardCmd implements CollApiCmds.CollectionApiCommand {
     }
     return sourceReplicas;
   }
+
+  /**
+   * Best effort to delete the ZooKeeper nodes that store shard details outside the cluster state
+   * itself (leader election, leader, terms). If this fails, we just log and the shard is still deleted.
+   */
+  private void cleanupZooKeeperShardMetadata(String collection, String sliceId)
+      throws InterruptedException {
+
+    String[] cleanupPaths =
+        new String[] {
+          ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/leader_elect/" + sliceId,
+          ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/leaders/" + sliceId,
+          ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/terms/" + sliceId
+        };
+
+    SolrZkClient client = ccc.getZkStateReader().getZkClient();
+    for (String path : cleanupPaths) {
+      try {
+        client.clean(path);
+      } catch (KeeperException ex) {
+        log.warn(
+            "Non-fatal error occurred when deleting shard metadata {}/{} at path {}",
+            collection,
+            sliceId,
+            path,
+            ex);
+      }
+    }
+  }
 }
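
The cleanup is deliberately best-effort: it runs right after the shard-removal state update is
submitted and before the command waits for the slice to disappear from the cluster state, and a
KeeperException only results in a warning. A rough sketch of how the new behaviour is exercised
end-to-end through the Collections API, assuming a running SolrCloud cluster; the class and method
names (DeleteShardExample, deleteShardAndMetadata) and the solrClient variable are placeholders,
not part of the patch.

    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.request.CollectionAdminRequest;

    class DeleteShardExample {
      /** Deletes an inactive shard; DeleteShardCmd now also removes its ZooKeeper metadata. */
      static void deleteShardAndMetadata(SolrClient solrClient, String collection, String shardId)
          throws Exception {
        CollectionAdminRequest.deleteShard(collection, shardId).process(solrClient);
        // Once the command completes, /collections/<collection>/{leader_elect,leaders,terms}/<shardId>
        // should be gone as well (best effort; a warning is logged if the deletion fails).
      }
    }
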
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
index dec630995cb..08b0f96320f 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
@@ -23,7 +23,9 @@ import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.Slice.State;
+import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.util.FileUtils;
+import org.apache.zookeeper.KeeperException;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -54,6 +56,8 @@ public class DeleteShardTest extends SolrCloudTestCase {
     DocCollection state = getCollectionState(collection);
     assertEquals(State.ACTIVE, state.getSlice("shard1").getState());
     assertEquals(State.ACTIVE, state.getSlice("shard2").getState());
+    assertShardMetadata(collection, "shard1", true);
+    assertShardMetadata(collection, "shard2", true);
 
     // Can't delete an ACTIVE shard
     expectThrows(
@@ -67,11 +71,13 @@ public class DeleteShardTest extends SolrCloudTestCase {
     // Can delete an INACTIVE shard
     CollectionAdminRequest.deleteShard(collection, "shard1").process(cluster.getSolrClient());
     waitForState("Expected 'shard1' to be removed", collection, c -> c.getSlice("shard1") == null);
+    assertShardMetadata(collection, "shard1", false);
 
     // Can delete a shard under construction
     setSliceState(collection, "shard2", Slice.State.CONSTRUCTION);
     CollectionAdminRequest.deleteShard(collection, "shard2").process(cluster.getSolrClient());
     waitForState("Expected 'shard2' to be removed", collection, c -> c.getSlice("shard2") == null);
+    assertShardMetadata(collection, "shard2", false);
   }
 
   @Test
@@ -125,4 +131,19 @@ public class DeleteShardTest extends SolrCloudTestCase {
         collectionName,
         c -> c.getSlice(shardId).getState() == state);
   }
+
+  /** Checks whether the shard metadata znodes exist in ZooKeeper. */
+  private void assertShardMetadata(String collection, String sliceId, boolean shouldExist)
+      throws KeeperException, InterruptedException {
+
+    String leaderElectPath =
+        ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/leader_elect/" + sliceId;
+    assertEquals(shouldExist, cluster.getZkClient().exists(leaderElectPath, true));
+
+    String leaderPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/leaders/" + sliceId;
+    assertEquals(shouldExist, cluster.getZkClient().exists(leaderPath, true));
+
+    String termPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/terms/" + sliceId;
+    assertEquals(shouldExist, cluster.getZkClient().exists(termPath, true));
+  }
 }
