pjmcarthur commented on code in PR #1926:
URL: https://github.com/apache/solr/pull/1926#discussion_r1330310547


##########
solr/core/src/java/org/apache/solr/cluster/maintenance/InactiveShardRemover.java:
##########
@@ -0,0 +1,251 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cluster.maintenance;
+
+import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import org.apache.solr.api.ConfigurablePlugin;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
+import org.apache.solr.client.solrj.response.RequestStatusState;
+import org.apache.solr.cloud.ClusterSingleton;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.SolrNamedThreadFactory;
+import org.apache.solr.core.CoreContainer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** This Cluster Singleton can be configured to periodically find and remove inactive Shards. */
+public class InactiveShardRemover
+    implements ClusterSingleton, 
ConfigurablePlugin<InactiveShardRemoverConfig> {
+
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  public static final String PLUGIN_NAME = ".inactive-shard-remover";
+
+  /**
+   * Submits shard deletion requests through the {@link CoreContainer}'s cloud manager. The
+   * remover receives an instance through its constructor, so a different one can be supplied.
+   */
+  static class DeleteActor {
+
+    private final CoreContainer coreContainer;
+
+    DeleteActor(final CoreContainer coreContainer) {
+      this.coreContainer = coreContainer;
+    }
+
+    /**
+     * Builds a delete-shard request for the given slice, tags it with the async id and submits it.
+     *
+     * @param slice the shard to delete; its collection and name identify the target
+     * @param asyncId the id set on the request via {@code setAsyncId}
+     * @throws IOException if submitting the request fails
+     */
+    void delete(final Slice slice, final String asyncId) throws IOException {
+      final String collectionName = slice.getCollection();
+      final String shardName = slice.getName();
+      final CollectionAdminRequest.DeleteShard request =
+          CollectionAdminRequest.deleteShard(collectionName, shardName);
+      request.setAsyncId(asyncId);
+      coreContainer.getZkController().getSolrCloudManager().request(request);
+    }
+  }
+
+  // Lifecycle state returned by getState(); moved through STARTING/RUNNING by start() and
+  // STOPPING/STOPPED by stop().
+  private State state = State.STOPPED;
+
+  // Provides the ZkController from which the current cluster state is read.
+  private final CoreContainer coreContainer;
+
+  // Collaborator that issues delete-shard requests; supplied through the constructor.
+  private final DeleteActor deleteActor;
+
+  // Scheduler created in start() and shut down in stop(); null until start() has been called.
+  private ScheduledExecutorService executor;
+
+  // Used as both the initial delay and the period of the scheduled task, in seconds.
+  private long scheduleIntervalSeconds;
+
+  // Copied from the plugin configuration in configure().
+  // NOTE(review): presumably the age after which an inactive shard may be removed - confirm
+  // against the expiry check, which is outside this excerpt.
+  private long ttlSeconds;
+
+  // Maximum number of shards handed to deleteShard in a single run (applied with Stream.limit).
+  private int maxDeletesPerCycle;
+
+  /**
+   * Creates a remover that deletes shards through a default {@link DeleteActor} bound to the
+   * given container. Constructor invoked via Reflection.
+   *
+   * @param cc the core container used to read cluster state and submit requests
+   */
+  public InactiveShardRemover(final CoreContainer cc) {
+    this(cc, new DeleteActor(cc));
+  }
+
+  /**
+   * Creates a remover with an explicitly supplied {@link DeleteActor}.
+   *
+   * @param cc the core container used to read cluster state
+   * @param actor the collaborator that performs shard deletions
+   */
+  @VisibleForTesting
+  InactiveShardRemover(final CoreContainer cc, final DeleteActor actor) {
+    this.deleteActor = actor;
+    this.coreContainer = cc;
+  }
+
+  /**
+   * Copies the schedule interval, TTL and per-cycle delete limit from the plugin configuration.
+   *
+   * @param cfg the configuration whose values are stored on this instance
+   */
+  @Override
+  public void configure(final InactiveShardRemoverConfig cfg) {
+    this.ttlSeconds = cfg.ttlSeconds;
+    this.maxDeletesPerCycle = cfg.maxDeletesPerCycle;
+    this.scheduleIntervalSeconds = cfg.scheduleIntervalSeconds;
+  }
+
+  /** Returns the plugin name, {@link #PLUGIN_NAME}. */
+  @Override
+  public String getName() {
+    return InactiveShardRemover.PLUGIN_NAME;
+  }
+
+  /** Returns the current lifecycle state as last set by {@code start()} or {@code stop()}. */
+  @Override
+  public State getState() {
+    return this.state;
+  }
+
+  /**
+   * Starts the periodic clean-up task.
+   *
+   * <p>Creates a single-threaded scheduler and runs {@link #deleteInactiveSlices()} every {@code
+   * scheduleIntervalSeconds} seconds; the first run is delayed by the same interval. The state
+   * is set to STARTING while the scheduler is created and to RUNNING once the task is scheduled.
+   *
+   * <p>Runtime exceptions thrown by a run are caught and logged. Without this guard, {@code
+   * scheduleAtFixedRate} would suppress every subsequent execution after the first failure and
+   * the remover would stop working without any indication.
+   */
+  @Override
+  public void start() throws Exception {
+    state = State.STARTING;
+    executor = Executors.newScheduledThreadPool(1, new SolrNamedThreadFactory(PLUGIN_NAME));
+    executor.scheduleAtFixedRate(
+        () -> {
+          try {
+            deleteInactiveSlices();
+          } catch (RuntimeException e) {
+            // Swallow after logging: a propagated exception cancels all future executions.
+            log.error("Failed to remove inactive shards, will retry on the next cycle", e);
+          }
+        },
+        scheduleIntervalSeconds,
+        scheduleIntervalSeconds,
+        TimeUnit.SECONDS);
+    state = State.RUNNING;
+  }
+
+  /**
+   * Stops the periodic task. When the remover is RUNNING the scheduler is shut down immediately
+   * and given up to 10 seconds to terminate; a warning is logged if it does not terminate in
+   * time or if the wait is interrupted (the interrupt flag is restored). In every case the
+   * state ends up as STOPPED.
+   */
+  @Override
+  public void stop() {
+    if (state != State.RUNNING) {
+      // Nothing was scheduled by this instance; just record the final state.
+      state = State.STOPPED;
+      return;
+    }
+
+    state = State.STOPPING;
+    executor.shutdownNow();
+    try {
+      final boolean terminated = executor.awaitTermination(10, TimeUnit.SECONDS);
+      if (!terminated) {
+        log.warn(
+            "Executor pool did not terminate within the specified timeout: {} {}",
+            10,
+            TimeUnit.SECONDS);
+      }
+    } catch (InterruptedException e) {
+      log.warn("Failed to shut down the executor pool", e);
+      Thread.currentThread().interrupt();
+    }
+    state = State.STOPPED;
+  }
+
+  /**
+   * Runs one clean-up cycle: gathers the removable inactive shards of every collection in the
+   * current cluster state and passes at most {@code maxDeletesPerCycle} of them to {@code
+   * deleteShard}.
+   *
+   * <p>The candidates are held in a {@link HashSet}, so which shards are picked when the limit
+   * applies is not ordered.
+   */
+  @VisibleForTesting
+  void deleteInactiveSlices() {
+    final ClusterState clusterState = coreContainer.getZkController().getClusterState();
+    Collection<Slice> inactiveSlices = new HashSet<>();
+    clusterState
+        .getCollectionsMap()
+        .forEach((k, v) -> inactiveSlices.addAll(collectInactiveSlices(v)));
+
+    if (log.isInfoEnabled()) {
+      // The stream below deletes at most maxDeletesPerCycle shards, so the number reported
+      // here is the smaller of the two values, not the larger.
+      log.info(
+          "Found {} inactive Shards to delete, {} will be deleted",
+          inactiveSlices.size(),
+          Math.min(inactiveSlices.size(), maxDeletesPerCycle));
+    }
+
+    inactiveSlices.stream().limit(maxDeletesPerCycle).forEach(this::deleteShard);
+  }
+
+  /**
+   * Returns the slices of the collection that are not among its active slices and for which
+   * {@code isExpired} returns true.
+   *
+   * @param docCollection the collection whose slices are examined
+   * @return the expired, non-active slices; empty when there are none
+   */
+  private Collection<Slice> collectInactiveSlices(final DocCollection docCollection) {
+    final Collection<Slice> nonActive = new HashSet<>(docCollection.getSlices());
+    nonActive.removeAll(docCollection.getActiveSlices());
+    return nonActive.stream().filter(slice -> isExpired(slice)).collect(Collectors.toList());
+  }
+
+  private void deleteShard(final Slice s) {
+    final String asyncId = s.getCollection() + ";" + s.getName() + 
";DELETESHARD";

Review Comment:
   Honestly I was somewhat torn between the two. I chose async because I don't 
think the implementation really cares about waiting for the result of the 
delete operation, and a fire-and-forget approach seemed suitable. On the other 
hand, async makes testing a little more difficult. Happy to accept a 
recommendation or some guidance on the preferred style here. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to