This is an automated email from the ASF dual-hosted git repository.

gerlowskija pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 82a847f0f9a SOLR-17149: Fix backup/restore for large collections. 
(#2243)
82a847f0f9a is described below

commit 82a847f0f9af18d6eceee18743d636db7a879f3e
Author: Pierre Salagnac <[email protected]>
AuthorDate: Tue Feb 6 16:08:48 2024 +0100

    SOLR-17149: Fix backup/restore for large collections. (#2243)
    
    9.4 introduced a new executor used for "expensive" operations, that
    relied on a bounded queue.  This caused backup operations to fail on
    collections large enough to hit this queue capacity.
    
    This commit fixes this problem by making the executor queue unbounded.
    
    ---------
    
    Co-authored-by: Pierre Salagnac <[email protected]>
    Co-authored-by: Jason Gerlowski <[email protected]>
---
 solr/CHANGES.txt                                                 | 2 ++
 solr/core/src/java/org/apache/solr/core/CoreContainer.java       | 3 ++-
 .../src/java/org/apache/solr/handler/admin/CoreAdminHandler.java | 4 +++-
 .../cloud/api/collections/LocalFSCloudIncrementalBackupTest.java | 2 +-
 .../src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java   | 2 +-
 .../cloud/api/collections/HdfsCloudIncrementalBackupTest.java    | 2 +-
 .../src/test/org/apache/solr/s3/S3IncrementalBackupTest.java     | 2 +-
 .../solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java | 9 +++++++--
 .../cloud/api/collections/AbstractIncrementalBackupTest.java     | 7 +++++--
 9 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9897ba24226..3436e034b61 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -147,6 +147,8 @@ Bug Fixes
 
 * SOLR-17112: bin/solr script doesn't do ps properly on some systems. 
(Vincenzo D'Amore via Eric Pugh)
 
+* SOLR-17149: Backups on collections with too many shards fail due to 
restrictive Executor queue size (Pierre Salagnac via Jason Gerlowski)
+
 Dependency Upgrades
 ---------------------
 * SOLR-17012: Update Apache Hadoop to 3.3.6 and Apache Curator to 5.5.0 (Kevin 
Risden)
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java 
b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 3d7dff9ed13..86cb52f0710 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -414,7 +414,8 @@ public class CoreContainer {
         new OrderedExecutor(
             cfg.getReplayUpdatesThreads(),
             ExecutorUtil.newMDCAwareCachedThreadPool(
-                cfg.getReplayUpdatesThreads(),
+                cfg.getReplayUpdatesThreads(), // thread count
+                cfg.getReplayUpdatesThreads(), // queue size
                 new SolrNamedThreadFactory("replayUpdatesExecutor")));
     this.appHandlersByConfigSetId = new JerseyAppHandlerCache();
 
diff --git 
a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java 
b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
index ca67a355b44..d2d823eb190 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
@@ -443,7 +443,9 @@ public class CoreAdminHandler extends RequestHandlerBase 
implements PermissionNa
     // We keep the number number of max threads very low to have throttling 
for expensive tasks
     private ExecutorService expensiveExecutor =
         ExecutorUtil.newMDCAwareCachedThreadPool(
-            5, new 
SolrNamedThreadFactory("parallelCoreAdminAPIExpensiveExecutor"));
+            5,
+            Integer.MAX_VALUE,
+            new 
SolrNamedThreadFactory("parallelCoreAdminAPIExpensiveExecutor"));
 
     public CoreAdminAsyncTracker() {
       HashMap<String, Map<String, TaskObject>> map = new HashMap<>(3, 1.0f);
diff --git 
a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
 
b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
index 06b9ff3e0df..588a990560b 100644
--- 
a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
+++ 
b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
@@ -80,7 +80,7 @@ public class LocalFSCloudIncrementalBackupTest extends 
AbstractIncrementalBackup
       backupLocation = createTempDir("mybackup").toAbsolutePath().toString();
     }
 
-    configureCluster(NUM_SHARDS) // nodes
+    configureCluster(NUM_NODES) // nodes
         .addConfig(
             "conf1", 
TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
         .withSolrXml(SOLR_XML.replace("ALLOWPATHS_TEMPLATE_VAL", 
backupLocation))
diff --git 
a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
 
b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
index ac0b15a0187..8dd8f8f4f42 100644
--- 
a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
+++ 
b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
@@ -73,7 +73,7 @@ public class GCSIncrementalBackupTest extends 
AbstractIncrementalBackupTest {
   @BeforeClass
   public static void setupClass() throws Exception {
 
-    configureCluster(NUM_SHARDS) // nodes
+    configureCluster(NUM_NODES) // nodes
         .addConfig("conf1", 
getFile("conf/solrconfig.xml").getParentFile().toPath())
         .withSolrXml(SOLR_XML)
         .configure();
diff --git 
a/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java
 
b/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java
index 9ae15f86e0b..5d78be2a423 100644
--- 
a/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java
+++ 
b/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java
@@ -122,7 +122,7 @@ public class HdfsCloudIncrementalBackupTest extends 
AbstractIncrementalBackupTes
     System.setProperty("solr.hdfs.home", hdfsUri + "/solr");
     useFactory("solr.StandardDirectoryFactory");
 
-    configureCluster(NUM_SHARDS) // nodes
+    configureCluster(NUM_NODES) // nodes
         .addConfig(
             "conf1", 
TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
         .withSolrXml(SOLR_XML)
diff --git 
a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
 
b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
index d76daa270f1..ffe0a7dfb8b 100644
--- 
a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
+++ 
b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
@@ -93,7 +93,7 @@ public class S3IncrementalBackupTest extends 
AbstractIncrementalBackupTest {
     System.setProperty("aws.accessKeyId", "foo");
     System.setProperty("aws.secretAccessKey", "bar");
 
-    configureCluster(NUM_SHARDS) // nodes
+    configureCluster(NUM_NODES) // nodes
         .addConfig("conf1", 
getFile("conf/solrconfig.xml").getParentFile().toPath())
         .withSolrXml(
             SOLR_XML
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java 
b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
index f0d1f8bc698..c97658efebd 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
@@ -181,9 +181,14 @@ public class ExecutorUtil {
   }
 
   public static ExecutorService newMDCAwareCachedThreadPool(
-      int maxThreads, ThreadFactory threadFactory) {
+      int maxThreads, int queueCapacity, ThreadFactory threadFactory) {
     return new MDCAwareThreadPoolExecutor(
-        0, maxThreads, 60L, TimeUnit.SECONDS, new 
LinkedBlockingQueue<>(maxThreads), threadFactory);
+        0,
+        maxThreads,
+        60L,
+        TimeUnit.SECONDS,
+        new LinkedBlockingQueue<>(queueCapacity),
+        threadFactory);
   }
 
   @SuppressForbidden(reason = "class customizes ThreadPoolExecutor so it can 
be used instead")
diff --git 
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
 
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
index 03acadab7ff..79a5f59d12f 100644
--- 
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
+++ 
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
@@ -88,7 +88,9 @@ public abstract class AbstractIncrementalBackupTest extends 
SolrCloudTestCase {
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   private static long docsSeed; // see indexDocs()
+  protected static final int NUM_NODES = 2;
   protected static final int NUM_SHARDS = 2; // granted we sometimes shard 
split to get more
+  protected static final int LARGE_NUM_SHARDS = 11; // Periodically chosen via 
randomization
   protected static final int REPL_FACTOR = 2;
   protected static final String BACKUPNAME_PREFIX = "mytestbackup";
   protected static final String BACKUP_REPO_NAME = "trackingBackupRepository";
@@ -129,10 +131,11 @@ public abstract class AbstractIncrementalBackupTest 
extends SolrCloudTestCase {
     setTestSuffix("testbackupincsimple");
     final String backupCollectionName = getCollectionName();
     final String restoreCollectionName = backupCollectionName + "_restore";
+    final int randomizedNumShards = rarely() ? LARGE_NUM_SHARDS : NUM_SHARDS;
 
     CloudSolrClient solrClient = cluster.getSolrClient();
 
-    CollectionAdminRequest.createCollection(backupCollectionName, "conf1", 
NUM_SHARDS, 1)
+    CollectionAdminRequest.createCollection(backupCollectionName, "conf1", 
randomizedNumShards, 1)
         .process(solrClient);
     int totalIndexedDocs = indexDocs(backupCollectionName, true);
     String backupName = BACKUPNAME_PREFIX + testSuffix;
@@ -166,7 +169,7 @@ public abstract class AbstractIncrementalBackupTest extends 
SolrCloudTestCase {
       log.info("Created backup with {} docs, took {}ms", numFound, timeTaken);
 
       t = System.nanoTime();
-      randomlyPrecreateRestoreCollection(restoreCollectionName, "conf1", 
NUM_SHARDS, 1);
+      randomlyPrecreateRestoreCollection(restoreCollectionName, "conf1", 
randomizedNumShards, 1);
       CollectionAdminRequest.restoreCollection(restoreCollectionName, 
backupName)
           .setBackupId(0)
           .setLocation(backupLocation)

Reply via email to