This is an automated email from the ASF dual-hosted git repository.
HoustonPutman pushed a commit to branch branch_10x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_10x by this push:
new f5a9985085f SOLR-17821: Fix error scenario for ShardInstall or Restore (#3434)
f5a9985085f is described below
commit f5a9985085f16312f04cc866cb05fada6f507279
Author: Houston Putman <[email protected]>
AuthorDate: Wed May 20 15:00:27 2026 -0700
SOLR-17821: Fix error scenario for ShardInstall or Restore (#3434)
(cherry picked from commit 4e5c83d2f07e3c1be51dcdcf5004665df33fb419)
---
.../solr-17821-fix-restore-error-scenario.yml | 9 ++
.../api/model/InstallShardDataRequestBody.java | 4 +
.../java/org/apache/solr/cloud/SyncStrategy.java | 13 +-
.../cloud/api/collections/InstallShardDataCmd.java | 122 ++++++++++++++++---
.../solr/cloud/api/collections/RestoreCmd.java | 40 +++++--
.../solr/handler/admin/CollectionsHandler.java | 3 +
.../solr/handler/admin/api/InstallCoreData.java | 6 -
.../solr/handler/admin/api/InstallShardData.java | 6 +-
.../apache/solr/handler/admin/api/RestoreCore.java | 6 -
.../solr/handler/component/ShardRequest.java | 3 +
.../apache/solr/cloud/CollectionsAPISolrJTest.java | 72 ++++++-----
.../solr/cloud/ZkShardTermsRecoveryTest.java | 8 ++
.../LocalFSCloudIncrementalBackupTest.java | 6 +
.../api/collections/LocalFSInstallShardTest.java | 8 +-
.../apache/solr/gcs/GCSIncrementalBackupTest.java | 6 +
.../org/apache/solr/gcs/GCSInstallShardTest.java | 9 +-
.../apache/solr/s3/S3IncrementalBackupTest.java | 15 +++
.../org/apache/solr/s3/S3InstallShardTest.java | 9 +-
.../apache/solr/cloud/MiniSolrCloudCluster.java | 9 ++
.../collections/AbstractIncrementalBackupTest.java | 133 ++++++++++++++++++++-
.../api/collections/AbstractInstallShardTest.java | 69 +++++++++--
21 files changed, 456 insertions(+), 100 deletions(-)
diff --git a/changelog/unreleased/solr-17821-fix-restore-error-scenario.yml b/changelog/unreleased/solr-17821-fix-restore-error-scenario.yml
new file mode 100644
index 00000000000..dbadb3d6fe3
--- /dev/null
+++ b/changelog/unreleased/solr-17821-fix-restore-error-scenario.yml
@@ -0,0 +1,9 @@
+# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
+title: Fix error scenario in InstallShardData and Restore
+type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other
+authors:
+ - name: Houston Putman
+ nick: HoustonPutman
+links:
+ - name: SOLR-17821
+ url: https://issues.apache.org/jira/browse/SOLR-17821
diff --git a/solr/api/src/java/org/apache/solr/client/api/model/InstallShardDataRequestBody.java b/solr/api/src/java/org/apache/solr/client/api/model/InstallShardDataRequestBody.java
index 31bec8eb434..05b27f1dcab 100644
--- a/solr/api/src/java/org/apache/solr/client/api/model/InstallShardDataRequestBody.java
+++ b/solr/api/src/java/org/apache/solr/client/api/model/InstallShardDataRequestBody.java
@@ -24,5 +24,9 @@ public class InstallShardDataRequestBody {
@JsonProperty public String repository;
+ @JsonProperty public String name;
+
+ @JsonProperty public String shardBackupId;
+
@JsonProperty public String async;
}
diff --git a/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java b/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java
index cfbad7cd7e1..d400e90d14d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java
@@ -74,11 +74,6 @@ public class SyncStrategy {
updateExecutor = updateShardHandler.getUpdateExecutor();
}
- private static class ShardCoreRequest extends ShardRequest {
- String coreName;
- public String baseUrl;
- }
-
public PeerSync.PeerSyncResult sync(
ZkController zkController, SolrCore core, ZkNodeProps leaderProps) {
return sync(zkController, core, leaderProps, false, false);
@@ -322,8 +317,8 @@ public class SyncStrategy {
} else {
RecoveryRequest rr = new RecoveryRequest();
rr.leaderProps = leaderProps;
- rr.baseUrl = ((ShardCoreRequest) srsp.getShardRequest()).baseUrl;
- rr.coreName = ((ShardCoreRequest) srsp.getShardRequest()).coreName;
+ rr.baseUrl = srsp.getShardRequest().nodeName;
+ rr.coreName = srsp.getShardRequest().coreName;
recoveryRequests.add(rr);
}
} else {
@@ -355,9 +350,9 @@ public class SyncStrategy {
private void requestSync(
String baseUrl, String replica, String leaderUrl, String coreName, int
nUpdates) {
// TODO should we use peerSyncWithLeader instead?
- ShardCoreRequest sreq = new ShardCoreRequest();
+ ShardRequest sreq = new ShardRequest();
sreq.coreName = coreName;
- sreq.baseUrl = baseUrl;
+ sreq.nodeName = baseUrl;
sreq.purpose = ShardRequest.PURPOSE_PRIVATE;
sreq.shards = new String[] {replica};
sreq.actualShards = sreq.shards;
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/InstallShardDataCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/InstallShardDataCmd.java
index ca654a150ae..9e48cba893a 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/InstallShardDataCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/InstallShardDataCmd.java
@@ -17,20 +17,26 @@
package org.apache.solr.cloud.api.collections;
-import static org.apache.solr.cloud.Overseer.QUEUE_OPERATION;
-import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
-
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.lang.invoke.MethodHandles;
-import java.util.HashMap;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import org.apache.solr.cloud.ZkShardTerms;
+import org.apache.solr.common.SolrErrorWrappingException;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
-import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
@@ -80,27 +86,112 @@ public class InstallShardDataCmd implements
CollApiCmds.CollectionApiCommand {
// Build the core-admin request
final ModifiableSolrParams coreApiParams = new ModifiableSolrParams();
coreApiParams.set(
- CoreAdminParams.ACTION,
CoreAdminParams.CoreAdminAction.INSTALLCOREDATA.toString());
- typedMessage.toMap(new HashMap<>()).forEach((k, v) -> coreApiParams.set(k,
v.toString()));
+ CoreAdminParams.ACTION,
CoreAdminParams.CoreAdminAction.RESTORECORE.toString());
+ coreApiParams.set(CoreAdminParams.BACKUP_LOCATION, typedMessage.location);
+ coreApiParams.set(CoreAdminParams.BACKUP_REPOSITORY,
typedMessage.repository);
+ coreApiParams.set(CoreAdminParams.NAME, typedMessage.name);
+ coreApiParams.set(CoreAdminParams.SHARD_BACKUP_ID,
typedMessage.shardBackupId);
// Send the core-admin request to each replica in the slice
final ShardHandler shardHandler = ccc.newShardHandler();
- shardRequestTracker.sliceCmd(clusterState, coreApiParams, null,
installSlice, shardHandler);
+ List<Replica> notLiveReplicas =
+ shardRequestTracker.sliceCmd(clusterState, coreApiParams, null,
installSlice, shardHandler);
final String errorMessage =
String.format(
Locale.ROOT,
- "Could not install data to collection [%s] and shard [%s]",
+ "Could not install data to collection [%s] and shard [%s] on any
leader-eligible replicas",
typedMessage.collection,
typedMessage.shard);
- shardRequestTracker.processResponses(results, shardHandler, true,
errorMessage);
+ shardRequestTracker.processResponses(results, shardHandler, false,
errorMessage);
+ Collection<Replica> allReplicas =
+ clusterState
+ .getCollection(typedMessage.collection)
+ .getSlice(typedMessage.shard)
+ .getReplicas();
+
+ // Ensure that terms are correct for this shard after the execution is done
+ // We only care about leader eligible replicas, all others will eventually
get updated.
+ List<Replica> leaderEligibleReplicas =
+ allReplicas.stream().filter(r ->
r.getType().leaderEligible).collect(Collectors.toList());
+
+ NamedList<Object> failures = (NamedList<Object>) results.get("failure");
+ Set<Replica> successfulReplicas =
+ leaderEligibleReplicas.stream()
+ .filter(replica -> !notLiveReplicas.contains(replica))
+ .filter(
+ replica ->
+ failures == null
+ ||
failures.get(CollectionHandlingUtils.requestKey(replica)) == null)
+ .collect(Collectors.toSet());
+
+ if (successfulReplicas.isEmpty()) {
+ // No leader-eligible replicas succeeded, return failure
+ if (failures == null) {
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR,
+ errorMessage + ". No leader-eligible replicas are live.");
+ } else {
+ throw new SolrErrorWrappingException(
+ SolrException.ErrorCode.SERVER_ERROR, errorMessage,
List.of(failures.asMap(1)));
+ }
+ } else if (successfulReplicas.size() < leaderEligibleReplicas.size()) {
+ // Some, but not all, leader-eligible replicas succeeded.
+ // Ensure the shard terms are correct so that the non-successful
replicas go into recovery
+ ZkShardTerms shardTerms =
+ ccc.getCoreContainer()
+ .getZkController()
+ .getShardTerms(typedMessage.collection, typedMessage.shard);
+ final Set<String> replicasToStartRecovery = new HashSet<>();
+ leaderEligibleReplicas.stream()
+ .filter(r -> !successfulReplicas.contains(r))
+ .map(Replica::getName)
+ .forEach(replicasToStartRecovery::add);
+ log.info("Putting the unsuccessful replicas into recovery: {}",
replicasToStartRecovery);
+ shardTerms.ensureHighestTerms(
+ installCollection,
+
successfulReplicas.stream().map(Replica::getName).collect(Collectors.toSet()));
+ ccc.getZkStateReader()
+ .waitForState(
+ typedMessage.collection,
+ 30,
+ TimeUnit.SECONDS,
+ (liveNodes, collectionState) -> {
+
collectionState.getSlice(typedMessage.shard).getReplicas().stream()
+ .filter(r -> Replica.State.RECOVERING.equals(r.getState()))
+ .map(Replica::getName)
+ .forEach(replicasToStartRecovery::remove);
+ return replicasToStartRecovery.isEmpty();
+ });
+
+ // In order for the async request to succeed, we need to ensure that
there is no failure
+ // message
+ NamedList<Object> successes = (NamedList<Object>) results.get("success");
+ failures.forEach(
+ (replicaKey, value) -> {
+ successes.add(
+ replicaKey,
+ new NamedList<>(
+ Map.of(
+ "explanation",
+ "Core install failed, but is now recovering from the
leader",
+ "failure",
+ value)));
+ });
+ results.remove("failure");
+ } else {
+ // other replicas to-be-created will know that they are out of date by
+ // looking at their term : 0 compare to term of this core : 1
+ ccc.getCoreContainer()
+ .getZkController()
+ .getShardTerms(typedMessage.collection, typedMessage.shard)
+ .ensureHighestTermsAreNotZero();
+ }
}
/** A value-type representing the message received by {@link
InstallShardDataCmd} */
+ @JsonIgnoreProperties(ignoreUnknown = true)
public static class RemoteMessage implements JacksonReflectMapWriter {
- @JsonProperty(QUEUE_OPERATION)
- public String operation =
CollectionParams.CollectionAction.INSTALLSHARDDATA.toLower();
-
@JsonProperty public String collection;
@JsonProperty public String shard;
@@ -109,8 +200,9 @@ public class InstallShardDataCmd implements
CollApiCmds.CollectionApiCommand {
@JsonProperty public String location;
- @JsonProperty(ASYNC)
- public String asyncId;
+ @JsonProperty public String name = "";
+
+ @JsonProperty public String shardBackupId;
public void validate() {
if (StrUtils.isBlank(collection)) {
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
index c4ef360fa58..dabaf64420e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
@@ -24,6 +24,7 @@ import static
org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
import static
org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA;
import static
org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
import static
org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
+import static
org.apache.solr.common.params.CollectionParams.CollectionAction.INSTALLSHARDDATA;
import static
org.apache.solr.common.params.CollectionParams.CollectionAction.MODIFYCOLLECTION;
import static org.apache.solr.common.params.CommonParams.NAME;
@@ -108,7 +109,7 @@ public class RestoreCmd implements
CollApiCmds.CollectionApiCommand {
}
}
- private void requestReplicasToRestore(
+ private void requestShardsToRestore(
NamedList<Object> results,
DocCollection restoreCollection,
AdminCmdContext adminCmdContext,
@@ -117,11 +118,13 @@ public class RestoreCmd implements
CollApiCmds.CollectionApiCommand {
String repo,
ShardHandler shardHandler) {
ShardRequestTracker shardRequestTracker =
- CollectionHandlingUtils.asyncRequestTracker(adminCmdContext, ccc);
+ CollectionHandlingUtils.asyncRequestTracker(adminCmdContext,
"/admin/collections", ccc);
// Copy data from backed up index to each replica
for (Slice slice : restoreCollection.getSlices()) {
ModifiableSolrParams params = new ModifiableSolrParams();
- params.set(CoreAdminParams.ACTION,
CoreAdminParams.CoreAdminAction.RESTORECORE.toString());
+ params.set(CollectionAdminParams.COLLECTION, slice.getCollection());
+ params.set(CollectionAdminParams.SHARD, slice.getName());
+ params.set(CoreAdminParams.ACTION, INSTALLSHARDDATA.toString());
Optional<ShardBackupId> shardBackupId =
backupProperties.getShardBackupIdFor(slice.getName());
if (shardBackupId.isPresent()) {
params.set(CoreAdminParams.SHARD_BACKUP_ID,
shardBackupId.get().getIdAsString());
@@ -130,11 +133,24 @@ public class RestoreCmd implements
CollApiCmds.CollectionApiCommand {
}
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString());
params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
- shardRequestTracker.sliceCmd(
- adminCmdContext.getClusterState(), params, null, slice,
shardHandler);
+ Replica replica = slice.getLeader();
+ if (replica == null) {
+ replica =
+ slice.getReplicas().stream()
+ .findFirst()
+ .orElseThrow(
+ () ->
+ new SolrException(
+ ErrorCode.INVALID_STATE,
+ String.format(
+ Locale.ROOT,
+ "No replicas for shard %s in collection %s.
Cannot restore to a shard with no replicas",
+ slice.getName(),
+ slice.getCollection())));
+ }
+ shardRequestTracker.sendShardRequest(replica, params, shardHandler);
}
- shardRequestTracker.processResponses(
- new NamedList<>(), shardHandler, true, "Could not restore core");
+ shardRequestTracker.processResponses(results, shardHandler, true, "Could
not restore shard");
}
/** Encapsulates the parsing and access for common parameters restore
parameters and values */
@@ -273,7 +289,7 @@ public class RestoreCmd implements
CollApiCmds.CollectionApiCommand {
// refresh the location copy of collection state
restoreCollection =
rc.zkStateReader.getClusterState().getCollection(rc.restoreCollectionName);
- requestReplicasToRestore(
+ requestShardsToRestore(
results,
restoreCollection,
rc.adminCmdContext.withClusterState(rc.zkStateReader.getClusterState()),
@@ -625,7 +641,7 @@ public class RestoreCmd implements
CollApiCmds.CollectionApiCommand {
rc.adminCmdContext.withClusterState(rc.zkStateReader.getClusterState()),
restoreCollection);
try {
- requestReplicasToRestore(
+ requestShardsToRestore(
results,
restoreCollection,
rc.adminCmdContext.withClusterState(rc.zkStateReader.getClusterState()),
@@ -649,8 +665,7 @@ public class RestoreCmd implements
CollApiCmds.CollectionApiCommand {
ZkStateReader.COLLECTION_PROP, restoreCollection.getName(),
ZkStateReader.READ_ONLY, null);
new CollApiCmds.ModifyCollectionCmd(ccc)
- .call(
- adminCmdContext.subRequestContext(MODIFYCOLLECTION, null),
params, new NamedList<>());
+ .call(adminCmdContext.subRequestContext(MODIFYCOLLECTION), params,
new NamedList<>());
}
private void enableReadOnly(AdminCmdContext adminCmdContext, DocCollection
restoreCollection)
@@ -662,8 +677,7 @@ public class RestoreCmd implements
CollApiCmds.CollectionApiCommand {
ZkStateReader.COLLECTION_PROP, restoreCollection.getName(),
ZkStateReader.READ_ONLY, "true");
new CollApiCmds.ModifyCollectionCmd(ccc)
- .call(
- adminCmdContext.subRequestContext(MODIFYCOLLECTION, null),
params, new NamedList<>());
+ .call(adminCmdContext.subRequestContext(MODIFYCOLLECTION), params,
new NamedList<>());
}
}
}
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
index be9f394d26c..6e3c9e453d7 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
@@ -101,6 +101,7 @@ import static
org.apache.solr.common.params.CommonParams.TIMING;
import static org.apache.solr.common.params.CommonParams.VALUE_LONG;
import static org.apache.solr.common.params.CoreAdminParams.BACKUP_LOCATION;
import static org.apache.solr.common.params.CoreAdminParams.BACKUP_REPOSITORY;
+import static org.apache.solr.common.params.CoreAdminParams.SHARD_BACKUP_ID;
import static org.apache.solr.common.util.StrUtils.formatString;
import java.lang.invoke.MethodHandles;
@@ -1068,6 +1069,8 @@ public class CollectionsHandler extends
RequestHandlerBase implements Permission
reqBody.async = req.getParams().get(ASYNC);
reqBody.repository = req.getParams().get(BACKUP_REPOSITORY);
reqBody.location = req.getParams().get(BACKUP_LOCATION);
+ reqBody.name = req.getParams().get(NAME);
+ reqBody.shardBackupId = req.getParams().get(SHARD_BACKUP_ID);
final InstallShardData installApi = new
InstallShardData(h.coreContainer, req, rsp);
final SolrJerseyResponse installResponse =
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/api/InstallCoreData.java b/solr/core/src/java/org/apache/solr/handler/admin/api/InstallCoreData.java
index a91a0688e0a..5d6290e1497 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/api/InstallCoreData.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/api/InstallCoreData.java
@@ -95,12 +95,6 @@ public class InstallCoreData extends CoreAdminAPIBase
implements InstallCoreData
SolrException.ErrorCode.SERVER_ERROR,
"Failed to install data to core=" + core.getName());
}
-
- // other replicas to-be-created will know that they are out of date by
- // looking at their term : 0 compare to term of this core : 1
- zkController
- .getShardTerms(cd.getCollectionName(), cd.getShardId())
- .ensureHighestTermsAreNotZero();
}
return response;
diff --git
a/solr/core/src/java/org/apache/solr/handler/admin/api/InstallShardData.java
b/solr/core/src/java/org/apache/solr/handler/admin/api/InstallShardData.java
index 840bbeab4a5..4033fa6f097 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/api/InstallShardData.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/api/InstallShardData.java
@@ -78,10 +78,10 @@ public class InstallShardData extends AdminAPIBase
implements InstallShardDataAp
// Only install data to shards which belong to a collection in read-only
mode
final DocCollection dc =
coreContainer.getZkController().getZkStateReader().getCollection(collName);
- if (!dc.isReadOnly()) {
+ if (dc.getSlice(shardName).getReplicas().size() > 1 && !dc.isReadOnly()) {
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST,
- "Collection must be in readOnly mode before installing data to
shard");
+ "Collection must be in readOnly mode before installing data to shard
with more than 1 replica");
}
submitRemoteMessageAndHandleResponse(
@@ -112,6 +112,8 @@ public class InstallShardData extends AdminAPIBase
implements InstallShardDataAp
if (requestBody != null) {
messageTyped.location = requestBody.location;
messageTyped.repository = requestBody.repository;
+ messageTyped.name = requestBody.name;
+ messageTyped.shardBackupId = requestBody.shardBackupId;
}
messageTyped.validate();
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/api/RestoreCore.java b/solr/core/src/java/org/apache/solr/handler/admin/api/RestoreCore.java
index 3997b1971b4..dcf1cfe85c1 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/api/RestoreCore.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/api/RestoreCore.java
@@ -132,12 +132,6 @@ public class RestoreCore extends CoreAdminAPIBase
implements RestoreCoreApi {
throw new SolrException(
SolrException.ErrorCode.SERVER_ERROR, "Failed to restore core=" +
core.getName());
}
- // other replicas to-be-created will know that they are out of date by
- // looking at their term : 0 compare to term of this core : 1
- coreContainer
- .getZkController()
- .getShardTerms(cd.getCollectionName(), cd.getShardId())
- .ensureHighestTermsAreNotZero();
// transitions state of update log to ACTIVE
UpdateLog updateLog = core.getUpdateHandler().getUpdateLog();
diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java b/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java
index 5222b38abee..ecaee01c7fa 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java
@@ -60,6 +60,9 @@ public class ShardRequest {
/** may be null */
public String coreNodeName;
+ /** may be null */
+ public String coreName;
+
/** may be null */
public Map<String, String> headers;
diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
index c9598a8a19e..17cb7e51ebb 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
@@ -113,8 +113,7 @@ public class CollectionsAPISolrJTest extends
SolrCloudTestCase {
assertEquals(0, (int) status.get("status"));
assertTrue(status.get("QTime") > 0);
}
- // Sometimes multiple cores land on the same node so it's less than 4
- int nodesCreated = response.getCollectionNodesStatus().size();
+
// Use of _default configset should generate a warning for data-driven
functionality in
// production use
assertTrue(
@@ -126,7 +125,7 @@ public class CollectionsAPISolrJTest extends
SolrCloudTestCase {
assertEquals(0, response.getStatus());
assertTrue(response.isSuccess());
Map<String, NamedList<Integer>> nodesStatus =
response.getCollectionNodesStatus();
- assertEquals(nodesStatus.toString(), nodesCreated, nodesStatus.size());
+ assertEquals(nodesStatus.toString(), 4, nodesStatus.size());
waitForState(
"Expected " + collectionName + " to disappear from cluster state",
@@ -246,36 +245,43 @@ public class CollectionsAPISolrJTest extends
SolrCloudTestCase {
assertTrue(status.get("QTime") > 0);
}
- // Sometimes multiple cores land on the same node so it's less than 4
- // int nodesCreated = response.getCollectionNodesStatus().size();
- // response =
- //
- //
CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient());
- //
- // assertEquals(0, response.getStatus());
- // assertTrue(response.isSuccess());
- // Map<String, NamedList<Integer>> nodesStatus =
response.getCollectionNodesStatus();
- // // Delete could have been sent before the collection was finished
coming online
- // assertEquals(nodesStatus.toString(), nodesCreated,
nodesStatus.size());
- //
- // waitForState(
- // "Expected " + collectionName + " to disappear from cluster
state",
- // collectionName,
- // Objects::isNull);
- //
- // // Test Creating a new collection.
- // collectionName = "solrj_test2";
- //
- // response =
- // CollectionAdminRequest.createCollection(collectionName, "conf",
2, 2)
- // .process(cluster.getSolrClient());
- // assertEquals(0, response.getStatus());
- // assertTrue(response.isSuccess());
- //
- // waitForState(
- // "Expected " + collectionName + " to appear in cluster state",
- // collectionName,
- // Objects::nonNull);
+ waitForState(
+ "Expected " + collectionName + " to disappear from cluster state",
+ collectionName,
+ ((liveNodes, collectionState) ->
+ collectionState.getSlices().stream()
+ .flatMap(
+ s -> s.getReplicas(r ->
!r.getState().equals(Replica.State.ACTIVE)).stream())
+ .findAny()
+ .isEmpty()));
+
+ response =
+
CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient());
+
+ assertEquals(0, response.getStatus());
+ assertTrue(response.isSuccess());
+ Map<String, NamedList<Integer>> nodesStatus =
response.getCollectionNodesStatus();
+ // Delete could have been sent before the collection was finished coming
online
+ assertEquals(nodesStatus.toString(), 4, nodesStatus.size());
+
+ waitForState(
+ "Expected " + collectionName + " to disappear from cluster state",
+ collectionName,
+ Objects::isNull);
+
+ // Test Creating a new collection.
+ collectionName = "solrj_test2";
+
+ response =
+ CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2)
+ .process(cluster.getSolrClient());
+ assertEquals(0, response.getStatus());
+ assertTrue(response.isSuccess());
+
+ waitForState(
+ "Expected " + collectionName + " to appear in cluster state",
+ collectionName,
+ Objects::nonNull);
}
@Test
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java
index ac1e9177fd0..10385f2c54c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java
@@ -52,10 +52,18 @@ public class ZkShardTermsRecoveryTest extends
SolrCloudTestCase {
CollectionAdminRequest.createCollection(COLLECTION, "conf",
NUM_SHARDS, NUM_REPLICAS)
.process(cluster.getSolrClient())
.getStatus());
+
waitForState(
"Timeout waiting for collection to be active after creation",
COLLECTION,
clusterShape(NUM_SHARDS, NUM_SHARDS * NUM_REPLICAS));
+
+ UpdateRequest up = new UpdateRequest();
+ for (int i = 0; i < 200; i++) {
+ up.add("id", "id-" + i);
+ }
+ up.commit(cluster.getSolrClient(), COLLECTION);
+ NUM_DOCS += 200;
}
@Before
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
index eabd8101480..6cf4e994d68 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
@@ -58,6 +58,12 @@ public class LocalFSCloudIncrementalBackupTest extends
AbstractIncrementalBackup
+ " </solrcloud>\n"
+ " \n"
+ " <backup>\n"
+ + " <repository name=\"errorBackupRepository\" class=\""
+ + ErrorThrowingTrackingBackupRepository.class.getName()
+ + "\"> \n"
+ + " <str name=\"delegateRepoName\">localfs</str>\n"
+ + " <str name=\"hostPort\">${hostPort:8983}</str>\n"
+ + " </repository>\n"
+ " <repository name=\"trackingBackupRepository\"
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
+ " <str name=\"delegateRepoName\">localfs</str>\n"
+ " </repository>\n"
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSInstallShardTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSInstallShardTest.java
index 690ff447194..989c894b978 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSInstallShardTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSInstallShardTest.java
@@ -30,6 +30,12 @@ public class LocalFSInstallShardTest extends
AbstractInstallShardTest {
+ " <repository name=\"trackingBackupRepository\"
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
+ " <str name=\"delegateRepoName\">localfs</str>\n"
+ " </repository>\n"
+ + " <repository name=\"errorBackupRepository\" class=\""
+ +
AbstractIncrementalBackupTest.ErrorThrowingTrackingBackupRepository.class.getName()
+ + "\"> \n"
+ + " <str name=\"delegateRepoName\">localfs</str>\n"
+ + " <str name=\"hostPort\">${hostPort:8983}</str>\n"
+ + " </repository>\n"
+ " <repository name=\"localfs\"
class=\"org.apache.solr.core.backup.repository.LocalFileSystemRepository\"> \n"
+ " </repository>\n"
+ " </backup>\n";
@@ -43,7 +49,7 @@ public class LocalFSInstallShardTest extends
AbstractInstallShardTest {
final String tmpDirPrefix = whitespacesInPath ? "my install" : "myinstall";
final String backupLocation =
createTempDir(tmpDirPrefix).toAbsolutePath().toString();
- configureCluster(1) // nodes
+ configureCluster(2) // nodes
.addConfig(
"conf1",
TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.withSolrXml(SOLR_XML.replace("ALLOWPATHS_TEMPLATE_VAL",
backupLocation))
diff --git a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
index 846563b929f..d955da11e1e 100644
--- a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
+++ b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
@@ -55,6 +55,12 @@ public class GCSIncrementalBackupTest extends
AbstractIncrementalBackupTest {
+ " </solrcloud>\n"
+ " \n"
+ " <backup>\n"
+ + " <repository name=\"errorBackupRepository\" class=\""
+ + ErrorThrowingTrackingBackupRepository.class.getName()
+ + "\"> \n"
+ + " <str name=\"delegateRepoName\">localfs</str>\n"
+ + " <str name=\"hostPort\">${hostPort:8983}</str>\n"
+ + " </repository>\n"
+ " <repository name=\"trackingBackupRepository\"
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
+ " <str name=\"delegateRepoName\">localfs</str>\n"
+ " </repository>\n"
diff --git a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSInstallShardTest.java b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSInstallShardTest.java
index ecb08fa0192..4b78c0cc805 100644
--- a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSInstallShardTest.java
+++ b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSInstallShardTest.java
@@ -19,6 +19,7 @@ package org.apache.solr.gcs;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.solr.cloud.api.collections.AbstractIncrementalBackupTest;
import org.apache.solr.cloud.api.collections.AbstractInstallShardTest;
import org.apache.solr.handler.admin.api.InstallShardData;
import org.junit.AfterClass;
@@ -40,6 +41,12 @@ public class GCSInstallShardTest extends
AbstractInstallShardTest {
+ " <repository name=\"trackingBackupRepository\"
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
+ " <str name=\"delegateRepoName\">localfs</str>\n"
+ " </repository>\n"
+ + " <repository name=\"errorBackupRepository\" class=\""
+ +
AbstractIncrementalBackupTest.ErrorThrowingTrackingBackupRepository.class.getName()
+ + "\"> \n"
+ + " <str name=\"delegateRepoName\">localfs</str>\n"
+ + " <str name=\"hostPort\">${hostPort:8983}</str>\n"
+ + " </repository>\n"
+ " <repository name=\"localfs\"
class=\"org.apache.solr.gcs.LocalStorageGCSBackupRepository\"> \n"
+ " <str name=\"gcsBucket\">someBucketName</str>\n"
+ " <str name=\"location\">backup1</str>\n"
@@ -51,7 +58,7 @@ public class GCSInstallShardTest extends
AbstractInstallShardTest {
@BeforeClass
public static void setupClass() throws Exception {
- configureCluster(1) // nodes
+ configureCluster(2) // nodes
.addConfig("conf1", getFile("conf/solrconfig.xml").getParent())
.withSolrXml(SOLR_XML)
.configure();
diff --git
a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
index 80c5207505b..c35dbc17ab0 100644
---
a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
+++
b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
@@ -22,6 +22,7 @@ import
com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
import java.lang.invoke.MethodHandles;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.solr.cloud.api.collections.AbstractIncrementalBackupTest;
+import org.apache.solr.util.LogLevel;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.slf4j.Logger;
@@ -31,6 +32,9 @@ import software.amazon.awssdk.regions.Region;
// Backups do checksum validation against a footer value not present in
'SimpleText'
@LuceneTestCase.SuppressCodecs({"SimpleText"})
@ThreadLeakLingering(linger = 10)
+@LogLevel(
+ value =
+
"org.apache.solr.cloud=DEBUG;org.apache.solr.cloud.api.collections=DEBUG;org.apache.solr.cloud.overseer=DEBUG")
public class S3IncrementalBackupTest extends AbstractIncrementalBackupTest {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -64,6 +68,12 @@ public class S3IncrementalBackupTest extends
AbstractIncrementalBackupTest {
+ " </solrcloud>\n"
+ " \n"
+ " <backup>\n"
+ + " <repository name=\"errorBackupRepository\" class=\""
+ + ErrorThrowingTrackingBackupRepository.class.getName()
+ + "\"> \n"
+ + " <str name=\"delegateRepoName\">s3</str>\n"
+ + " <str name=\"hostPort\">${hostPort:8983}</str>\n"
+ + " </repository>\n"
+ " <repository name=\"trackingBackupRepository\"
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
+ " <str name=\"delegateRepoName\">s3</str>\n"
+ " </repository>\n"
@@ -107,6 +117,11 @@ public class S3IncrementalBackupTest extends
AbstractIncrementalBackupTest {
.addConfig("conf1", getFile("conf/solrconfig.xml").getParent())
.withSolrXml(
SOLR_XML
+ // Only a single node will have a bad bucket name, all else
should succeed.
+ // The bad node will be added later
+ .replace("BAD_BUCKET_ALL_BUT_ONE", "non-existent")
+ .replace("BAD_BUCKET_ONE", BUCKET_NAME)
+ .replace("BAD_BUCKET", BUCKET_NAME)
.replace("BUCKET", BUCKET_NAME)
.replace("REGION", Region.US_EAST_1.id())
.replace("ENDPOINT", "http://localhost:" +
S3_MOCK_RULE.getHttpPort()))
diff --git
a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3InstallShardTest.java
b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3InstallShardTest.java
index 194b2ffddc6..c44e2170a39 100644
---
a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3InstallShardTest.java
+++
b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3InstallShardTest.java
@@ -20,6 +20,7 @@ package org.apache.solr.s3;
import com.adobe.testing.s3mock.junit4.S3MockRule;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.solr.cloud.api.collections.AbstractIncrementalBackupTest;
import org.apache.solr.cloud.api.collections.AbstractInstallShardTest;
import org.apache.solr.handler.admin.api.InstallShardData;
import org.junit.BeforeClass;
@@ -44,6 +45,12 @@ public class S3InstallShardTest extends
AbstractInstallShardTest {
+ " <repository name=\"trackingBackupRepository\"
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
+ " <str name=\"delegateRepoName\">s3</str>\n"
+ " </repository>\n"
+ + " <repository name=\"errorBackupRepository\" class=\""
+ +
AbstractIncrementalBackupTest.ErrorThrowingTrackingBackupRepository.class.getName()
+ + "\"> \n"
+ + " <str name=\"delegateRepoName\">s3</str>\n"
+ + " <str name=\"hostPort\">${hostPort:8983}</str>\n"
+ + " </repository>\n"
+ " <repository name=\"s3\"
class=\"org.apache.solr.s3.S3BackupRepository\"> \n"
+ " <str name=\"s3.bucket.name\">BUCKET</str>\n"
+ " <str name=\"s3.region\">REGION</str>\n"
@@ -65,7 +72,7 @@ public class S3InstallShardTest extends
AbstractInstallShardTest {
AbstractS3ClientTest.setS3ConfFile();
- configureCluster(1) // nodes
+ configureCluster(2) // nodes
.addConfig("conf1", getFile("conf/solrconfig.xml").getParent())
.withSolrXml(
SOLR_XML
diff --git
a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
index ae9a8fe87f6..121a212270d 100644
---
a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
+++
b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
@@ -468,6 +468,15 @@ public class MiniSolrCloudCluster implements SolrBackend {
return startJettySolrRunner(newNodeName(), jettyConfig, null);
}
+  /**
+   * Starts an additional Solr node that uses this cluster's Jetty configuration together with a
+   * caller-supplied solr.xml instead of the default one.
+   *
+   * @param solrXml the solr.xml to hand to the new node (presumably the XML content rather than a
+   *     path -- TODO confirm against the three-argument overload)
+   * @return the {@link JettySolrRunner} of the newly started node
+   * @throws Exception propagated from the three-argument {@code startJettySolrRunner} overload
+   */
+  public JettySolrRunner startJettySolrRunner(String solrXml) throws Exception {
+    final String nodeName = newNodeName();
+    return startJettySolrRunner(nodeName, jettyConfig, solrXml);
+  }
+
/**
* Add a previously stopped node back to the cluster on a different port
*
diff --git
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
index 5f62d6669f5..fb643e551a4 100644
---
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
+++
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
@@ -61,6 +61,7 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.TrackingBackupRepository;
@@ -89,12 +90,13 @@ public abstract class AbstractIncrementalBackupTest extends
SolrCloudTestCase {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static long docsSeed; // see indexDocs()
- protected static final int NUM_NODES = 2;
+ protected static final int NUM_NODES = 3;
protected static final int NUM_SHARDS = 2; // granted we sometimes shard
split to get more
protected static final int LARGE_NUM_SHARDS = 11; // Periodically chosen via
randomization
protected static final int REPL_FACTOR = 2;
protected static final String BACKUPNAME_PREFIX = "mytestbackup";
protected static final String BACKUP_REPO_NAME = "trackingBackupRepository";
+ protected static final String ERROR_BACKUP_REPO_NAME =
"errorBackupRepository";
protected String testSuffix = "test1";
@@ -491,6 +493,125 @@ public abstract class AbstractIncrementalBackupTest
extends SolrCloudTestCase {
}
}
+  /**
+   * Verifies that restoring a backup over the original collection still completes when file
+   * copies are rejected on some nodes: first with a single failing node, then with every node
+   * except the first one failing. After each restore the collection must return to a fully active
+   * shape and contain exactly the documents that were backed up.
+   */
+  @Test
+  public void testRestoreToOriginalSucceedsWithErrors() throws Exception {
+    setTestSuffix("testRestoreToOriginalSucceedsOnASingleError");
+    final String backupCollectionName = getCollectionName();
+    final String backupName = BACKUPNAME_PREFIX + testSuffix;
+
+    // Bootstrap the backup collection with seed docs
+    CollectionAdminRequest.createCollection(backupCollectionName, "conf1", NUM_SHARDS, NUM_NODES)
+        .process(cluster.getSolrClient());
+    int backupDocs = indexDocs(backupCollectionName, true);
+
+    try {
+      // Back up the collection through the error-throwing repository
+      try (BackupRepository repository =
+          cluster
+              .getJettySolrRunner(0)
+              .getCoreContainer()
+              .newBackupRepository(ERROR_BACKUP_REPO_NAME)) {
+        final String backupLocation = repository.getBackupLocation(getBackupLocation());
+        final RequestStatusState result =
+            CollectionAdminRequest.backupCollection(backupCollectionName, backupName)
+                .setBackupConfigset(false)
+                .setLocation(backupLocation)
+                .setRepositoryName(ERROR_BACKUP_REPO_NAME)
+                .processAndWait(cluster.getSolrClient(), 20);
+        assertEquals(RequestStatusState.COMPLETED, result);
+      }
+      // Wipe the collection so that only a successful restore can bring the docs back
+      assertEquals(backupDocs, getNumDocsInCollection(backupCollectionName));
+      clearDocs(backupCollectionName);
+      assertEquals(0, getNumDocsInCollection(backupCollectionName));
+
+      // Test a single bad node: only the first jetty will fail
+      restoreWithFailingPortsAndVerify(
+          backupCollectionName,
+          backupName,
+          Set.of(cluster.getJettySolrRunner(0).getLocalPort()),
+          backupDocs);
+      clearDocs(backupCollectionName);
+      assertEquals(0, getNumDocsInCollection(backupCollectionName));
+
+      // Test a single good node: all but the first jetty will fail
+      restoreWithFailingPortsAndVerify(
+          backupCollectionName,
+          backupName,
+          cluster.getJettySolrRunners().subList(1, NUM_NODES).stream()
+              .map(JettySolrRunner::getLocalPort)
+              .collect(Collectors.toSet()),
+          backupDocs);
+    } finally {
+      // portsToFailOn is static; clear it so the injected failures cannot outlive this test
+      ErrorThrowingTrackingBackupRepository.portsToFailOn = Set.of();
+    }
+  }
+
+  /**
+   * Restores {@code backupName} into {@code collectionName} through the error-throwing repository
+   * while copies are rejected on {@code failingPorts}, then asserts that the request completed,
+   * that the collection became fully active again and that it holds {@code expectedDocs} docs.
+   *
+   * @param collectionName collection to restore into
+   * @param backupName name of the backup to restore
+   * @param failingPorts jetty ports on which the repository should throw during the restore
+   * @param expectedDocs number of documents the collection must contain afterwards
+   */
+  private void restoreWithFailingPortsAndVerify(
+      String collectionName, String backupName, Set<Integer> failingPorts, long expectedDocs)
+      throws Exception {
+    ErrorThrowingTrackingBackupRepository.portsToFailOn = failingPorts;
+    try (BackupRepository repository =
+        cluster
+            .getJettySolrRunner(0)
+            .getCoreContainer()
+            .newBackupRepository(ERROR_BACKUP_REPO_NAME)) {
+      final String backupLocation = repository.getBackupLocation(getBackupLocation());
+      final RequestStatusState result =
+          CollectionAdminRequest.restoreCollection(collectionName, backupName)
+              .setLocation(backupLocation)
+              .setRepositoryName(ERROR_BACKUP_REPO_NAME)
+              .processAndWait(cluster.getSolrClient(), 30);
+      assertEquals(RequestStatusState.COMPLETED, result);
+      waitForState(
+          "The failed core-install should recover and become healthy",
+          collectionName,
+          30,
+          TimeUnit.SECONDS,
+          SolrCloudTestCase.activeClusterShape(NUM_SHARDS, NUM_SHARDS * NUM_NODES));
+    }
+    assertEquals(expectedDocs, getNumDocsInCollection(collectionName));
+  }
+
+  /**
+   * A {@link TrackingBackupRepository} that simulates failures on selected nodes. Each instance
+   * reads its node's port from the {@code hostPort} init argument and throws {@link
+   * UnsupportedOperationException} from {@link #copyFileTo} and {@link #copyIndexFileTo} while
+   * that port is listed in {@link #portsToFailOn}. All other calls are inherited unchanged.
+   */
+  public static class ErrorThrowingTrackingBackupRepository extends TrackingBackupRepository {
+
+    /**
+     * Ports on which copies into a local directory should fail. Tests replace the whole set.
+     * Declared {@code volatile} because it is assigned by the test thread and read by whichever
+     * threads perform the copies (presumably Solr request threads -- confirm), so a new
+     * assignment must be visible to them.
+     */
+    public static volatile Set<Integer> portsToFailOn = new HashSet<>();
+
+    private int port;
+
+    /**
+     * Initializes the delegate and records the port this instance belongs to.
+     *
+     * @param args repository configuration; must contain a numeric {@code hostPort} entry
+     * @throws IllegalArgumentException if {@code hostPort} is missing or not a number
+     */
+    @Override
+    public void init(NamedList<?> args) {
+      super.init(args);
+      final Object hostPort = args.get("hostPort");
+      if (hostPort == null) {
+        // Fail with a readable message instead of an opaque parse error on null
+        throw new IllegalArgumentException(
+            "ErrorThrowingTrackingBackupRepository requires a 'hostPort' parameter");
+      }
+      port = Integer.parseInt(hostPort.toString());
+    }
+
+    @Override
+    public void copyFileTo(URI sourceRepo, String fileName, Directory dest) throws IOException {
+      failIfPortSelected(fileName);
+      super.copyFileTo(sourceRepo, fileName, dest);
+    }
+
+    @Override
+    public void copyIndexFileTo(
+        URI sourceRepo, String sourceFileName, Directory dest, String destFileName)
+        throws IOException {
+      failIfPortSelected(sourceFileName);
+      super.copyIndexFileTo(sourceRepo, sourceFileName, dest, destFileName);
+    }
+
+    /** Throws if this instance's port is currently selected for failure. */
+    private void failIfPortSelected(String fileName) {
+      if (portsToFailOn.contains(port)) {
+        throw new UnsupportedOperationException(
+            "Simulated failure copying '" + fileName + "' on port " + port);
+      }
+    }
+  }
+
protected void corruptIndexFiles() throws IOException {
List<Slice> slices = new
ArrayList<>(getCollectionState(getCollectionName()).getSlices());
Replica leader = slices.get(random().nextInt(slices.size())).getLeader();
@@ -567,6 +688,14 @@ public abstract class AbstractIncrementalBackupTest
extends SolrCloudTestCase {
CollectionAdminRequest.deleteCollection(restoreCollectionName).process(solrClient);
}
+  /**
+   * Empties a collection by deleting it and creating it again from the {@code conf1} configset
+   * with {@code NUM_SHARDS} shards and {@code NUM_NODES} as the replica argument, then logs the
+   * reset.
+   *
+   * @param collectionName name of the collection to drop and recreate
+   * @throws Exception if either collection admin request fails
+   */
+  protected void clearDocs(String collectionName) throws Exception {
+    final var solrClient = cluster.getSolrClient();
+    CollectionAdminRequest.deleteCollection(collectionName).process(solrClient);
+    CollectionAdminRequest.createCollection(collectionName, "conf1", NUM_SHARDS, NUM_NODES)
+        .process(solrClient);
+
+    log.info("Cleared all docs in collection: {}", collectionName);
+  }
+
private void indexDocs(String collectionName, int numDocs, boolean useUUID)
throws Exception {
Random random = new Random(docsSeed);
@@ -605,7 +734,7 @@ public abstract class AbstractIncrementalBackupTest extends
SolrCloudTestCase {
}
}
- private long getNumDocsInCollection(String collectionName) throws Exception {
+ protected long getNumDocsInCollection(String collectionName) throws
Exception {
return new QueryRequest(new SolrQuery("*:*"))
.process(cluster.getSolrClient(), collectionName)
.getResults()
diff --git
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractInstallShardTest.java
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractInstallShardTest.java
index ac1cc7b2b44..86d12f4a7bd 100644
---
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractInstallShardTest.java
+++
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractInstallShardTest.java
@@ -28,6 +28,7 @@ import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Random;
+import java.util.Set;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
@@ -72,6 +73,7 @@ public abstract class AbstractInstallShardTest extends
SolrCloudTestCase {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected static final String BACKUP_REPO_NAME = "trackingBackupRepository";
+ protected static final String ERROR_BACKUP_REPO_NAME =
"errorBackupRepository";
private static long docsSeed; // see indexDocs()
@@ -93,20 +95,20 @@ public abstract class AbstractInstallShardTest extends
SolrCloudTestCase {
}
}
- private String deleteAfterTest(String collName) {
+ protected String deleteAfterTest(String collName) {
collectionsToDelete.add(collName);
return collName;
}
// Populated by 'bootstrapBackupRepositoryData'
- private static int singleShardNumDocs = -1;
- private static int replicasPerShard = -1;
- private static int multiShardNumDocs = -1;
- private static URI singleShard1Uri = null;
- private static URI nonExistentLocationUri = null;
- private static URI[] multiShardUris = null;
+ protected static int singleShardNumDocs = -1;
+ protected static int replicasPerShard = -1;
+ protected static int multiShardNumDocs = -1;
+ protected static URI singleShard1Uri = null;
+ protected static URI nonExistentLocationUri = null;
+ protected static URI[] multiShardUris = null;
- private List<String> collectionsToDelete;
+ protected List<String> collectionsToDelete;
public static void bootstrapBackupRepositoryData(String
baseRepositoryLocation) throws Exception {
final int numShards = /*random().nextInt(3) + 2*/ 4;
@@ -175,6 +177,12 @@ public abstract class AbstractInstallShardTest extends
SolrCloudTestCase {
CollectionAdminRequest.installDataToShard(
collectionName, "shard1", singleShardLocation, BACKUP_REPO_NAME)
.process(cluster.getSolrClient());
+ waitForState(
+ "The failed core-install (previous leader) should recover and become
healthy",
+ collectionName,
+ 30,
+ TimeUnit.SECONDS,
+ SolrCloudTestCase.activeClusterShape(1, replicasPerShard));
assertCollectionHasNumDocs(collectionName, singleShardNumDocs);
}
@@ -238,6 +246,45 @@ public abstract class AbstractInstallShardTest extends
SolrCloudTestCase {
assertCollectionHasNumDocs(collectionName, multiShardNumDocs);
}
+  /**
+   * Verifies that installing shard data succeeds, for both a synchronous and an asynchronous
+   * request, while the error-throwing repository rejects copies on the first jetty's port. After
+   * each request the collection must return to one shard with two active replicas and hold the
+   * expected number of documents.
+   */
+  @Test
+  public void testInstallSucceedsOnASingleError() throws Exception {
+    final String collectionName = createAndAwaitEmptyCollection(1, 2);
+    deleteAfterTest(collectionName);
+    enableReadOnly(collectionName);
+
+    // Copies made through the error repository fail on the first jetty's port only
+    AbstractIncrementalBackupTest.ErrorThrowingTrackingBackupRepository.portsToFailOn =
+        Set.of(cluster.getJettySolrRunner(0).getLocalPort());
+    try {
+      final String singleShardLocation = singleShard1Uri.toString();
+      { // Synchronous request: must succeed despite the failing node
+        CollectionAdminRequest.installDataToShard(
+                collectionName, "shard1", singleShardLocation, ERROR_BACKUP_REPO_NAME)
+            .process(cluster.getSolrClient());
+        waitForState(
+            "The failed core-install should recover and become healthy",
+            collectionName,
+            30,
+            TimeUnit.SECONDS,
+            SolrCloudTestCase.activeClusterShape(1, 2));
+        assertCollectionHasNumDocs(collectionName, singleShardNumDocs);
+      }
+
+      { // Asynchronous request: must report COMPLETED despite the failing node
+        final var requestStatusState =
+            CollectionAdminRequest.installDataToShard(
+                    collectionName, "shard1", singleShardLocation, ERROR_BACKUP_REPO_NAME)
+                .processAndWait(cluster.getSolrClient(), 15);
+
+        assertEquals(RequestStatusState.COMPLETED, requestStatusState);
+        waitForState(
+            "The failed core-install should recover and become healthy",
+            collectionName,
+            30,
+            TimeUnit.SECONDS,
+            SolrCloudTestCase.activeClusterShape(1, 2));
+        assertCollectionHasNumDocs(collectionName, singleShardNumDocs);
+      }
+    } finally {
+      // portsToFailOn is static; clear it so the injected failures cannot outlive this test
+      AbstractIncrementalBackupTest.ErrorThrowingTrackingBackupRepository.portsToFailOn = Set.of();
+    }
+  }
+
/**
* Builds a string representation of a valid solr.xml configuration, with
the provided
* backup-repository configuration inserted
@@ -272,7 +319,7 @@ public abstract class AbstractInstallShardTest extends
SolrCloudTestCase {
+ "</solr>\n";
}
- private static void assertCollectionHasNumDocs(String collection, int
expectedNumDocs)
+ protected static void assertCollectionHasNumDocs(String collection, int
expectedNumDocs)
throws Exception {
final SolrClient solrClient = cluster.getSolrClient();
assertEquals(
@@ -364,7 +411,7 @@ public abstract class AbstractInstallShardTest extends
SolrCloudTestCase {
log.info("Indexed {} docs to collection: {}", numDocs, collectionName);
}
- private static String createAndAwaitEmptyCollection(int numShards, int
replicasPerShard)
+ protected static String createAndAwaitEmptyCollection(int numShards, int
replicasPerShard)
throws Exception {
final SolrClient solrClient = cluster.getSolrClient();
@@ -377,7 +424,7 @@ public abstract class AbstractInstallShardTest extends
SolrCloudTestCase {
return collectionName;
}
- private static void enableReadOnly(String collectionName) throws Exception {
+ protected static void enableReadOnly(String collectionName) throws Exception
{
CollectionAdminRequest.modifyCollection(collectionName, Map.of("readOnly",
true))
.process(cluster.getSolrClient());
}