This is an automated email from the ASF dual-hosted git repository.
dsmiley pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new d6097068bc0 SOLR-17877: Introduce cluster property overseerEnabled,
replacing dual booleans (#3524)
d6097068bc0 is described below
commit d6097068bc029d391f67a3f7f247494a510f4401
Author: David Smiley <[email protected]>
AuthorDate: Thu Sep 4 22:19:03 2025 -0400
SOLR-17877: Introduce cluster property overseerEnabled, replacing dual
booleans (#3524)
The SolrCloud Overseer can now be disabled, in favor of a simpler distributed
mode of
cluster command and collection state processing. In Solr 9 this was
possible with undocumented
/ unsupported configuration in solr.xml (`distributedClusterStateUpdates`
&
`distributedCollectionConfigSetExecution`). Those are now removed. In
their place is a new
overseerEnabled cluster property and an env var
SOLR_CLOUD_OVERSEER_ENABLED.
---
solr/CHANGES.txt | 7 ++
.../solr/cloud/DistributedClusterStateUpdater.java | 8 ---
.../src/java/org/apache/solr/cloud/Overseer.java | 3 +-
.../java/org/apache/solr/cloud/ZkController.java | 29 ++++++---
...istributedCollectionConfigSetCommandRunner.java | 11 +---
.../src/java/org/apache/solr/core/CloudConfig.java | 38 -----------
.../java/org/apache/solr/core/SolrXmlConfig.java | 7 +-
.../solr/cloud/CreateCollectionCleanupTest.java | 3 +-
.../org/apache/solr/cloud/DeleteReplicaTest.java | 2 +-
.../test/org/apache/solr/cloud/OverseerTest.java | 19 +++---
.../org/apache/solr/cloud/ZkControllerTest.java | 8 +--
solr/server/solr/solr.xml | 2 -
.../pages/cluster-node-management.adoc | 2 +-
.../pages/major-changes-in-solr-10.adoc | 25 ++++++++
.../apache/solr/common/cloud/ZkStateReader.java | 4 +-
.../cloud/PerReplicaStatesIntegrationTest.java | 2 +-
.../apache/solr/cloud/MiniSolrCloudCluster.java | 75 +++++-----------------
.../org/apache/solr/cloud/SolrCloudTestCase.java | 12 ++--
18 files changed, 96 insertions(+), 161 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 218419c6c7c..27dbf74f6ce 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -221,6 +221,13 @@ Other Changes
* SOLR-17874: Switch remaining usages of Apache HttpClient to use the
internally managed Jetty HttpClient instance. (David Smiley)
+* SOLR-17877: The SolrCloud Overseer can now be disabled, in favor of a simpler
distributed mode of
+ cluster command and collection state processing. In Solr 9 this was
possible with undocumented
+ / unsupported configuration in solr.xml (`distributedClusterStateUpdates` &
+ `distributedCollectionConfigSetExecution`). Those are now removed. In
their place is a new
+ overseerEnabled cluster property and an env var SOLR_CLOUD_OVERSEER_ENABLED.
Read more in the
+ upgrade guide. (David Smiley)
+
================== 9.10.0 ==================
New Features
---------------------
diff --git
a/solr/core/src/java/org/apache/solr/cloud/DistributedClusterStateUpdater.java
b/solr/core/src/java/org/apache/solr/cloud/DistributedClusterStateUpdater.java
index 3c7552790c9..f2c293b938c 100644
---
a/solr/core/src/java/org/apache/solr/cloud/DistributedClusterStateUpdater.java
+++
b/solr/core/src/java/org/apache/solr/cloud/DistributedClusterStateUpdater.java
@@ -92,14 +92,6 @@ public class DistributedClusterStateUpdater {
*/
public DistributedClusterStateUpdater(boolean useDistributedStateUpdate) {
this.useDistributedStateUpdate = useDistributedStateUpdate;
- if (log.isInfoEnabled()) {
- log.info(
- "Creating DistributedClusterStateUpdater with
useDistributedStateUpdate="
- + useDistributedStateUpdate
- + ". Solr will be using "
- + (useDistributedStateUpdate ? "distributed" : "Overseer based")
- + " cluster state updates."); // nowarn
- }
}
/**
diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java
b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
index b1a9f72c1f7..c6ca1426cdb 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
@@ -711,8 +711,7 @@ public class Overseer implements SolrCloseable {
this.zkController = zkController;
this.stats = new Stats();
this.config = config;
- this.distributedClusterStateUpdater =
- new
DistributedClusterStateUpdater(config.getDistributedClusterStateUpdates());
+ this.distributedClusterStateUpdater =
zkController.getDistributedClusterStateUpdater();
this.solrMetricsContext =
new SolrMetricsContext(
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 6d3d52eb402..60bb3faf35e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -102,6 +102,7 @@ import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Compressor;
+import org.apache.solr.common.util.EnvUtils;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.ObjectReleaseTracker;
@@ -306,10 +307,6 @@ public class ZkController implements Closeable {
this.cloudConfig = cloudConfig;
- // Use the configured way to do cluster state update (Overseer queue vs
distributed)
- distributedClusterStateUpdater =
- new
DistributedClusterStateUpdater(cloudConfig.getDistributedClusterStateUpdates());
-
this.genericCoreNodeNames = cloudConfig.getGenericCoreNodeNames();
this.zkServerAddress = zkServerAddress;
@@ -373,11 +370,6 @@ public class ZkController implements Closeable {
// Refuse to start if ZK has a non empty /clusterstate.json or a /solr.xml
file
checkNoOldClusterstate(zkClient);
- this.distributedCommandRunner =
- cloudConfig.getDistributedCollectionConfigSetExecution()
- ? Optional.of(new DistributedCollectionConfigSetCommandRunner(cc,
zkClient))
- : Optional.empty();
-
this.overseerRunningMap = Overseer.getRunningMap(zkClient);
this.overseerCompletedMap = Overseer.getCompletedMap(zkClient);
this.overseerFailureMap = Overseer.getFailureMap(zkClient);
@@ -390,6 +382,25 @@ public class ZkController implements Closeable {
if (cc != null) cc.securityNodeChanged();
});
+ // Now that zkStateReader is available, read OVERSEER_ENABLED.
+ // When overseerEnabled is false, both distributed features should be
enabled
+ Boolean overseerEnabled =
+ zkStateReader.getClusterProperty(ZkStateReader.OVERSEER_ENABLED, null);
+ if (overseerEnabled == null) {
+ overseerEnabled =
EnvUtils.getPropertyAsBool("solr.cloud.overseer.enabled", true);
+ }
+ if (overseerEnabled) {
+ log.info("The Overseer is enabled. It will process all cluster commands
& state updates.");
+ } else {
+ log.info(
+ "The Overseer is disabled. Cluster commands & state updates will
happen on any/all nodes.");
+ }
+ this.distributedClusterStateUpdater = new
DistributedClusterStateUpdater(!overseerEnabled);
+ this.distributedCommandRunner =
+ !overseerEnabled
+ ? Optional.of(new DistributedCollectionConfigSetCommandRunner(cc,
zkClient))
+ : Optional.empty();
+
init();
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
diff --git
a/solr/core/src/java/org/apache/solr/cloud/api/collections/DistributedCollectionConfigSetCommandRunner.java
b/solr/core/src/java/org/apache/solr/cloud/api/collections/DistributedCollectionConfigSetCommandRunner.java
index 5778198db0f..7789d374564 100644
---
a/solr/core/src/java/org/apache/solr/cloud/api/collections/DistributedCollectionConfigSetCommandRunner.java
+++
b/solr/core/src/java/org/apache/solr/cloud/api/collections/DistributedCollectionConfigSetCommandRunner.java
@@ -64,8 +64,8 @@ import org.slf4j.LoggerFactory;
* going through Overseer and {@link OverseerCollectionMessageHandler} or
{@link
* org.apache.solr.cloud.OverseerConfigSetMessageHandler}.
*
- * <p>This class is only called when Collection and Config Set API calls are
configured to be
- * distributed, which implies cluster state updates are distributed as well.
+ * <p>This class is only called when the Overseer is disabled, which implies
cluster state updates
+ * are distributed as well.
*/
public class DistributedCollectionConfigSetCommandRunner {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -112,13 +112,6 @@ public class DistributedCollectionConfigSetCommandRunner {
// note: coreContainer.getZkController() is not yet instantiated; don't
call it right now
this.coreContainer = coreContainer;
- if (log.isInfoEnabled()) {
- // Note is it hard to print a log when Collection API is handled by
Overseer because Overseer
- // is started regardless of how Collection API is handled, so it doesn't
really know...
- log.info(
- "Creating DistributedCollectionConfigSetCommandRunner. Collection
and ConfigSet APIs are running distributed (not Overseer based)");
- }
-
// TODO we should look at how everything is getting closed when the node
is shutdown. But it
// seems that CollectionsHandler (that creates instances of this class) is
not really closed, so
// maybe it doesn't matter?
diff --git a/solr/core/src/java/org/apache/solr/core/CloudConfig.java
b/solr/core/src/java/org/apache/solr/core/CloudConfig.java
index 790aa333012..e73bc029e1f 100644
--- a/solr/core/src/java/org/apache/solr/core/CloudConfig.java
+++ b/solr/core/src/java/org/apache/solr/core/CloudConfig.java
@@ -49,10 +49,6 @@ public class CloudConfig {
private final String pkiHandlerPublicKeyPath;
- private final boolean useDistributedClusterStateUpdates;
-
- private final boolean useDistributedCollectionConfigSetExecution;
-
private final int minStateByteLenForCompression;
private final String stateCompressorClass;
@@ -72,8 +68,6 @@ public class CloudConfig {
boolean createCollectionCheckLeaderActive,
String pkiHandlerPrivateKeyPath,
String pkiHandlerPublicKeyPath,
- boolean useDistributedClusterStateUpdates,
- boolean useDistributedCollectionConfigSetExecution,
int minStateByteLenForCompression,
String stateCompressorClass) {
this.zkHost = zkHost;
@@ -90,17 +84,9 @@ public class CloudConfig {
this.createCollectionCheckLeaderActive = createCollectionCheckLeaderActive;
this.pkiHandlerPrivateKeyPath = pkiHandlerPrivateKeyPath;
this.pkiHandlerPublicKeyPath = pkiHandlerPublicKeyPath;
- this.useDistributedClusterStateUpdates = useDistributedClusterStateUpdates;
- this.useDistributedCollectionConfigSetExecution =
useDistributedCollectionConfigSetExecution;
this.minStateByteLenForCompression = minStateByteLenForCompression;
this.stateCompressorClass = stateCompressorClass;
- if (useDistributedCollectionConfigSetExecution &&
!useDistributedClusterStateUpdates) {
- throw new SolrException(
- SolrException.ErrorCode.SERVER_ERROR,
- "'useDistributedCollectionConfigSetExecution' can't be true if
useDistributedClusterStateUpdates is false");
- }
-
if (this.hostPort == -1)
throw new SolrException(
SolrException.ErrorCode.SERVER_ERROR, "'hostPort' must be configured
to run SolrCloud");
@@ -162,14 +148,6 @@ public class CloudConfig {
return pkiHandlerPublicKeyPath;
}
- public boolean getDistributedClusterStateUpdates() {
- return useDistributedClusterStateUpdates;
- }
-
- public boolean getDistributedCollectionConfigSetExecution() {
- return useDistributedCollectionConfigSetExecution;
- }
-
public int getMinStateByteLenForCompression() {
return minStateByteLenForCompression;
}
@@ -202,8 +180,6 @@ public class CloudConfig {
DEFAULT_CREATE_COLLECTION_CHECK_LEADER_ACTIVE;
private String pkiHandlerPrivateKeyPath;
private String pkiHandlerPublicKeyPath;
- private boolean useDistributedClusterStateUpdates = false;
- private boolean useDistributedCollectionConfigSetExecution = false;
private int minStateByteLenForCompression =
DEFAULT_MINIMUM_STATE_SIZE_FOR_COMPRESSION;
private String stateCompressorClass;
@@ -277,18 +253,6 @@ public class CloudConfig {
return this;
}
- public CloudConfigBuilder setUseDistributedClusterStateUpdates(
- boolean useDistributedClusterStateUpdates) {
- this.useDistributedClusterStateUpdates =
useDistributedClusterStateUpdates;
- return this;
- }
-
- public CloudConfigBuilder setUseDistributedCollectionConfigSetExecution(
- boolean useDistributedCollectionConfigSetExecution) {
- this.useDistributedCollectionConfigSetExecution =
useDistributedCollectionConfigSetExecution;
- return this;
- }
-
public CloudConfigBuilder setMinStateByteLenForCompression(int
minStateByteLenForCompression) {
this.minStateByteLenForCompression = minStateByteLenForCompression;
return this;
@@ -315,8 +279,6 @@ public class CloudConfig {
createCollectionCheckLeaderActive,
pkiHandlerPrivateKeyPath,
pkiHandlerPublicKeyPath,
- useDistributedClusterStateUpdates,
- useDistributedCollectionConfigSetExecution,
minStateByteLenForCompression,
stateCompressorClass);
}
diff --git a/solr/core/src/java/org/apache/solr/core/SolrXmlConfig.java
b/solr/core/src/java/org/apache/solr/core/SolrXmlConfig.java
index 18fdcf8be57..2c4479ce64b 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrXmlConfig.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrXmlConfig.java
@@ -564,18 +564,13 @@ public class SolrXmlConfig {
case "pkiHandlerPublicKeyPath":
builder.setPkiHandlerPublicKeyPath(value);
break;
- case "distributedClusterStateUpdates":
-
builder.setUseDistributedClusterStateUpdates(Boolean.parseBoolean(value));
- break;
- case "distributedCollectionConfigSetExecution":
-
builder.setUseDistributedCollectionConfigSetExecution(Boolean.parseBoolean(value));
- break;
case "minStateByteLenForCompression":
builder.setMinStateByteLenForCompression(parseInt(name, value));
break;
case "stateCompressor":
builder.setStateCompressorClass(value);
break;
+
default:
throw new SolrException(
SolrException.ErrorCode.SERVER_ERROR,
diff --git
a/solr/core/src/test/org/apache/solr/cloud/CreateCollectionCleanupTest.java
b/solr/core/src/test/org/apache/solr/cloud/CreateCollectionCleanupTest.java
index ac31461d509..df3babd9823 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CreateCollectionCleanupTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CreateCollectionCleanupTest.java
@@ -56,7 +56,6 @@ public class CreateCollectionCleanupTest extends
SolrCloudTestCase {
+ " <int
name=\"distribUpdateConnTimeout\">${distribUpdateConnTimeout:45000}</int>\n"
+ " <int
name=\"distribUpdateSoTimeout\">${distribUpdateSoTimeout:340000}</int>\n"
+ " <int
name=\"createCollectionWaitTimeTillActive\">${createCollectionWaitTimeTillActive:10}</int>\n"
- + " <str
name=\"distributedClusterStateUpdates\">${solr.distributedClusterStateUpdates:false}</str>
\n"
+ " </solrcloud>\n"
+ " \n"
+ "</solr>\n";
@@ -67,7 +66,7 @@ public class CreateCollectionCleanupTest extends
SolrCloudTestCase {
.addConfig(
"conf1",
TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.withSolrXml(CLOUD_SOLR_XML_WITH_10S_CREATE_COLL_WAIT)
- .useOtherCollectionConfigSetExecution()
+ .flipOverseerEnablement()
.configure();
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
b/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
index bfa9a05c10c..bf7be2f425d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
@@ -64,7 +64,7 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
// these tests need to be isolated, so we don't share the minicluster
configureCluster(4)
.addConfig("conf", configset("cloud-minimal"))
- .useOtherCollectionConfigSetExecution()
+ .flipOverseerEnablement()
// Some tests (this one) use "the other" cluster Collection API
execution strategy to
// increase coverage
.configure();
diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
index a48b2eda8d3..b9576d81e38 100644
--- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
@@ -208,7 +208,6 @@ public class OverseerTest extends SolrTestCaseJ4 {
"");
final Overseer overseer =
MiniSolrCloudCluster.getOpenOverseer(overseers);
// This being an Overseer test, we force it to use the Overseer based
cluster state update.
- // Look for "new Overseer" calls in this class.
assertFalse(overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate());
ZkDistributedQueue q = overseer.getStateUpdateQueue();
q.offer(m);
@@ -998,6 +997,9 @@ public class OverseerTest extends SolrTestCaseJ4 {
reader = new ZkStateReader(zkClient);
reader.createClusterStateWatchersAndUpdate();
+ // Set system property to ensure tests use Overseer mode
+ System.setProperty("solr.cloud.overseer.enabled", "true");
+
mockController =
new MockZKController(server.getZkAddress(), "127.0.0.1:8983_solr",
overseers);
@@ -1021,10 +1023,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
"/admin/cores",
reader,
zkController,
- new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983)
- .setUseDistributedClusterStateUpdates(false)
- .setUseDistributedCollectionConfigSetExecution(false)
- .build());
+ new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983).build());
overseers.add(overseer);
ElectionContext ec =
new OverseerElectionContext(zkClient, overseer,
server.getZkAddress().replace("/", "_"));
@@ -1837,8 +1836,12 @@ public class OverseerTest extends SolrTestCaseJ4 {
httpShardHandlerFactory.init(new PluginInfo("shardHandlerFactory",
Collections.emptyMap()));
httpShardHandlerFactorys.add(httpShardHandlerFactory);
+ // Set system property to ensure tests use Overseer mode
+ System.setProperty("solr.cloud.overseer.enabled", "true");
+
ZkController zkController = createMockZkController(address, null, reader);
zkControllers.add(zkController);
+
// Create an Overseer with associated configuration to NOT USE distributed
state update. Tests
// in this class really test the Overseer.
Overseer overseer =
@@ -1848,9 +1851,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
"/admin/cores",
reader,
zkController,
- new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983)
- .setUseDistributedClusterStateUpdates(false)
- .build());
+ new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983).build());
overseers.add(overseer);
ElectionContext ec = new OverseerElectionContext(zkClient, overseer,
address.replace("/", "_"));
overseerElector.setup(ec);
@@ -1910,6 +1911,8 @@ public class OverseerTest extends SolrTestCaseJ4 {
when(zkController.getCoreContainer()).thenReturn(mockAlwaysUpCoreContainer);
when(zkController.getZkClient()).thenReturn(zkClient);
when(zkController.getZkStateReader()).thenReturn(reader);
+ when(zkController.getDistributedClusterStateUpdater())
+ .thenReturn(new DistributedClusterStateUpdater(false));
// primitive support for CC.runAsync
doAnswer(
invocable -> {
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
index ea1230774a4..706763a15cc 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
@@ -261,13 +261,7 @@ public class ZkControllerTest extends SolrCloudTestCase {
ZkController zkController = null;
try {
- CloudConfig cloudConfig =
- new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983)
- .setUseDistributedClusterStateUpdates(
- Boolean.getBoolean("solr.distributedClusterStateUpdates"))
- .setUseDistributedCollectionConfigSetExecution(
-
Boolean.getBoolean("solr.distributedCollectionConfigSetExecution"))
- .build();
+ CloudConfig cloudConfig = new
CloudConfig.CloudConfigBuilder("127.0.0.1", 8983).build();
zkController =
new ZkController(cc, cluster.getZkServer().getZkAddress(),
TIMEOUT, cloudConfig);
zkControllerRef.set(zkController);
diff --git a/solr/server/solr/solr.xml b/solr/server/solr/solr.xml
index f3ca244896b..ac4193c8ea4 100644
--- a/solr/server/solr/solr.xml
+++ b/solr/server/solr/solr.xml
@@ -49,8 +49,6 @@
<str
name="zkCredentialsProvider">${zkCredentialsProvider:org.apache.solr.common.cloud.DefaultZkCredentialsProvider}</str>
<str
name="zkACLProvider">${zkACLProvider:org.apache.solr.common.cloud.DefaultZkACLProvider}</str>
<str
name="zkCredentialsInjector">${zkCredentialsInjector:org.apache.solr.common.cloud.DefaultZkCredentialsInjector}</str>
- <bool
name="distributedClusterStateUpdates">${distributedClusterStateUpdates:false}</bool>
- <bool
name="distributedCollectionConfigSetExecution">${distributedCollectionConfigSetExecution:false}</bool>
<int
name="minStateByteLenForCompression">${minStateByteLenForCompression:-1}</int>
<str
name="stateCompressor">${stateCompressor:org.apache.solr.common.util.ZLibCompressor}</str>
diff --git
a/solr/solr-ref-guide/modules/deployment-guide/pages/cluster-node-management.adoc
b/solr/solr-ref-guide/modules/deployment-guide/pages/cluster-node-management.adoc
index 87ae3b1405c..4fd8678cb30 100644
---
a/solr/solr-ref-guide/modules/deployment-guide/pages/cluster-node-management.adoc
+++
b/solr/solr-ref-guide/modules/deployment-guide/pages/cluster-node-management.adoc
@@ -274,7 +274,7 @@ curl -X DELETE
http://localhost:8983/api/cluster/properties/urlScheme
|===
+
The name of the property. Appears in the path of v2 requests.
-Supported properties names are `location`, `maxCoresPerNode`, `urlScheme`, and
`defaultShardPreferences`.
+Supported property names are `location`, `maxCoresPerNode`,
`overseerEnabled`, `urlScheme`, and `defaultShardPreferences`.
+
Other properties can be set (for example, if you need them for custom plugins)
but they must begin with the prefix `ext.`.
Unknown properties that don't begin with `ext.` will be rejected.
diff --git
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc
index 21e6566f33c..224aa943bb4 100644
---
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc
+++
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc
@@ -68,6 +68,31 @@ This makes it clear that they pertain specifically to
“JavaBin” rather than
* The system property `solr.httpclient.builder.factory` now only configures
SolrClients using a Jetty based HttpClient, not an Apache one.
+=== SolrCloud Overseer
+
+SolrCloud now supports disabling the "Overseer", which is an elected node
responsible for processing all cluster administration requests and collection
state updates.
+When disabled, _any_ node that either receives such a request or wishes to do
it internally will execute the command.
+It was possible to disable the Overseer since Solr 9 using undocumented
configuration in solr.xml (`distributedClusterStateUpdates` &
`distributedCollectionConfigSetExecution`) that have since been removed.
+Now this mode is toggled either with a boolean
xref:deployment-guide:cluster-node-management.adoc#clusterprop[cluster
property] `overseerEnabled`, or an env var `SOLR_CLOUD_OVERSEER_ENABLED`.
+In Solr 11, the Overseer might cease to exist, in an effort to simplify
SolrCloud and maintenance.
+
+Upgrades: This choice must not be changed during a rolling upgrade; doing so is
highly risky.
+All nodes in the cluster must always have a consistent understanding of the
overseer's enablement.
+If using the cluster property toggle, use the
xref:deployment-guide:zookeeper-utilities.adoc#set-a-cluster-property[bin/solr
cluster] CLI utility to set it while the cluster is offline.
+If using the env var, ensure each Solr node is configured to start with the
same value.
+
+When the Overseer is disabled, it is nonetheless still elected, which can be
influenced by node roles.
+If you are using any
xref:configuration-guide:cluster-singleton-plugins.adoc[cluster singleton
plugins], they execute on the node elected to be the Overseer.
+
+In general, most users won't notice a difference.
+Commands should execute faster without the Overseer.
+Debugging some SolrCloud problems with the Overseer is more challenging than
without since the Overseer is complex (a principal reason for its disablement).
+But the Overseer centralized some processing that results in efficiencies for
large clusters in some scenarios.
+If you have a collection that has many replicas (hundreds), and many are
co-located on the same node, then node stops and starts will internally
interact with ZooKeeper more.
+Using
xref:configuration-guide:configuring-solr-xml.adoc[minStateByteLenForCompression]
will help.
+Creating a replica (either via collection creation or other circumstances) can
take more time without the Overseer if these creation commands are delivered to
many nodes around the cluster.
+That can be avoided simply by sending admin requests to a consistent node.
+
=== Service installer
The service installer now installs a `systemd` startup script instead of an
`init.d` startup script. It is up to the user to uninstall any existing
`init.d` script when upgrading.
diff --git
a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java
b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java
index 7d0f80379b2..41fa9139a8f 100644
---
a/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java
+++
b/solr/solrj-zookeeper/src/java/org/apache/solr/common/cloud/ZkStateReader.java
@@ -138,6 +138,7 @@ public class ZkStateReader implements SolrCloseable {
public static final String NRT_REPLICAS = "nrtReplicas";
public static final String TLOG_REPLICAS = "tlogReplicas";
public static final String READ_ONLY = "readOnly";
+ public static final String OVERSEER_ENABLED = "overseerEnabled";
public static final String CONFIGS_ZKNODE = "/configs";
public static final String CONFIGNAME_PROP = "configName";
@@ -381,7 +382,8 @@ public class ZkStateReader implements SolrCloseable {
SOLR_ENVIRONMENT,
CollectionAdminParams.DEFAULTS,
CONTAINER_PLUGINS,
- PLACEMENT_PLUGIN);
+ PLACEMENT_PLUGIN,
+ OVERSEER_ENABLED);
private final SolrZkClient zkClient;
diff --git
a/solr/solrj/src/test/org/apache/solr/common/cloud/PerReplicaStatesIntegrationTest.java
b/solr/solrj/src/test/org/apache/solr/common/cloud/PerReplicaStatesIntegrationTest.java
index e1697a83869..81b210c01a0 100644
---
a/solr/solrj/src/test/org/apache/solr/common/cloud/PerReplicaStatesIntegrationTest.java
+++
b/solr/solrj/src/test/org/apache/solr/common/cloud/PerReplicaStatesIntegrationTest.java
@@ -286,7 +286,7 @@ public class PerReplicaStatesIntegrationTest extends
SolrCloudTestCase {
String PRS_COLL = "prs_test_coll2";
MiniSolrCloudCluster cluster =
configureCluster(3)
- .withDistributedClusterStateUpdates(false, false)
+ .withOverseer(true)
.addConfig(
"conf",
getFile("solrj")
diff --git
a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
index c7d8af3053f..d3f9f4e15c9 100644
---
a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
+++
b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
@@ -72,6 +72,7 @@ import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionAdminParams;
+import org.apache.solr.common.util.EnvUtils;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.SolrNamedThreadFactory;
@@ -141,8 +142,6 @@ public class MiniSolrCloudCluster {
? PRE_GENERATED_PUBLIC_KEY_URL.toExternalForm()
: "")
+ "}</str> \n"
- + " <str
name=\"distributedClusterStateUpdates\">${solr.distributedClusterStateUpdates:false}</str>
\n"
- + " <str
name=\"distributedCollectionConfigSetExecution\">${solr.distributedCollectionConfigSetExecution:false}</str>
\n"
+ " </solrcloud>\n"
+
// NOTE: this turns off the metrics collection unless overridden by
a sysprop
@@ -1031,8 +1030,8 @@ public class MiniSolrCloudCluster {
private Map<String, Object> clusterProperties = new HashMap<>();
private boolean trackJettyMetrics;
- private boolean useDistributedCollectionConfigSetExecution;
- private boolean useDistributedClusterStateUpdate;
+ private boolean overseerEnabled =
+ EnvUtils.getPropertyAsBool("solr.cloud.overseer.enabled", true);
private boolean formatZkServer = true;
private boolean disableTraceIdGeneration = false;
@@ -1118,13 +1117,8 @@ public class MiniSolrCloudCluster {
* <p>The real need is for a few tests covering reasonable use cases to
call this method. If
* you're adding a new test, you don't have to call it (but it's ok if you
do).
*/
- public Builder useOtherCollectionConfigSetExecution() {
- // Switch from Overseer to distributed Collection execution and vice
versa
- useDistributedCollectionConfigSetExecution =
!useDistributedCollectionConfigSetExecution;
- // Reverse distributed cluster state updates as well if possible (state
can't be Overseer
- // based if Collections API is distributed)
- useDistributedClusterStateUpdate =
- !useDistributedClusterStateUpdate ||
useDistributedCollectionConfigSetExecution;
+ public Builder flipOverseerEnablement() {
+ overseerEnabled = !overseerEnabled;
return this;
}
@@ -1133,40 +1127,21 @@ public class MiniSolrCloudCluster {
* update strategy to be either Overseer based or distributed. <b>This
method can be useful when
* debugging tests</b> failing in only one of the two modes to have all
local runs exhibit the
* issue, as well obviously for tests that are not compatible with one of
the two modes.
+ * Alternatively, a system property can be used in lieu of this method.
*
- * <p>If this method is not called, the strategy being used will be random
if the configuration
- * passed to the cluster ({@code solr.xml} equivalent) contains a
placeholder similar to:
+ * <p>If this method is not called and the system property is not set, the
strategy being used will
+ * default to Overseer mode (overseerEnabled=true). However, note {@link
SolrCloudTestCase}
+ * (above this) randomly chooses the mode.
*
- * <pre>{@code
- * <solrcloud>
- * ....
- * <str
name="distributedClusterStateUpdates">${solr.distributedClusterStateUpdates:false}</str>
- * <str
name="distributedCollectionConfigSetExecution">${solr.distributedCollectionConfigSetExecution:false}</str>
- * ....
- * </solrcloud>
- * }</pre>
+ * <p>For tests that need to explicitly test distributed vs Overseer
behavior, use this method
+ * to control which mode is used. The cluster property 'overseerEnabled'
will be set
+ * accordingly.
*
- * For an example of a configuration supporting this setting, see {@link
- * MiniSolrCloudCluster#DEFAULT_CLOUD_SOLR_XML}. When a test sets a
different {@code solr.xml}
- * config (using {@link #withSolrXml}), if the config does not contain the
placeholder, the
- * strategy will be defined by the values assigned to {@code
useDistributedClusterStateUpdates}
- * and {@code useDistributedCollectionConfigSetExecution} in {@link
- * org.apache.solr.core.CloudConfig.CloudConfigBuilder}.
- *
- * @param distributedCollectionConfigSetApi When {@code true}, Collection
and Config Set API
- * commands are executed in a distributed way by nodes. When {@code
false}, they are
- * executed by Overseer.
- * @param distributedClusterStateUpdates When {@code true}, cluster state
updates are handled in
- * a distributed way by nodes. When {@code false}, cluster state
updates are handled by
- * Overseer.
- * <p>If {@code distributedCollectionConfigSetApi} is {@code true}
then this parameter must
- * be {@code true}.
+ * @param overseerEnabled When {@code false}, Collection and Config Set
API commands are
+ * executed in a distributed way by nodes. When {@code true}, they are
executed by Overseer.
*/
- @SuppressWarnings("InvalidParam")
- public Builder withDistributedClusterStateUpdates(
- boolean distributedCollectionConfigSetApi, boolean
distributedClusterStateUpdates) {
- useDistributedCollectionConfigSetExecution =
distributedCollectionConfigSetApi;
- useDistributedClusterStateUpdate = distributedClusterStateUpdates;
+ public Builder withOverseer(boolean overseerEnabled) {
+ this.overseerEnabled = overseerEnabled;
return this;
}
@@ -1206,23 +1181,7 @@ public class MiniSolrCloudCluster {
* @throws Exception if an error occurs on startup
*/
public MiniSolrCloudCluster build() throws Exception {
- // Two lines below will have an impact on how the MiniSolrCloudCluster
and therefore the test
- // run if the config being
- // used in the test does have the appropriate placeholders. See for
example
- // DEFAULT_CLOUD_SOLR_XML in MiniSolrCloudCluster.
- // Hard coding values here will impact such tests.
- // To hard code behavior for tests not having these placeholders - and
for SolrCloud as well
- // for that matter! -
- // change the values assigned to useDistributedClusterStateUpdates and
- // useDistributedCollectionConfigSetExecution in
- // org.apache.solr.core.CloudConfig.CloudConfigBuilder. Do not forget
then to revert before
- // commit!
- System.setProperty(
- "solr.distributedCollectionConfigSetExecution",
- Boolean.toString(useDistributedCollectionConfigSetExecution));
- System.setProperty(
- "solr.distributedClusterStateUpdates",
- Boolean.toString(useDistributedClusterStateUpdate));
+ this.clusterProperties.put(ZkStateReader.OVERSEER_ENABLED,
Boolean.toString(overseerEnabled));
// eager init to prevent OTEL init races caused by test setup
if (!disableTraceIdGeneration &&
TracerConfigurator.TRACE_ID_GEN_ENABLED) {
diff --git
a/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudTestCase.java
b/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudTestCase.java
index bb0f60e2a9b..c4cffced3df 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudTestCase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudTestCase.java
@@ -114,18 +114,14 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
*/
protected static MiniSolrCloudCluster.Builder configureCluster(int
nodeCount) {
// By default the MiniSolrCloudCluster being built will randomly (seed
based) decide which
- // collection API strategy to use (distributed or Overseer based) and
which cluster update
- // strategy to use (distributed if collection API is distributed, but
Overseer based or
- // distributed randomly chosen if Collection API is Overseer based), and
whether to use PRS
+ // collection API strategy to use (distributed or Overseer based) and
whether to use PRS
configurePrsDefault();
- boolean useDistributedCollectionConfigSetExecution =
LuceneTestCase.random().nextInt(2) == 0;
- boolean useDistributedClusterStateUpdate =
- useDistributedCollectionConfigSetExecution ||
LuceneTestCase.random().nextInt(2) == 0;
return new MiniSolrCloudCluster.Builder(nodeCount, createTempDir())
- .withDistributedClusterStateUpdates(
- useDistributedCollectionConfigSetExecution,
useDistributedClusterStateUpdate);
+ .withOverseer(
+ EnvUtils.getPropertyAsBool(
+ "solr.cloud.overseer.enabled",
LuceneTestCase.random().nextBoolean()));
}
public static void configurePrsDefault() {