This is an automated email from the ASF dual-hosted git repository.
dsmiley pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new d1b373daa9b SOLR-17877: fix overseerEnabled cluster prop (#3627)
d1b373daa9b is described below
commit d1b373daa9b1c6ca5daeff32a291524d1b92b34b
Author: David Smiley <[email protected]>
AuthorDate: Tue Sep 9 12:33:00 2025 -0400
SOLR-17877: fix overseerEnabled cluster prop (#3627)
ZkController:
* fix parsing of overseerEnabled cluster property
* fix sequence of reading cluster property
* reorder some initialization for clarity
MiniSolrCloudCluster:
* Remove flipOverseerEnablement
* Fix MiniSolrCloudCluster application of overseer enablement
TestDistributedTracing requires Overseer
MockSimpleZkController: unused
* Test overseerEnabled property (unit test)
---
.../java/org/apache/solr/cloud/ZkController.java | 40 +++++-----
.../solr/cloud/CreateCollectionCleanupTest.java | 1 -
.../org/apache/solr/cloud/DeleteReplicaTest.java | 7 +-
.../apache/solr/cloud/MockSimpleZkController.java | 36 ---------
.../org/apache/solr/cloud/ZkControllerTest.java | 90 ++++++++++++++++++++++
.../solr/opentelemetry/TestDistributedTracing.java | 1 +
.../apache/solr/cloud/MiniSolrCloudCluster.java | 17 +---
7 files changed, 113 insertions(+), 79 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 833669ac5ab..248d46272ba 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -300,7 +300,7 @@ public class ZkController implements Closeable {
String zkServerAddress,
int zkClientConnectTimeout,
CloudConfig cloudConfig)
- throws InterruptedException, TimeoutException, IOException {
+ throws InterruptedException, TimeoutException, IOException,
KeeperException {
if (cc == null) throw new IllegalArgumentException("CoreContainer cannot
be null.");
this.cc = cc;
@@ -375,6 +375,8 @@ public class ZkController implements Closeable {
this.overseerFailureMap = Overseer.getFailureMap(zkClient);
this.asyncIdsMap = Overseer.getAsyncIdsMap(zkClient);
+ createClusterZkNodes(zkClient);
+
zkStateReader =
new ZkStateReader(
zkClient,
@@ -382,19 +384,29 @@ public class ZkController implements Closeable {
if (cc != null) cc.securityNodeChanged();
});
+ zkStateReader.createClusterStateWatchersAndUpdate(); // and reads cluster
properties
+
+ // note: Can't read cluster properties until createClusterState ^ is called
+ final String urlSchemeFromClusterProp =
+ zkStateReader.getClusterProperty(ZkStateReader.URL_SCHEME,
ZkStateReader.HTTP);
+ // this must happen after zkStateReader has initialized the cluster props
+ this.baseURL = URLUtil.getBaseUrlForNodeName(this.nodeName,
urlSchemeFromClusterProp);
+
// Now that zkStateReader is available, read OVERSEER_ENABLED.
- // When overseerEnabled is false, both distributed features should be
enabled
- Boolean overseerEnabled =
- zkStateReader.getClusterProperty(ZkStateReader.OVERSEER_ENABLED, null);
- if (overseerEnabled == null) {
- overseerEnabled =
EnvUtils.getPropertyAsBool("solr.cloud.overseer.enabled", true);
- }
+ final boolean overseerEnabled =
+ Boolean.parseBoolean(
+ String.valueOf(
+ zkStateReader.getClusterProperty(
+ ZkStateReader.OVERSEER_ENABLED,
+ EnvUtils.getPropertyAsBool("solr.cloud.overseer.enabled",
true))));
+
if (overseerEnabled) {
log.info("The Overseer is enabled. It will process all cluster commands
& state updates.");
} else {
log.info(
"The Overseer is disabled. Cluster commands & state updates will
happen on any/all nodes.");
}
+ // These "distributed" things replace the Overseer when that's disabled
this.distributedClusterStateUpdater = new
DistributedClusterStateUpdater(!overseerEnabled);
this.distributedCommandRunner =
!overseerEnabled
@@ -1068,16 +1080,6 @@ public class ZkController implements Closeable {
private void init() {
try {
- createClusterZkNodes(zkClient);
- zkStateReader.createClusterStateWatchersAndUpdate();
-
- // note: Can't read cluster properties until createClusterState ^ is
called
- final String urlSchemeFromClusterProp =
- zkStateReader.getClusterProperty(ZkStateReader.URL_SCHEME,
ZkStateReader.HTTP);
-
- // this must happen after zkStateReader has initialized the cluster props
- this.baseURL = Utils.getBaseUrlForNodeName(this.nodeName,
urlSchemeFromClusterProp);
-
checkForExistingEphemeralNode();
registerLiveNodesListener();
checkClusterVersionCompatibility();
@@ -1109,10 +1111,6 @@ public class ZkController implements Closeable {
// Do this last to signal we're up.
createEphemeralLiveNode();
- } catch (IOException e) {
- log.error("", e);
- throw new SolrException(
- SolrException.ErrorCode.SERVER_ERROR, "Can't create
ZooKeeperController", e);
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
diff --git
a/solr/core/src/test/org/apache/solr/cloud/CreateCollectionCleanupTest.java
b/solr/core/src/test/org/apache/solr/cloud/CreateCollectionCleanupTest.java
index df3babd9823..66956d90ae2 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CreateCollectionCleanupTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CreateCollectionCleanupTest.java
@@ -66,7 +66,6 @@ public class CreateCollectionCleanupTest extends
SolrCloudTestCase {
.addConfig(
"conf1",
TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.withSolrXml(CLOUD_SOLR_XML_WITH_10S_CREATE_COLL_WAIT)
- .flipOverseerEnablement()
.configure();
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
b/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
index bf7be2f425d..6368c665e77 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
@@ -62,12 +62,7 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
System.setProperty("distribUpdateSoTimeout", "15000");
// these tests need to be isolated, so we don't share the minicluster
- configureCluster(4)
- .addConfig("conf", configset("cloud-minimal"))
- .flipOverseerEnablement()
- // Some tests (this one) use "the other" cluster Collection API
execution strategy to
- // increase coverage
- .configure();
+ configureCluster(4).addConfig("conf",
configset("cloud-minimal")).configure();
}
@After
diff --git
a/solr/core/src/test/org/apache/solr/cloud/MockSimpleZkController.java
b/solr/core/src/test/org/apache/solr/cloud/MockSimpleZkController.java
deleted file mode 100644
index 2c678132a86..00000000000
--- a/solr/core/src/test/org/apache/solr/cloud/MockSimpleZkController.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.cloud;
-
-import java.io.IOException;
-import java.util.concurrent.TimeoutException;
-import org.apache.solr.core.CloudConfig;
-import org.apache.solr.core.CoreContainer;
-
-public class MockSimpleZkController extends ZkController {
-
- public MockSimpleZkController(
- CoreContainer cc, String zkServerAddress, int zkClientConnectTimeout,
CloudConfig cloudConfig)
- throws InterruptedException, TimeoutException, IOException {
- super(cc, zkServerAddress, zkClientConnectTimeout, cloudConfig);
- }
-
- @Override
- public CoreContainer getCoreContainer() {
- return super.getCoreContainer();
- }
-}
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
index 24fbdc4a70b..9eab3a3cafe 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
@@ -682,6 +682,96 @@ public class ZkControllerTest extends SolrCloudTestCase {
}
}
+ @Test
+ public void testOverseerEnabledClusterPropertyRespected() throws Exception {
+ // Do not use MiniSolrCloudCluster
+ Path zkDir = createTempDir("testOverseerEnabledClusterPropertyRespected");
+ ZkTestServer server = new ZkTestServer(zkDir);
+ try {
+ server.run();
+
+ // Create cluster ZK nodes and set the overseerEnabled cluster property
to false BEFORE
+ // ZkController starts
+ try (SolrZkClient client =
+ new SolrZkClient.Builder()
+ .withUrl(server.getZkAddress())
+ .withTimeout(TIMEOUT, TimeUnit.MILLISECONDS)
+ .build()) {
+
+ ZkController.createClusterZkNodes(client);
+ ClusterProperties cp = new ClusterProperties(client);
+ cp.setClusterProperty(ZkStateReader.OVERSEER_ENABLED, "false");
+ }
+
+ // Now create a ZkController - it should respect the cluster property
and have overseer
+ // disabled
+ CoreContainer cc = getCoreContainer();
+ try {
+ CloudConfig cloudConfig = new
CloudConfig.CloudConfigBuilder("127.0.0.1", 8983).build();
+ try (ZkController zkController =
+ new ZkController(cc, server.getZkAddress(), TIMEOUT, cloudConfig))
{
+
+ // The overseer should be disabled due to the cluster property,
meaning distributed state
+ // updates should be enabled
+ // Since overseerEnabled=false in cluster properties,
!overseerEnabled should be true, so
+ // isDistributedStateUpdate() should return true
+ assertTrue(
+ "Overseer should be disabled when overseerEnabled cluster
property is false, meaning distributed state updates should be enabled",
+
zkController.getDistributedClusterStateUpdater().isDistributedStateUpdate());
+ }
+ } finally {
+ cc.shutdown();
+ }
+ } finally {
+ server.shutdown();
+ }
+ }
+
+ @Test
+ public void testOverseerEnabledClusterPropertyTrue() throws Exception {
+ // Do not use MiniSolrCloudCluster
+ Path zkDir = createTempDir("testOverseerEnabledClusterPropertyTrue");
+ ZkTestServer server = new ZkTestServer(zkDir);
+ try {
+ server.run();
+
+ // Create cluster ZK nodes and set the overseerEnabled cluster property
to true BEFORE
+ // ZkController starts
+ try (SolrZkClient client =
+ new SolrZkClient.Builder()
+ .withUrl(server.getZkAddress())
+ .withTimeout(TIMEOUT, TimeUnit.MILLISECONDS)
+ .build()) {
+
+ ZkController.createClusterZkNodes(client);
+ ClusterProperties cp = new ClusterProperties(client);
+ cp.setClusterProperty(ZkStateReader.OVERSEER_ENABLED, "true");
+ }
+
+ // Now create a ZkController - it should respect the cluster property
and have overseer
+ // enabled
+ CoreContainer cc = getCoreContainer();
+ try {
+ CloudConfig cloudConfig = new
CloudConfig.CloudConfigBuilder("127.0.0.1", 8983).build();
+ try (ZkController zkController =
+ new ZkController(cc, server.getZkAddress(), TIMEOUT, cloudConfig))
{
+
+ // The overseer should be enabled due to the cluster property,
meaning distributed state
+ // updates should be disabled
+ // Since overseerEnabled=true in cluster properties,
!overseerEnabled should be false, so
+ // isDistributedStateUpdate() should return false
+ assertFalse(
+ "Overseer should be enabled when overseerEnabled cluster
property is true, meaning distributed state updates should be disabled",
+
zkController.getDistributedClusterStateUpdater().isDistributedStateUpdate());
+ }
+ } finally {
+ cc.shutdown();
+ }
+ } finally {
+ server.shutdown();
+ }
+ }
+
private CoreContainer getCoreContainer() {
return new MockCoreContainer();
}
diff --git
a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
index 711c1039a6d..501893e99e8 100644
---
a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
+++
b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
@@ -58,6 +58,7 @@ public class TestDistributedTracing extends SolrCloudTestCase
{
.addConfig("config",
TEST_PATH().resolve("collection1").resolve("conf"))
.withSolrXml(TEST_PATH().resolve("solr.xml"))
.withTraceIdGenerationDisabled()
+ .withOverseer(true) // some assertions assume overseer
.configure();
assertNotEquals(
diff --git
a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
index d3f9f4e15c9..4c75533ed35 100644
---
a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
+++
b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
@@ -1108,20 +1108,6 @@ public class MiniSolrCloudCluster {
return this;
}
- /**
- * This method makes the MiniSolrCloudCluster use the "other" Collection
API execution strategy
- * than it normally would. When some test classes call this method (and
some don't) we make sure
- * that a run of multiple tests with a single seed will exercise both code
lines (distributed
- * and Overseer based Collection API) so regressions can be spotted faster.
- *
- * <p>The real need is for a few tests covering reasonable use cases to
call this method. If
- * you're adding a new test, you don't have to call it (but it's ok if you
do).
- */
- public Builder flipOverseerEnablement() {
- overseerEnabled = !overseerEnabled;
- return this;
- }
-
/**
* Force the cluster Collection and config state API execution as well as
the cluster state
* update strategy to be either Overseer based or distributed. <b>This
method can be useful when
@@ -1181,7 +1167,7 @@ public class MiniSolrCloudCluster {
* @throws Exception if an error occurs on startup
*/
public MiniSolrCloudCluster build() throws Exception {
- this.clusterProperties.put(ZkStateReader.OVERSEER_ENABLED,
Boolean.toString(overseerEnabled));
+ System.setProperty("solr.cloud.overseer.enabled",
Boolean.toString(overseerEnabled));
// eager init to prevent OTEL init races caused by test setup
if (!disableTraceIdGeneration &&
TracerConfigurator.TRACE_ID_GEN_ENABLED) {
@@ -1204,6 +1190,7 @@ public class MiniSolrCloudCluster {
cluster.uploadConfigSet(config.path, config.name);
}
+ // TODO process BEFORE nodes start up! Some props are only read on node
start.
if (clusterProperties.size() > 0) {
ClusterProperties props = new ClusterProperties(cluster.getZkClient());
for (Map.Entry<String, Object> entry : clusterProperties.entrySet()) {