This is an automated email from the ASF dual-hosted git repository.
epugh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 13fd8ad7e4e Test rolling upgrade of Solr using Docker and BATS (#3706)
13fd8ad7e4e is described below
commit 13fd8ad7e4e0c57c371edbca1576f7d903dafcda
Author: David Smiley <[email protected]>
AuthorDate: Tue Feb 3 19:35:19 2026 -0500
Test rolling upgrade of Solr using Docker and BATS (#3706)
Demonstrates moving between two versions of Solr.
Co-authored-by: copilot-swe-agent[bot]
<[email protected]>
Co-authored-by: dsmiley <[email protected]>
Co-authored-by: Eric Pugh <[email protected]>
---
solr/packaging/build.gradle | 11 +-
solr/packaging/test/bats_helper.bash | 48 +++----
solr/packaging/test/test_extraction.bats | 36 ++++--
solr/packaging/test/test_rolling_upgrade.bats | 172 ++++++++++++++++++++++++++
solr/packaging/test/test_start_solr.bats | 2 +
5 files changed, 221 insertions(+), 48 deletions(-)
diff --git a/solr/packaging/build.gradle b/solr/packaging/build.gradle
index 0e260f6c9ee..759528e7550 100644
--- a/solr/packaging/build.gradle
+++ b/solr/packaging/build.gradle
@@ -327,11 +327,14 @@ class BatsTask extends Exec {
protected void exec() {
executable "$project.ext.nodeProjectDir/node_modules/bats/bin/bats"
+ def batsArgs = []
+ if (logger.isInfoEnabled()) {
+ batsArgs << '--verbose-run'
+ }
+ batsArgs += ['-T', '--print-output-on-failure', '--report-formatter',
'junit', '--output', "$project.buildDir/test-output"]
// Note: tests to run must be listed after all other arguments
- // Additional debugging output: -x, --verbose-run
- setArgs(['-T', '--print-output-on-failure', '--report-formatter', 'junit',
'--output', "$project.buildDir/test-output"] +
- (testFiles.empty ? testDir : testFiles))
-
+ batsArgs += testFiles.empty ? [testDir] : testFiles
+ setArgs(batsArgs)
super.exec()
}
}
diff --git a/solr/packaging/test/bats_helper.bash
b/solr/packaging/test/bats_helper.bash
index db475a10d0f..d684724a2e5 100644
--- a/solr/packaging/test/bats_helper.bash
+++ b/solr/packaging/test/bats_helper.bash
@@ -97,41 +97,21 @@ collection_exists() {
return 1
}
-# Wait for a collection to be queryable
-wait_for_collection() {
- local collection="$1"
- local timeout=${2:-180}
- local start_ts
- start_ts=$(date +%s)
- while true; do
- if curl -s -S -f
"http://localhost:${SOLR_PORT}/solr/${collection}/select?q=*:*" | grep -q
'"responseHeader"'; then
+# Utility function to retry a command until it succeeds or times out
+wait_for() {
+ local timeout="${1:-30}" # Default 30 seconds timeout
+ local interval="${2:-1}" # Default 1 second between retries
+ shift 2 # Remove timeout and interval from args
+ local command=("$@") # Remaining args are the command to execute
+
+ local end_time=$(($(date +%s) + timeout))
+
+ while [ $(date +%s) -lt $end_time ]; do
+ if "${command[@]}"; then
return 0
fi
- local now
- now=$(date +%s)
- if [ $(( now - start_ts )) -ge ${timeout} ]; then
- echo "Timed out waiting for collection '${collection}' to become
queryable" >&2
- return 1
- fi
- sleep 3
+ sleep "$interval"
done
-}
-
-# Apply the ExtractingRequestHandler via Config API and print error body on
failure
-apply_extract_handler() {
- local collection="$1"
- local
json="{\"add-requesthandler\":{\"name\":\"/update/extract\",\"class\":\"org.apache.solr.handler.extraction.ExtractingRequestHandler\",\"tikaserver.url\":\"http://localhost:${TIKA_PORT}\",\"defaults\":{\"lowernames\":\"true\",\"captureAttr\":\"true\"}}}"
- local url="http://localhost:${SOLR_PORT}/solr/${collection}/config"
- # Capture body and status code
- local resp code body
- sleep 5
- resp=$(curl -s -S -w "\n%{http_code}" -X POST -H
'Content-type:application/json' -d "$json" "$url")
- code="${resp##*$'\n'}"
- body="${resp%$'\n'*}"
- if [ "$code" = "200" ]; then
- return 0
- else
- echo "Config API error applying ExtractingRequestHandler to ${collection}
(HTTP ${code}): ${body}" >&3
- return 1
- fi
+
+ return 1 # Timeout reached
}
diff --git a/solr/packaging/test/test_extraction.bats
b/solr/packaging/test/test_extraction.bats
index 865298331a0..12c4b72840a 100644
--- a/solr/packaging/test/test_extraction.bats
+++ b/solr/packaging/test/test_extraction.bats
@@ -18,6 +18,25 @@
load bats_helper
+# Apply the ExtractingRequestHandler via Config API and print error body on
failure
+apply_extract_handler() {
+ local collection="$1"
+ local
json="{\"add-requesthandler\":{\"name\":\"/update/extract\",\"class\":\"org.apache.solr.handler.extraction.ExtractingRequestHandler\",\"tikaserver.url\":\"http://localhost:${TIKA_PORT}\",\"defaults\":{\"lowernames\":\"true\",\"captureAttr\":\"true\"}}}"
+ local url="http://localhost:${SOLR_PORT}/solr/${collection}/config"
+ # Capture body and status code
+ local resp code body
+ sleep 5
+ resp=$(curl -s -S -w "\n%{http_code}" -X POST -H
'Content-type:application/json' -d "$json" "$url")
+ code="${resp##*$'\n'}"
+ body="${resp%$'\n'*}"
+ if [ "$code" = "200" ]; then
+ return 0
+ else
+ echo "Config API error applying ExtractingRequestHandler to ${collection}
(HTTP ${code}): ${body}" >&3
+ return 1
+ fi
+}
+
setup_file() {
if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
export TIKA_PORT=$((SOLR_PORT+5))
@@ -25,7 +44,7 @@ setup_file() {
echo "Tika Server started on port ${TIKA_PORT}" >&3
else
export DOCKER_UNAVAILABLE=1
- echo "WARNING: Docker not available (CLI missing or daemon not running);
Tika-dependent tests will be bypassed and marked as passed." >&3
+ echo "WARNING: Docker not available (CLI missing or daemon not running);
Tika-dependent tests will be bypassed." >&3
fi
}
@@ -51,8 +70,7 @@ teardown() {
@test "using curl to extract a single pdf file" {
if [ -n "${DOCKER_UNAVAILABLE:-}" ]; then
- echo "WARNING: Docker not available; bypassing test." >&3
- return 0
+ skip "Docker is not available"
fi
# Disable security manager to allow extraction
@@ -61,7 +79,7 @@ teardown() {
solr start -Dsolr.modules=extraction
solr create -c gettingstarted -d _default
- wait_for_collection gettingstarted 30
+ wait_for 30 3 curl -s -S -f
"http://localhost:${SOLR_PORT}/solr/gettingstarted/select?q=*:*" -o /dev/null
apply_extract_handler gettingstarted
curl
"http://localhost:${SOLR_PORT}/solr/gettingstarted/update/extract?literal.id=doc1&commit=true"
-F "myfile=@${SOLR_TIP}/example/exampledocs/solr-word.pdf"
@@ -73,8 +91,7 @@ teardown() {
@test "using the bin/solr post tool to extract content from pdf" {
if [ -n "${DOCKER_UNAVAILABLE:-}" ]; then
- echo "WARNING: Docker not available; bypassing test." >&3
- return 0
+ skip "Docker is not available"
fi
# Disable security manager to allow extraction
@@ -83,7 +100,7 @@ teardown() {
solr start -Dsolr.modules=extraction
solr create -c content_extraction -d _default
- wait_for_collection content_extraction 30
+ wait_for 30 3 curl -s -S -f
"http://localhost:${SOLR_PORT}/solr/content_extraction/select?q=*:*" -o
/dev/null
apply_extract_handler content_extraction
# We filter to pdf to invoke the Extract handler.
@@ -99,8 +116,7 @@ teardown() {
@test "using the bin/solr post tool to crawl web site" {
if [ -n "${DOCKER_UNAVAILABLE:-}" ]; then
- echo "WARNING: Docker not available; bypassing test." >&3
- return 0
+ skip "Docker is not available"
fi
# Disable security manager to allow extraction
@@ -109,7 +125,7 @@ teardown() {
solr start -Dsolr.modules=extraction
solr create -c website_extraction -d _default
- wait_for_collection website_extraction 30
+ wait_for 30 3 curl -s -S -f
"http://localhost:${SOLR_PORT}/solr/website_extraction/select?q=*:*" -o
/dev/null
apply_extract_handler website_extraction
# Change to --recursive 1 to crawl multiple pages, but may be too slow.
diff --git a/solr/packaging/test/test_rolling_upgrade.bats
b/solr/packaging/test/test_rolling_upgrade.bats
new file mode 100644
index 00000000000..3902189eeb3
--- /dev/null
+++ b/solr/packaging/test/test_rolling_upgrade.bats
@@ -0,0 +1,172 @@
+#!/usr/bin/env bats
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load bats_helper
+
+# You can test alternative images via
+# export SOLR_BEGIN_IMAGE="apache/solr-nightly:9.9.0-slim" and then running
+# ./gradlew iTest --tests test_rolling_upgrade.bats
+SOLR_BEGIN_IMAGE="${SOLR_BEGIN_IMAGE:-apache/solr-nightly:9.10.0-SNAPSHOT-slim}"
+SOLR_END_IMAGE="${SOLR_END_IMAGE:-apache/solr-nightly:10.0.0-SNAPSHOT-slim}"
+
+setup() {
+ common_clean_setup
+
+ # Pre-checks
+ if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then
+ skip "Docker is not available"
+ fi
+ docker pull "$SOLR_BEGIN_IMAGE" || skip "Docker image $SOLR_BEGIN_IMAGE is
not available"
+ docker pull "$SOLR_END_IMAGE" || skip "Docker image $SOLR_END_IMAGE is not
available"
+
+ # Record test start time for scoping logs on failure
+ TEST_STARTED_AT_ISO=$(date -Iseconds)
+ export TEST_STARTED_AT_ISO
+
+ # Persist artifacts under Gradle’s test-output
+ ARTIFACT_DIR="${TEST_OUTPUT_DIR}/docker"
+ mkdir -p "$ARTIFACT_DIR"
+ export ARTIFACT_DIR
+}
+
+teardown() {
+ failed=$([[ -z "${BATS_TEST_COMPLETED:-}" ]] && [[ -z
"${BATS_TEST_SKIPPED:-}" ]] && echo 1 || echo 0)
+ if [[ "$failed" -eq 1 ]]; then
+ echo "# Test failed - capturing Docker diagnostics" >&3
+ echo "# === docker ps (summary) ===" >&3
+ docker ps -a --format 'table
{{.Names}}\t{{.Status}}\t{{.Image}}\t{{.Ports}}' >&3 2>&3 || true
+ fi
+
+ for container in solr-node1 solr-node2 solr-node3; do
+ if docker ps -a --format '{{.Names}}' | grep -q "^${container}$"
2>/dev/null; then
+ if [[ "$failed" -eq 1 ]]; then
+ echo "# === Docker logs for $container ===" >&3
+ docker logs --timestamps --since "$TEST_STARTED_AT_ISO" "$container"
>&3 2>&3 || echo "# Failed to get logs for $container" >&3
+ echo "# === Docker inspect for $container ===" >&3
+ docker inspect "$container" | jq '.[] | {Name: .Name, State: .State,
Ports: .NetworkSettings.Ports}' >&3 2>&3 || true
+ fi
+ # Persist artifacts
+ docker logs --timestamps "$container" >"$ARTIFACT_DIR/${container}.log"
2>&1 || true
+ docker inspect "$container" >"$ARTIFACT_DIR/${container}.inspect.json"
2>&1 || true
+ docker exec "$container" ps aux >"$ARTIFACT_DIR/${container}.ps.txt"
2>&1 || true
+ fi
+ done
+
+ echo "# Docker artifacts saved to: $ARTIFACT_DIR" >&3
+
+ docker stop solr-node1 solr-node2 solr-node3 2>/dev/null || true
+ docker rm solr-node1 solr-node2 solr-node3 2>/dev/null || true
+ docker volume rm solr-data1 solr-data2 solr-data3 2>/dev/null || true
+ docker network rm solrcloud-test 2>/dev/null || true
+}
+
+@test "Docker SolrCloud rolling upgrade" {
+ # Networking & volumes
+ docker network create solrcloud-test
+ docker volume create solr-data1
+ docker volume create solr-data2
+ docker volume create solr-data3
+
+ echo "Starting solr-node1 with embedded ZooKeeper"
+ docker run --name solr-node1 -d \
+ --network solrcloud-test \
+ --memory=400m \
+ --platform linux/amd64 \
+ -v solr-data1:/var/solr \
+ "$SOLR_BEGIN_IMAGE" solr start -f -c -m 200m --host solr-node1 -p 8983
+ docker exec solr-node1 solr assert --started http://solr-node1:8983
--timeout 10000
+
+ # start next 2 in parallel
+
+ echo "Starting solr-node2 connected to first node's ZooKeeper"
+ docker run --name solr-node2 -d \
+ --network solrcloud-test \
+ --memory=400m \
+ --platform linux/amd64 \
+ -v solr-data2:/var/solr \
+ "$SOLR_BEGIN_IMAGE" solr start -f -c -m 200m --host solr-node2 -p 8984 -z
solr-node1:9983
+
+ echo "Starting solr-node3 connected to first node's ZooKeeper"
+ docker run --name solr-node3 -d \
+ --network solrcloud-test \
+ --memory=400m \
+ --platform linux/amd64 \
+ -v solr-data3:/var/solr \
+ "$SOLR_BEGIN_IMAGE" solr start -f -c -m 200m --host solr-node3 -p 8985 -z
solr-node1:9983
+
+ docker exec solr-node2 solr assert --started http://solr-node2:8984
--timeout 30000
+ docker exec solr-node3 solr assert --started http://solr-node3:8985
--timeout 30000
+
+ echo "Creating a Collection"
+ docker exec --user=solr solr-node1 solr create -c test-collection -n
techproducts --shards 3
+
+ echo "Checking collection health"
+ wait_for 30 1 docker exec solr-node1 solr healthcheck -c test-collection
+
+ echo "Add some sample data"
+ docker exec --user=solr solr-node1 solr post -c test-collection
example/exampledocs/mem.xml
+ assert_success
+
+ # Begin rolling upgrade - upgrade node 3 first (reverse order: 3, 2, 1)
+ echo "Starting rolling upgrade - upgrading node 3"
+ docker stop solr-node3
+ docker rm solr-node3
+ docker run --name solr-node3 -d \
+ --network solrcloud-test \
+ --memory=400m \
+ --platform linux/amd64 \
+ -v solr-data3:/var/solr \
+ "$SOLR_END_IMAGE" solr start -f -m 200m --host solr-node3 -p 8985 -z
solr-node1:9983
+ docker exec solr-node3 solr assert --started http://solr-node3:8985
--timeout 30000
+ assert_success
+
+ # Upgrade node 2 second
+ echo "Upgrading node 2"
+ docker stop solr-node2
+ docker rm solr-node2
+ docker run --name solr-node2 -d \
+ --network solrcloud-test \
+ --memory=400m \
+ --platform linux/amd64 \
+ -v solr-data2:/var/solr \
+ "$SOLR_END_IMAGE" solr start -f -m 200m --host solr-node2 -p 8984 -z
solr-node1:9983
+ docker exec solr-node2 solr assert --started http://solr-node2:8984
--timeout 30000
+ assert_success
+
+ echo "Upgrading node 1 (ZK node)"
+ docker stop solr-node1
+ docker rm solr-node1
+ docker run --name solr-node1 -d \
+ --network solrcloud-test \
+ --memory=400m \
+ --platform linux/amd64 \
+ -v solr-data1:/var/solr \
+ "$SOLR_END_IMAGE" solr start -f -m 200m --host solr-node1 -p 8983
+ docker exec solr-node1 solr assert --started http://solr-node1:8983
--timeout 30000
+ assert_success
+
+ # Final collection health check
+ wait_for 30 1 docker exec solr-node1 solr healthcheck -c test-collection
+
+ echo "checking cluster has exactly 3 live nodes"
+ run docker exec solr-node1 curl -s
"http://solr-node1:8983/solr/admin/collections?action=CLUSTERSTATUS"
+ assert_success
+
+ local live_nodes_count=$(echo "$output" | jq -r '.cluster.live_nodes |
length')
+ assert_equal "$live_nodes_count" "3"
+
+}
diff --git a/solr/packaging/test/test_start_solr.bats
b/solr/packaging/test/test_start_solr.bats
index 27f1fe9df03..6d978bd146d 100644
--- a/solr/packaging/test/test_start_solr.bats
+++ b/solr/packaging/test/test_start_solr.bats
@@ -60,6 +60,8 @@ teardown() {
# for start/stop/restart we parse the args separate from picking the command
# which means you don't get an error message for passing a start arg, like
--jvm-opts to a stop command.
+ # Pre-check: bare "timeout" exits non-zero even when installed; probe availability instead
+ command -v timeout >/dev/null 2>&1 || skip "timeout utility is not available"
# Set a timeout duration (in seconds)
TIMEOUT_DURATION=2