This is an automated email from the ASF dual-hosted git repository.
xintongsong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/flink-agents.git
The following commit(s) were added to refs/heads/main by this push:
new e2096725 [test] Add per-test retry for flaky live-LLM
e2e/cross-language tests
e2096725 is described below
commit e2096725abab6c22b37006caafe9fdee6154658b
Author: Weiqing Yang <[email protected]>
AuthorDate: Sat May 30 22:12:40 2026 -0700
[test] Add per-test retry for flaky live-LLM e2e/cross-language tests
The live-LLM e2e and cross-language tests run a small Ollama model
(qwen3:1.7b) and intermittently fail on non-deterministic tool-call
results or Ollama read timeouts, turning CI red on unrelated changes.
Retry these suites automatically, scoped to the e2e/cross-language
invocations only so unit and style runs stay deterministic:
- Python: pytest-rerunfailures with --reruns 2 --reruns-delay 5 on the
e2e pytest calls in tools/ut.sh and tools/e2e.sh.
- Java: -Dsurefire.rerunFailingTestsCount=2 on the e2e mvn calls in
tools/ut.sh and test_resource_cross_language.sh.
A test that passes on retry yields a green build but is reported as a
flake, so the signal is preserved rather than masked.
Part of #716.
---
e2e-test/test-scripts/test_resource_cross_language.sh | 2 +-
python/pyproject.toml | 1 +
tools/e2e.sh | 2 +-
tools/ut.sh | 6 ++++--
4 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/e2e-test/test-scripts/test_resource_cross_language.sh b/e2e-test/test-scripts/test_resource_cross_language.sh
index 8fb7fc2b..ad5c7af0 100755
--- a/e2e-test/test-scripts/test_resource_cross_language.sh
+++ b/e2e-test/test-scripts/test_resource_cross_language.sh
@@ -30,7 +30,7 @@ echo "Root directory: $root_dir"
cd "$root_dir/e2e-test/flink-agents-end-to-end-tests-resource-cross-language"
echo "Running all tests in resource-cross-language module..."
-mvn -T16 --batch-mode --no-transfer-progress test
+mvn -T16 --batch-mode --no-transfer-progress test -Dsurefire.rerunFailingTestsCount=2
ret=$?
if [ "$ret" != "0" ]; then
diff --git a/python/pyproject.toml b/python/pyproject.toml
index f5d9814c..cbd0d5b6 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -90,6 +90,7 @@ build = [
# Test dependencies
test = [
"pytest==9.0.3",
+ "pytest-rerunfailures==16.3",
]
# Lint dependencies
diff --git a/tools/e2e.sh b/tools/e2e.sh
index 886705fa..bc04762e 100755
--- a/tools/e2e.sh
+++ b/tools/e2e.sh
@@ -81,7 +81,7 @@ function run_resource_cross_language_test_in_python {
return 1
fi
- cd "$python_dir" && uv run --no-sync pytest flink_agents -s -k "e2e_tests_resource_cross_language"
+ cd "$python_dir" && uv run --no-sync pytest flink_agents -s -k "e2e_tests_resource_cross_language" --reruns 2 --reruns-delay 5
}
function run_resource_name_consistency_check {
diff --git a/tools/ut.sh b/tools/ut.sh
index 44b941ba..8b9de711 100755
--- a/tools/ut.sh
+++ b/tools/ut.sh
@@ -155,7 +155,7 @@ java_tests() {
local all_passed=true
for version in "${flink_versions[@]}"; do
echo "Running E2E tests for Flink ${version}..."
- mvn --batch-mode --no-transfer-progress test -pl 'e2e-test/flink-agents-end-to-end-tests-integration' -Pflink-${version} ${SPOTLESS_FLAG}
+ mvn --batch-mode --no-transfer-progress test -pl 'e2e-test/flink-agents-end-to-end-tests-integration' -Pflink-${version} -Dsurefire.rerunFailingTestsCount=2 ${SPOTLESS_FLAG}
if [ $? -ne 0 ]; then
echo "E2E tests failed for Flink ${version}" >&2
@@ -232,6 +232,8 @@ python_tests() {
uv run --no-sync pytest flink_agents \
-s \
-k "e2e_tests_integration" \
+ --reruns 2 \
+ --reruns-delay 5 \
-o log_cli=true \
-o log_cli_level=${LOG_LEVEL:-CRITICAL}
else
@@ -259,7 +261,7 @@ python_tests() {
echo "Running tests with pytest..."
fi
if $run_e2e; then
- pytest flink_agents -k "e2e_tests_integration" -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
+ pytest flink_agents -k "e2e_tests_integration" --reruns 2 --reruns-delay 5 -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
else
pytest flink_agents -k "not e2e_tests" -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
fi