[1/2] kudu git commit: build-support: option to retry all failed tests

adar Tue, 28 Aug 2018 14:23:39 -0700

Repository: kudu
Updated Branches:
  refs/heads/master 3bb5f56cf -> d55df3c6e



build-support: option to retry all failed tests

Currently, users can opt to retry flaky tests as reported by the
user-specified test server. The test server's flaky test list may not
accurately reflect which tests are flaky in all environments. In
environments where some flaky tests are under-represented by the test
server, it would still be nice to be resilient to flaky failures. As
such, this patch adds an option to retry all failed tests.

Here's a run of a non-flaky test into which I added a FATAL log.
http://dist-test.cloudera.org/job?job_id=awong.1535433877.28172

Change-Id: I24aea0b9e7a1c2c66bc5feffcb454ff01cdca6fd
Reviewed-on: http://gerrit.cloudera.org:8080/11342
Tested-by: Kudu Jenkins
Reviewed-by: Grant Henke <granthe...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/5d69deb3
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/5d69deb3
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/5d69deb3

Branch: refs/heads/master
Commit: 5d69deb36925113796cd69f51061b8396b0174fc
Parents: 3bb5f56
Author: Andrew Wong <aw...@cloudera.com>
Authored: Mon Aug 27 19:03:08 2018 -0700
Committer: Andrew Wong <aw...@cloudera.com>
Committed: Tue Aug 28 15:39:46 2018 +0000

----------------------------------------------------------------------
 build-support/dist_test.py |  9 ++++++++-
 build-support/run-test.sh  | 35 ++++++++++++++++++++---------------
 2 files changed, 28 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/5d69deb3/build-support/dist_test.py
----------------------------------------------------------------------
diff --git a/build-support/dist_test.py b/build-support/dist_test.py
index ad2bcf1..c1176fe 100755
--- a/build-support/dist_test.py
+++ b/build-support/dist_test.py
@@ -58,6 +58,11 @@ MAX_TASKS_PER_JOB=10000
 # of retries, so we have to subtract 1.
 FLAKY_TEST_RETRIES = int(os.environ.get('KUDU_FLAKY_TEST_ATTEMPTS', 1)) - 1
 
+# Whether to retry all failed C++ tests, rather than just known flaky tests.
+# Since Java flaky tests are not reported by the test server, Java tests are
+# always retried, regardless of this value.
+RETRY_ALL_TESTS = int(os.environ.get('KUDU_RETRY_ALL_FAILED_TESTS', 0))
+
 # Flags to include when running Gradle tasks
 GRADLE_FLAGS = os.environ.get('EXTRA_GRADLE_FLAGS', "")
 
@@ -473,9 +478,11 @@ def run_tests(parser, options):
     create_archive_input(staging, execution,
                          collect_tmpdir=options.collect_tmpdir)
   run_isolate(staging)
+  retry_all = RETRY_ALL_TESTS > 0
   create_task_json(staging,
                    flaky_test_set=get_flakies(),
-                   replicate_tasks=options.num_instances)
+                   replicate_tasks=options.num_instances,
+                   retry_all_tests=retry_all)
   submit_tasks(staging, options)
 
 def add_run_subparser(subparsers):

http://git-wip-us.apache.org/repos/asf/kudu/blob/5d69deb3/build-support/run-test.sh
----------------------------------------------------------------------
diff --git a/build-support/run-test.sh b/build-support/run-test.sh
index 541d6f7..e8cc996 100755
--- a/build-support/run-test.sh
+++ b/build-support/run-test.sh
@@ -23,11 +23,12 @@
 # If KUDU_COMPRESS_TEST_OUTPUT is non-empty, then the logs will be
 # gzip-compressed while they are written.
 #
-# If KUDU_FLAKY_TEST_ATTEMPTS is non-zero, and the test being run matches
-# one of the lines in the file KUDU_FLAKY_TEST_LIST, then the test will
-# be retried on failure up to the specified number of times. This can be
-# used in the gerrit workflow to prevent annoying false -1s caused by
-# tests that are known to be flaky in master.
+# If KUDU_FLAKY_TEST_ATTEMPTS is non-zero, and either the test being run
+# matches one of the lines in the file KUDU_FLAKY_TEST_LIST or
+# KUDU_RETRY_ALL_FAILED_TESTS is non-zero, then the test will be retried on
+# failure up to the specified number of times. This can be used in the gerrit
+# workflow to prevent annoying false -1s caused by tests that are known to be
+# flaky in master.
 #
 # If KUDU_REPORT_TEST_RESULTS is non-zero, then tests are reported to the
 # central test server.
@@ -70,23 +71,27 @@ else
   TEST_NAME=${SHORT_TEST_NAME}
 fi
 
-# Determine whether the test is a known flaky by comparing against the user-specified
-# list.
+# Determine whether the user has chosen to retry all failed tests, or whether
+# the test is a known flaky by comparing against the user-specified list.
 TEST_EXECUTION_ATTEMPTS=1
-if [ -n "$KUDU_FLAKY_TEST_LIST" ]; then
+if [ "$KUDU_RETRY_ALL_FAILED_TESTS" -gt 0 ]; then
+  echo "Will retry on failure"
+  TEST_IS_RETRYABLE=1
+elif [ -n "KUDU_FLAKY_TEST_LIST" ]; then
   if [ -f "$KUDU_FLAKY_TEST_LIST" ]; then
-    IS_KNOWN_FLAKY=$(grep --count --line-regexp "$SHORT_TEST_NAME" "$KUDU_FLAKY_TEST_LIST")
+    TEST_IS_RETRYABLE=$(grep --count --line-regexp "$SHORT_TEST_NAME" "$KUDU_FLAKY_TEST_LIST")
   else
     echo "Flaky test list file $KUDU_FLAKY_TEST_LIST missing"
-    IS_KNOWN_FLAKY=0
-  fi
-  if [ "$IS_KNOWN_FLAKY" -gt 0 ]; then
-    TEST_EXECUTION_ATTEMPTS=${KUDU_FLAKY_TEST_ATTEMPTS:-1}
-    echo $TEST_NAME is a known-flaky test. Will attempt running it
-    echo up to $TEST_EXECUTION_ATTEMPTS times.
+    TEST_IS_RETRYABLE=0
   fi
 fi
 
+if [ "$TEST_IS_RETRYABLE" -gt 0 ]; then
+  TEST_EXECUTION_ATTEMPTS=${KUDU_FLAKY_TEST_ATTEMPTS:-1}
+  echo $TEST_NAME is a retryable test. Will attempt running it
+  echo up to $TEST_EXECUTION_ATTEMPTS times.
+fi
+
 
 # We run each test in its own subdir to avoid core file related races.
 TEST_WORKDIR=$BUILD_ROOT/test-work/$TEST_NAME

[1/2] kudu git commit: build-support: option to retry all failed tests

Reply via email to