If we want to catch new failures faster we have a better chance if CI
doesn't auto-retry (once).

There are some tests that are still "unstable" and fail every now and
then.  In order to reduce the number of false negatives keep the
--recheck for them.  To achieve that we use a new macro, TAG_UNSTABLE,
to tag all these tests.  The list of "unstable" tests is compiled based
on the following discussion:
https://mail.openvswitch.org/pipermail/ovs-dev/2023-June/405465.html

In order to avoid new GitHub actions jobs, we re-purpose the last job of
each target type to also run the unstable tests.  These jobs were
already running less tests than others so the additional run time should
not be an issue.

Signed-off-by: Dumitru Ceara <dce...@redhat.com>
---
V3:
- Addressed Ales' comment:
  - pass RECHECK to the container running the tests in ci.sh
V2:
- Addressed Ales' comments:
  - always run stable and unstable tests before declaring pass/fail
Changes in v1 (since RFC):
- kept recheck for unstable tests
- introduced TAG_UNSTABLE
- changed test.yml to run unstable tests in the last batch of every
  test target type.
---
 .ci/ci.sh                  |  2 +-
 .ci/linux-build.sh         | 76 ++++++++++++++++++++++++++++++--------
 .github/workflows/test.yml | 15 ++++----
 tests/ovn-ic.at            |  1 +
 tests/ovn-ipsec.at         |  1 +
 tests/ovn-macros.at        |  5 +++
 tests/ovn-northd.at        |  1 +
 tests/ovn-performance.at   |  1 +
 tests/ovn.at               | 13 +++++++
 9 files changed, 92 insertions(+), 23 deletions(-)

diff --git a/.ci/ci.sh b/.ci/ci.sh
index 10f11939c5..3a7ee97696 100755
--- a/.ci/ci.sh
+++ b/.ci/ci.sh
@@ -101,7 +101,7 @@ function run_tests() {
         && \
         ARCH=$ARCH CC=$CC LIBS=$LIBS OPTS=$OPTS TESTSUITE=$TESTSUITE \
         TEST_RANGE=$TEST_RANGE SANITIZERS=$SANITIZERS DPDK=$DPDK \
-        ./.ci/linux-build.sh
+        RECHECK=$RECHECK UNSTABLE=$UNSTABLE ./.ci/linux-build.sh
     "
 }
 
diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
index 5a79a52daf..4c5361f3ca 100755
--- a/.ci/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -9,6 +9,7 @@ COMMON_CFLAGS=""
 OVN_CFLAGS=""
 OPTS="$OPTS --enable-Werror"
 JOBS=${JOBS:-"-j4"}
+RECHECK=${RECHECK:-"no"}
 
 function install_dpdk()
 {
@@ -99,6 +100,17 @@ function configure_clang()
     COMMON_CFLAGS="${COMMON_CFLAGS} -Wno-error=unused-command-line-argument"
 }
 
+function run_tests()
+{
+    if ! make distcheck CFLAGS="${COMMON_CFLAGS} ${OVN_CFLAGS}" $JOBS \
+        TESTSUITEFLAGS="$JOBS $TEST_RANGE" RECHECK=$RECHECK
+    then
+        # testsuite.log is necessary for debugging.
+        cat */_build/sub/tests/testsuite.log
+        return 1
+    fi
+}
+
 function execute_tests()
 {
     # 'distcheck' will reconfigure with required options.
@@ -106,27 +118,61 @@ function execute_tests()
     configure_ovn
 
     export DISTCHECK_CONFIGURE_FLAGS="$OPTS"
-    if ! make distcheck CFLAGS="${COMMON_CFLAGS} ${OVN_CFLAGS}" $JOBS \
-        TESTSUITEFLAGS="$JOBS $TEST_RANGE" RECHECK=yes
-    then
-        # testsuite.log is necessary for debugging.
-        cat */_build/sub/tests/testsuite.log
+
+    local stable_rc=0
+    local unstable_rc=0
+
+    if ! SKIP_UNSTABLE=yes run_tests; then
+        stable_rc=1
+    fi
+
+    if [ "$UNSTABLE" ]; then
+        if ! SKIP_UNSTABLE=no TEST_RANGE="-k unstable" RECHECK=yes \
+                run_tests; then
+            unstable_rc=1
+        fi
+    fi
+
+    if [[ $stable_rc -ne 0 ]] || [[ $unstable_rc -ne 0 ]]; then
         exit 1
     fi
 }
 
+function run_system_tests()
+{
+    local type=$1
+    local log_file=$2
+
+    if ! sudo make $JOBS $type TESTSUITEFLAGS="$TEST_RANGE" \
+            RECHECK=$RECHECK; then
+        # $log_file is necessary for debugging.
+        cat tests/$log_file
+        return 1
+    fi
+}
+
 function execute_system_tests()
 {
-      type=$1
-      log_file=$2
-
-      configure_ovn $OPTS
-      make $JOBS || { cat config.log; exit 1; }
-      if ! sudo make $JOBS $type TESTSUITEFLAGS="$TEST_RANGE" RECHECK=yes; then
-          # $log_file is necessary for debugging.
-          cat tests/$log_file
-          exit 1
-      fi
+    configure_ovn $OPTS
+    make $JOBS || { cat config.log; exit 1; }
+
+    local stable_rc=0
+    local unstable_rc=0
+
+    if ! SKIP_UNSTABLE=yes run_system_tests $@; then
+        stable_rc=1
+    fi
+
+    if [ "$UNSTABLE" ]; then
+        if ! SKIP_UNSTABLE=no TEST_RANGE="-k unstable" RECHECK=yes \
+                run_system_tests $@; then
+            unstable_rc=1
+        fi
+    fi
+
+    if [[ $stable_rc -ne 0 ]] || [[ $unstable_rc -ne 0 ]]; then
+        exit 1
+    fi
 }
 
 configure_$CC
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index fe2a14c401..7d40251003 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -92,6 +92,7 @@ jobs:
       TESTSUITE:   ${{ matrix.cfg.testsuite }}
       TEST_RANGE:  ${{ matrix.cfg.test_range }}
       SANITIZERS:  ${{ matrix.cfg.sanitizers }}
+      UNSTABLE:    ${{ matrix.cfg.unstable }}
 
     name: linux ${{ join(matrix.cfg.*, ' ') }}
     runs-on: ubuntu-latest
@@ -104,27 +105,27 @@ jobs:
         - { compiler: clang, opts: --disable-ssl }
         - { compiler: gcc, testsuite: test, test_range: "-500" }
         - { compiler: gcc, testsuite: test, test_range: "501-1000" }
-        - { compiler: gcc, testsuite: test, test_range: "1001-" }
+        - { compiler: gcc, testsuite: test, test_range: "1001-", unstable: 
unstable }
         - { compiler: clang, testsuite: test, sanitizers: sanitizers, 
test_range: "-300" }
         - { compiler: clang, testsuite: test, sanitizers: sanitizers, 
test_range: "301-600" }
         - { compiler: clang, testsuite: test, sanitizers: sanitizers, 
test_range: "601-900" }
         - { compiler: clang, testsuite: test, sanitizers: sanitizers, 
test_range: "901-1200" }
-        - { compiler: clang, testsuite: test, sanitizers: sanitizers, 
test_range: "1201-" }
+        - { compiler: clang, testsuite: test, sanitizers: sanitizers, 
test_range: "1201-", unstable: unstable }
         - { compiler: gcc, testsuite: test, libs: -ljemalloc, test_range: 
"-500" }
         - { compiler: gcc, testsuite: test, libs: -ljemalloc, test_range: 
"501-1000" }
-        - { compiler: gcc, testsuite: test, libs: -ljemalloc, test_range: 
"1001-" }
+        - { compiler: gcc, testsuite: test, libs: -ljemalloc, test_range: 
"1001-", unstable: unstable }
         - { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, 
test_range: "-100" }
         - { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, 
test_range: "101-200" }
-        - { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, 
test_range: "201-" }
+        - { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, 
test_range: "201-", unstable: unstable }
         - { compiler: gcc, testsuite: system-test-userspace, test_range: 
"-100" }
         - { compiler: gcc, testsuite: system-test-userspace, test_range: 
"101-200" }
-        - { compiler: gcc, testsuite: system-test-userspace, test_range: 
"201-" }
+        - { compiler: gcc, testsuite: system-test-userspace, test_range: 
"201-", unstable: unstable }
         - { compiler: gcc, testsuite: system-test, test_range: "-100" }
         - { compiler: gcc, testsuite: system-test, test_range: "101-200" }
-        - { compiler: gcc, testsuite: system-test, test_range: "201-" }
+        - { compiler: gcc, testsuite: system-test, test_range: "201-", 
unstable: unstable }
         - { compiler: clang, testsuite: system-test, sanitizers: sanitizers, 
test_range: "-100" }
         - { compiler: clang, testsuite: system-test, sanitizers: sanitizers, 
test_range: "101-200" }
-        - { compiler: clang, testsuite: system-test, sanitizers: sanitizers, 
test_range: "201-" }
+        - { compiler: clang, testsuite: system-test, sanitizers: sanitizers, 
test_range: "201-", unstable: unstable }
         - { arch: x86, compiler: gcc, opts: --disable-ssl }
 
     steps:
diff --git a/tests/ovn-ic.at b/tests/ovn-ic.at
index ceee450925..285662e3b8 100644
--- a/tests/ovn-ic.at
+++ b/tests/ovn-ic.at
@@ -256,6 +256,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([ovn-ic -- gateway sync])
+TAG_UNSTABLE
 
 ovn_init_ic_db
 net_add n1
diff --git a/tests/ovn-ipsec.at b/tests/ovn-ipsec.at
index 10ef978780..e621547c59 100644
--- a/tests/ovn-ipsec.at
+++ b/tests/ovn-ipsec.at
@@ -1,6 +1,7 @@
 AT_BANNER([OVN - IPsec])
 
 AT_SETUP([ipsec -- basic configuration])
+TAG_UNSTABLE
 ovn_start
 
 # Configure the Northbound database
diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
index 6f2d085ae4..6420721740 100644
--- a/tests/ovn-macros.at
+++ b/tests/ovn-macros.at
@@ -871,3 +871,8 @@ m4_define([RUN_OVN_NBCTL], [
     check ovn-nbctl ${command}
     unset command
 ])
+
+m4_define([TAG_UNSTABLE], [
+    AT_KEYWORDS([unstable])
+    AT_SKIP_IF([test X"$SKIP_UNSTABLE" = Xyes])
+])
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 3e06f14c94..c1c8287e03 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -4531,6 +4531,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD_NO_HV([
 AT_SETUP([northd ssl file change])
+TAG_UNSTABLE
 AT_SKIP_IF([test "$HAVE_OPENSSL" = no])
 PKIDIR="$(cd $abs_top_builddir/tests && pwd)"
 AT_SKIP_IF([expr "$PKIDIR" : ".*[[     '\"
diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
index ba329f0f64..9de0a4e770 100644
--- a/tests/ovn-performance.at
+++ b/tests/ovn-performance.at
@@ -225,6 +225,7 @@ m4_define([OVN_CONTROLLER_EXPECT_HIT_COND],[
 ])
 
 AT_SETUP([ovn-controller incremental processing])
+TAG_UNSTABLE
 # Check which operations the trigger full logical flow processing.
 #
 # Create and destroy logical routers, switches, ports, address sets and ACLs
diff --git a/tests/ovn.at b/tests/ovn.at
index cd6d4b9ff4..cd8f481bbc 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -7853,6 +7853,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([policy-based routing: 1 HVs, 2 LSs, 1 lport/LS, 1 LR])
 AT_KEYWORDS([policy-based-routing])
+TAG_UNSTABLE
 ovn_start
 
 # Logical network:
@@ -8025,6 +8026,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([policy-based routing IPv6: 1 HVs, 3 LSs, 1 lport/LS, 1 LR])
 AT_KEYWORDS([policy-based-routing])
+TAG_UNSTABLE
 ovn_start
 
 # Logical network:
@@ -14405,6 +14407,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([options:multiple requested-chassis for logical port])
 AT_KEYWORDS([multi-chassis])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -14507,6 +14510,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([options:multiple requested-chassis for logical port: change chassis 
role])
 AT_KEYWORDS([multi-chassis])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -14557,6 +14561,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([options:multiple requested-chassis for logical port: unclaimed 
behavior])
 AT_KEYWORDS([multi-chassis])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -16130,6 +16135,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([tug-of-war between two chassis for the same port])
+TAG_UNSTABLE
 ovn_start
 
 ovn-nbctl ls-add ls0
@@ -29503,6 +29509,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([nb_cfg timestamp])
+TAG_UNSTABLE
 ovn_start
 
 check ovn-nbctl ls-add s2
@@ -29604,6 +29611,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([ARP replies for SNAT external ips])
 AT_KEYWORDS([slowtest])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -29970,6 +29978,7 @@ AT_CLEANUP
 # It is to cover a corner case when flows are re-processed in the I-P
 # iteration, combined with the scenario of conflicting ACLs.
 AT_SETUP([conflict ACLs with address set])
+TAG_UNSTABLE
 ovn_start
 
 ovn-nbctl ls-add ls1
@@ -30165,6 +30174,7 @@ AT_CLEANUP
 #
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([multi-vtep SB Chassis encap updates])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -32354,6 +32364,7 @@ AT_CLEANUP
 # - 2 for expanding the port group @pg1 to the 2 locally bound lports.
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([ACL with Port Group conjunction flow efficiency])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -34606,6 +34617,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([recomputes])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -35040,6 +35052,7 @@ MULTIPLE_OVS_INT([])
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([Check default openflow flows])
+TAG_UNSTABLE
 ovn_start
 
 # Check that every table has a default (i.e: priority=0) flow.
-- 
2.31.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to