If we want to catch new failures faster we have a better chance if CI
doesn't auto-retry (once).

There are some tests that are still "unstable" and fail every now and
then.  In order to reduce the number of false negatives keep the
--recheck for them.  To achieve that we use a new macro, TAG_UNSTABLE,
to tag all these tests.  The list of "unstable" tests is compiled based
on the following discussion:
https://mail.openvswitch.org/pipermail/ovs-dev/2023-June/405465.html

In order to avoid new GitHub actions jobs, we re-purpose the last job of
each target type to also run the unstable tests.  These jobs were
already running less tests than others so the additional run time should
not be an issue.

Signed-off-by: Dumitru Ceara <dce...@redhat.com>
---
V4:
- Correctly pass 'SKIP_UNSTABLE' to system tests.
- tag more tests (1 unit test, 3 system tests) as unstable.
V3:
- Addressed Ales' comment:
  - pass RECHECK to the container running the tests in ci.sh
V2:
- Addressed Ales' comments:
  - always run stable and unstable tests before declaring pass/fail
Changes in v1 (since RFC):
- kept recheck for unstable tests
- introduced TAG_UNSTABLE
- changed test.yml to run unstable tests in the last batch of every
  test target type.
---
 .ci/ci.sh                  |  2 +-
 .ci/linux-build.sh         | 77 ++++++++++++++++++++++++++++++--------
 .github/workflows/test.yml | 15 ++++----
 tests/ovn-ic.at            |  1 +
 tests/ovn-ipsec.at         |  1 +
 tests/ovn-macros.at        |  5 +++
 tests/ovn-northd.at        |  1 +
 tests/ovn-performance.at   |  1 +
 tests/ovn.at               | 14 +++++++
 tests/system-ovn.at        |  3 ++
 10 files changed, 97 insertions(+), 23 deletions(-)

diff --git a/.ci/ci.sh b/.ci/ci.sh
index 3f1b41eadc..6beeace840 100755
--- a/.ci/ci.sh
+++ b/.ci/ci.sh
@@ -101,7 +101,7 @@ function run_tests() {
         && \
         ARCH=$ARCH CC=$CC LIBS=$LIBS OPTS=$OPTS TESTSUITE=$TESTSUITE \
         TEST_RANGE=$TEST_RANGE SANITIZERS=$SANITIZERS DPDK=$DPDK \
-        ./.ci/linux-build.sh
+        RECHECK=$RECHECK UNSTABLE=$UNSTABLE ./.ci/linux-build.sh
     "
 }
 
diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
index 7270b9e60a..b0c3c9252e 100755
--- a/.ci/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -9,6 +9,7 @@ COMMON_CFLAGS=""
 OVN_CFLAGS=""
 OPTS="$OPTS --enable-Werror"
 JOBS=${JOBS:-"-j4"}
+RECHECK=${RECHECK:-"no"}
 
 function install_dpdk()
 {
@@ -99,6 +100,18 @@ function configure_clang()
     COMMON_CFLAGS="${COMMON_CFLAGS} -Wno-error=unused-command-line-argument"
 }
 
+function run_tests()
+{
+    if ! make distcheck CFLAGS="${COMMON_CFLAGS} ${OVN_CFLAGS}" $JOBS \
+        TESTSUITEFLAGS="$JOBS $TEST_RANGE" RECHECK=$RECHECK \
+        SKIP_UNSTABLE=$SKIP_UNSTABLE
+    then
+        # testsuite.log is necessary for debugging.
+        cat */_build/sub/tests/testsuite.log
+        return 1
+    fi
+}
+
 function execute_tests()
 {
     # 'distcheck' will reconfigure with required options.
@@ -106,27 +119,61 @@ function execute_tests()
     configure_ovn
 
     export DISTCHECK_CONFIGURE_FLAGS="$OPTS"
-    if ! make distcheck CFLAGS="${COMMON_CFLAGS} ${OVN_CFLAGS}" $JOBS \
-        TESTSUITEFLAGS="$JOBS $TEST_RANGE" RECHECK=yes
-    then
-        # testsuite.log is necessary for debugging.
-        cat */_build/sub/tests/testsuite.log
+
+    local stable_rc=0
+    local unstable_rc=0
+
+    if ! SKIP_UNSTABLE=yes run_tests; then
+        stable_rc=1
+    fi
+
+    if [ "$UNSTABLE" ]; then
+        if ! SKIP_UNSTABLE=no TEST_RANGE="-k unstable" RECHECK=yes \
+                run_tests; then
+            unstable_rc=1
+        fi
+    fi
+
+    if [[ $stable_rc -ne 0 ]] || [[ $unstable_rc -ne 0 ]]; then
         exit 1
     fi
 }
 
+function run_system_tests()
+{
+    local type=$1
+    local log_file=$2
+
+    if ! sudo make $JOBS $type TESTSUITEFLAGS="$TEST_RANGE" \
+            RECHECK=$RECHECK SKIP_UNSTABLE=$SKIP_UNSTABLE; then
+        # $log_file is necessary for debugging.
+        cat tests/$log_file
+        return 1
+    fi
+}
+
 function execute_system_tests()
 {
-      type=$1
-      log_file=$2
-
-      configure_ovn $OPTS
-      make $JOBS || { cat config.log; exit 1; }
-      if ! sudo make $JOBS $type TESTSUITEFLAGS="$TEST_RANGE" RECHECK=yes; then
-          # $log_file is necessary for debugging.
-          cat tests/$log_file
-          exit 1
-      fi
+    configure_ovn $OPTS
+    make $JOBS || { cat config.log; exit 1; }
+
+    local stable_rc=0
+    local unstable_rc=0
+
+    if ! SKIP_UNSTABLE=yes run_system_tests $@; then
+        stable_rc=1
+    fi
+
+    if [ "$UNSTABLE" ]; then
+        if ! SKIP_UNSTABLE=no TEST_RANGE="-k unstable" RECHECK=yes \
+                run_system_tests $@; then
+            unstable_rc=1
+        fi
+    fi
+
+    if [[ $stable_rc -ne 0 ]] || [[ $unstable_rc -ne 0 ]]; then
+        exit 1
+    fi
 }
 
 configure_$CC
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5c5ce6ed10..9ec9fe8c72 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -92,6 +92,7 @@ jobs:
       TESTSUITE:   ${{ matrix.cfg.testsuite }}
       TEST_RANGE:  ${{ matrix.cfg.test_range }}
       SANITIZERS:  ${{ matrix.cfg.sanitizers }}
+      UNSTABLE:    ${{ matrix.cfg.unstable }}
 
     name: linux ${{ join(matrix.cfg.*, ' ') }}
     runs-on: ubuntu-latest
@@ -104,21 +105,21 @@ jobs:
         - { compiler: clang, opts: --disable-ssl }
         - { compiler: gcc, testsuite: test, test_range: "-300" }
         - { compiler: gcc, testsuite: test, test_range: "301-600" }
-        - { compiler: gcc, testsuite: test, test_range: "601-" }
+        - { compiler: gcc, testsuite: test, test_range: "601-", unstable: 
unstable }
         - { compiler: clang, testsuite: test, sanitizers: sanitizers, 
test_range: "-300" }
         - { compiler: clang, testsuite: test, sanitizers: sanitizers, 
test_range: "301-600" }
-        - { compiler: clang, testsuite: test, sanitizers: sanitizers, 
test_range: "601-" }
+        - { compiler: clang, testsuite: test, sanitizers: sanitizers, 
test_range: "601-", unstable: unstable }
         - { compiler: gcc, testsuite: test, libs: -ljemalloc, test_range: 
"-300" }
         - { compiler: gcc, testsuite: test, libs: -ljemalloc, test_range: 
"301-600" }
-        - { compiler: gcc, testsuite: test, libs: -ljemalloc, test_range: 
"601-" }
+        - { compiler: gcc, testsuite: test, libs: -ljemalloc, test_range: 
"601-", unstable: unstable }
         - { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, 
test_range: "-100" }
-        - { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, 
test_range: "101-" }
+        - { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, 
test_range: "101-", unstable: unstable }
         - { compiler: gcc, testsuite: system-test-userspace, test_range: 
"-100" }
-        - { compiler: gcc, testsuite: system-test-userspace, test_range: 
"101-" }
+        - { compiler: gcc, testsuite: system-test-userspace, test_range: 
"101-", unstable: unstable }
         - { compiler: gcc, testsuite: system-test, test_range: "-100" }
-        - { compiler: gcc, testsuite: system-test, test_range: "101-" }
+        - { compiler: gcc, testsuite: system-test, test_range: "101-", 
unstable: unstable }
         - { compiler: clang, testsuite: system-test, sanitizers: sanitizers, 
test_range: "-100" }
-        - { compiler: clang, testsuite: system-test, sanitizers: sanitizers, 
test_range: "101-" }
+        - { compiler: clang, testsuite: system-test, sanitizers: sanitizers, 
test_range: "101-", unstable: unstable }
         - { arch: x86, compiler: gcc, opts: --disable-ssl }
 
     steps:
diff --git a/tests/ovn-ic.at b/tests/ovn-ic.at
index 2ae8dd12d4..15b1e99062 100644
--- a/tests/ovn-ic.at
+++ b/tests/ovn-ic.at
@@ -256,6 +256,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([ovn-ic -- gateway sync])
+TAG_UNSTABLE
 
 ovn_init_ic_db
 net_add n1
diff --git a/tests/ovn-ipsec.at b/tests/ovn-ipsec.at
index 10ef978780..e621547c59 100644
--- a/tests/ovn-ipsec.at
+++ b/tests/ovn-ipsec.at
@@ -1,6 +1,7 @@
 AT_BANNER([OVN - IPsec])
 
 AT_SETUP([ipsec -- basic configuration])
+TAG_UNSTABLE
 ovn_start
 
 # Configure the Northbound database
diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
index 8dc4ec75c6..bdc6cc9c87 100644
--- a/tests/ovn-macros.at
+++ b/tests/ovn-macros.at
@@ -971,3 +971,8 @@ m4_define([RUN_OVN_NBCTL], [
     check ovn-nbctl ${command}
     unset command
 ])
+
+m4_define([TAG_UNSTABLE], [
+    AT_KEYWORDS([unstable])
+    AT_SKIP_IF([test X"$SKIP_UNSTABLE" = Xyes])
+])
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 196fe01fbd..9c981b56e7 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -4608,6 +4608,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD_NO_HV([
 AT_SETUP([northd ssl file change])
+TAG_UNSTABLE
 AT_SKIP_IF([test "$HAVE_OPENSSL" = no])
 PKIDIR="$(cd $abs_top_builddir/tests && pwd)"
 AT_SKIP_IF([expr "$PKIDIR" : ".*[[     '\"
diff --git a/tests/ovn-performance.at b/tests/ovn-performance.at
index ba329f0f64..9de0a4e770 100644
--- a/tests/ovn-performance.at
+++ b/tests/ovn-performance.at
@@ -225,6 +225,7 @@ m4_define([OVN_CONTROLLER_EXPECT_HIT_COND],[
 ])
 
 AT_SETUP([ovn-controller incremental processing])
+TAG_UNSTABLE
 # Check which operations the trigger full logical flow processing.
 #
 # Create and destroy logical routers, switches, ports, address sets and ACLs
diff --git a/tests/ovn.at b/tests/ovn.at
index 637d92bedb..a88df0262b 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -6050,6 +6050,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([1 HV, 2 LSs, 1 lport/LS, 1 LR])
 AT_KEYWORDS([router-admin-state])
+TAG_UNSTABLE
 ovn_start
 
 # Logical network:
@@ -7865,6 +7866,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([policy-based routing: 1 HVs, 2 LSs, 1 lport/LS, 1 LR])
 AT_KEYWORDS([policy-based-routing])
+TAG_UNSTABLE
 ovn_start
 
 # Logical network:
@@ -8037,6 +8039,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([policy-based routing IPv6: 1 HVs, 3 LSs, 1 lport/LS, 1 LR])
 AT_KEYWORDS([policy-based-routing])
+TAG_UNSTABLE
 ovn_start
 
 # Logical network:
@@ -14394,6 +14397,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([options:multiple requested-chassis for logical port])
 AT_KEYWORDS([multi-chassis])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -14525,6 +14529,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([options:multiple requested-chassis for logical port: change chassis 
role])
 AT_KEYWORDS([multi-chassis])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -14575,6 +14580,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([options:multiple requested-chassis for logical port: unclaimed 
behavior])
 AT_KEYWORDS([multi-chassis])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -16160,6 +16166,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([tug-of-war between two chassis for the same port])
+TAG_UNSTABLE
 ovn_start
 
 ovn-nbctl ls-add ls0
@@ -29585,6 +29592,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([nb_cfg timestamp])
+TAG_UNSTABLE
 ovn_start
 
 check ovn-nbctl ls-add s2
@@ -29686,6 +29694,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([ARP replies for SNAT external ips])
 AT_KEYWORDS([slowtest])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -30047,6 +30056,7 @@ AT_CLEANUP
 # It is to cover a corner case when flows are re-processed in the I-P
 # iteration, combined with the scenario of conflicting ACLs.
 AT_SETUP([conflict ACLs with address set])
+TAG_UNSTABLE
 ovn_start
 
 ovn-nbctl ls-add ls1
@@ -30242,6 +30252,7 @@ AT_CLEANUP
 #
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([multi-vtep SB Chassis encap updates])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -32430,6 +32441,7 @@ AT_CLEANUP
 # - 2 for expanding the port group @pg1 to the 2 locally bound lports.
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([ACL with Port Group conjunction flow efficiency])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -34723,6 +34735,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([recomputes])
+TAG_UNSTABLE
 ovn_start
 
 net_add n1
@@ -35157,6 +35170,7 @@ MULTIPLE_OVS_INT([])
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([Check default openflow flows])
+TAG_UNSTABLE
 ovn_start
 
 # Check that every table has a default (i.e: priority=0) flow.
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index b7fb489e50..1c25297492 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -8906,6 +8906,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([LB - ICMP related traffic])
+TAG_UNSTABLE
 
 CHECK_CONNTRACK()
 CHECK_CONNTRACK_NAT()
@@ -11224,6 +11225,7 @@ AT_CLEANUP
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([ovn mirroring])
+TAG_UNSTABLE
 AT_KEYWORDS([mirror])
 AT_SKIP_IF([test $HAVE_TCPDUMP = no])
 
@@ -11403,6 +11405,7 @@ AT_CLEANUP
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([Tiered ACLs])
 AT_KEYWORDS([acl])
+TAG_UNSTABLE
 
 ovn_start
 OVS_TRAFFIC_VSWITCHD_START()
-- 
2.39.3

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to