When OVN is upgraded, ovn-controller is updated first on the compute nodes; ovn-northd and the databases are upgraded afterwards. This patch tests whether the intermediate state (i.e. with only ovn-controller upgraded) works properly, by running the system tests from the baseline (i.e. the version before the upgrade).
Flow tables might change between releases. Hence this patch must take that into account by updating the (old) system tests with any updated table numbers. In some cases, the (new) ovn-controller might change flows in existing tables, causing some 'upgrade' tests to fail. Such tests can be skipped using the TAG_TEST_NOT_UPGRADABLE tag. This patch updates the CI to automatically run some upgrade tests weekly. It also provides a 'make check-upgrade' target to run those tests locally. Upgrade tests are run on push/pull only for the LTS (24.03) and the latest release (25.09) to keep the CI run time reasonable. Upgrades from other branches are run on schedule. This patch depends on patch [1] on branch-25.09. [1] "tests: Add new TAG_TEST_NOT_UPGRADABLE to some tests." Reported-at: https://issues.redhat.com/browse/FDP-1240 Assisted-by: claude, with model: Claude Sonnet 4.5 Signed-off-by: Xavier Simonart <[email protected]> -v2: - Updated based on Ales' feedback: - Move upgrade test logic from complex sh to py script. - Create new yaml for upgrade tests. - Rebased. - Clone Base branch in different folder, to avoid messing up user development folder. - Run upgrade tests through make check-upgrade instead of shell script. - Create CI matrix dynamically so it is more clear which steps are run. - Updated testing.rst. 
--- .ci/ci.sh | 5 +- .ci/linux-build.sh | 35 +- .ci/ovn_upgrade_test.py | 110 +++++ .ci/ovn_upgrade_utils.py | 556 ++++++++++++++++++++++++ .github/workflows/ovn-upgrade-tests.yml | 187 ++++++++ Documentation/topics/testing.rst | 172 ++++++++ Makefile.am | 3 + tests/automake.mk | 17 + 8 files changed, 1076 insertions(+), 9 deletions(-) create mode 100755 .ci/ovn_upgrade_test.py create mode 100755 .ci/ovn_upgrade_utils.py create mode 100644 .github/workflows/ovn-upgrade-tests.yml diff --git a/.ci/ci.sh b/.ci/ci.sh index 3640d3243..76c364868 100755 --- a/.ci/ci.sh +++ b/.ci/ci.sh @@ -54,6 +54,9 @@ function archive_logs() { cp -r $CONTAINER_WORKDIR/tests/system-*-testsuite.* \ $log_dir || true \ && \ + cp -r $CONTAINER_WORKDIR/tests/upgrade-testsuite.* \ + $log_dir || true \ + && \ chmod -R +r $log_dir \ && tar -czvf $CONTAINER_WORKSPACE/logs.tgz $log_dir @@ -102,7 +105,7 @@ function run_tests() { ARCH=$ARCH CC=$CC LIBS=$LIBS OPTS=$OPTS TESTSUITE=$TESTSUITE \ TEST_RANGE=$TEST_RANGE SANITIZERS=$SANITIZERS DPDK=$DPDK \ RECHECK=$RECHECK UNSTABLE=$UNSTABLE TIMEOUT=$TIMEOUT \ - ./.ci/linux-build.sh + BASE_VERSION=$BASE_VERSION ./.ci/linux-build.sh " } diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh index 183833a16..cb1981fee 100755 --- a/.ci/linux-build.sh +++ b/.ci/linux-build.sh @@ -1,7 +1,11 @@ #!/bin/bash set -o errexit -set -x + +# Enable debug output for CI, optional for local +if [ "$NO_DEBUG" = "0" ]; then + set -x +fi ARCH=${ARCH:-"x86_64"} USE_SPARSE=${USE_SPARSE:-"yes"} @@ -11,6 +15,7 @@ OPTS="$OPTS --enable-Werror" JOBS=${JOBS:-"-j4"} RECHECK=${RECHECK:-"no"} TIMEOUT=${TIMEOUT:-"0"} +NO_DEBUG=${NO_DEBUG:-0} function install_dpdk() { @@ -181,17 +186,23 @@ function run_system_tests() if ! sudo timeout -k 5m -v $TIMEOUT make $JOBS $type \ TESTSUITEFLAGS="$TEST_RANGE" RECHECK=$RECHECK \ - SKIP_UNSTABLE=$SKIP_UNSTABLE; then - # $log_file is necessary for debugging. 
- cat tests/$log_file + SKIP_UNSTABLE=$SKIP_UNSTABLE UPGRADE_TEST=$UPGRADE_TEST \ + BASE_VERSION=$BASE_VERSION; then + # Suppress output locally when NO_DEBUG not 0. + if [ "$NO_DEBUG" = "0" ]; then + cat tests/$log_file + fi return 1 fi } function execute_system_tests() { - configure_ovn $OPTS - make $JOBS || { cat config.log; exit 1; } + # Upgrade tests build separately + if [ "$UPGRADE_TEST" != "yes" ]; then + configure_ovn $OPTS + make $JOBS || { cat config.log; exit 1; } + fi local stable_rc=0 local unstable_rc=0 @@ -201,8 +212,12 @@ function execute_system_tests() fi if [ "$UNSTABLE" ]; then - if ! SKIP_UNSTABLE=no TEST_RANGE="-k unstable" RECHECK=yes \ - run_system_tests $@; then + if [[ "$TEST_RANGE" == *"-d"* ]]; then + TEST_RANGE="-k unstable -d" + else + TEST_RANGE="-k unstable" + fi + if ! SKIP_UNSTABLE=no RECHECK=yes run_system_tests $@; then unstable_rc=1 fi fi @@ -238,6 +253,10 @@ if [ "$TESTSUITE" ]; then sudo bash -c "echo 2048 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" execute_system_tests "check-system-dpdk" "system-dpdk-testsuite.log" ;; + + "upgrade-test") + execute_system_tests "check-upgrade" "system-kmod-testsuite.log" + ;; esac else configure_ovn $OPTS diff --git a/.ci/ovn_upgrade_test.py b/.ci/ovn_upgrade_test.py new file mode 100755 index 000000000..670558600 --- /dev/null +++ b/.ci/ovn_upgrade_test.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +import atexit +import os +import signal +import sys +from pathlib import Path + + +from ovn_upgrade_utils import ( + log, + run_command, + run_shell_command, + ovn_upgrade_save_current_binaries, + ovn_upgrade_extract_info, + run_upgrade_workflow, + remove_upgrade_test_directory, +) + + +def run_tests(base_dir, original_dir, flags, unstable): + log(f"Running system tests in upgrade scenario with flags {flags}") + os.chdir(base_dir) + cc = os.environ.get('CC', 'gcc') + no_debug = "1" if sys.stdout.isatty() else "0" + + test_cmd = f"""CC={cc} TESTSUITE=system-test UPGRADE_TEST=yes + 
TEST_RANGE="{flags}" UNSTABLE={unstable} + NO_DEBUG={no_debug} . {original_dir}/.ci/linux-build.sh""" + + success = run_shell_command(test_cmd) + os.chdir(original_dir) + return success + + +def main(): + test_success = False + cleanup_done = False + + def cleanup(): + nonlocal cleanup_done + if cleanup_done: + return + cleanup_done = True + + flags = os.environ.get('TESTSUITEFLAGS', '') + if '-d' in flags or not test_success: + log(f"Keeping {upgrade_dir} for debugging") + else: + remove_upgrade_test_directory(upgrade_dir, base_dir) + + atexit.register(cleanup) + signal.signal(signal.SIGINT, lambda s, f: sys.exit(1)) + signal.signal(signal.SIGTERM, lambda s, f: sys.exit(1)) + + base_version = os.environ.get('BASE_VERSION', 'branch-24.03') + flags = os.environ.get('TESTSUITEFLAGS') + unstable = os.environ.get('UNSTABLE') + + log("=" * 70) + log(f"OVN Upgrade Test - Base: {base_version}, Flags: {flags}") + log("=" * 70) + + if not run_command(["sudo", "-v"])[0]: + log("sudo access required") + return 1 + + original_dir = Path.cwd() + upgrade_dir = original_dir / "tests/upgrade-testsuite.dir" + base_dir = upgrade_dir / "base-repo" + binaries_dir = upgrade_dir / "ovn-upgrade-binaries" + + log(f"Removing old {upgrade_dir}...") + if not remove_upgrade_test_directory(upgrade_dir, base_dir): + log(f"Failed to remove old {upgrade_dir}") + return 1 + + upgrade_dir.mkdir(parents=True, exist_ok=True) + base_dir.mkdir(parents=True, exist_ok=True) + binaries_dir.mkdir(parents=True, exist_ok=True) + + log("Saving current version binaries") + if not ovn_upgrade_save_current_binaries(binaries_dir): + log("Failed to save current binaries") + return 1 + + if not ovn_upgrade_extract_info(upgrade_dir): + log("Failed to extract info") + return 1 + + if not run_upgrade_workflow(base_version, base_dir, upgrade_dir, + binaries_dir): + log("Upgrade workflow failed") + return 1 + + test_success = run_tests(base_dir, original_dir, flags, unstable) + + log("=" * 70) + if test_success: + 
log("UPGRADE TESTS PASSED") + else: + log("UPGRADE TESTS FAILED") + log(f"Check: {base_dir}/tests/system-kmod-testsuite.log") + log("=" * 70) + + return 0 if test_success else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.ci/ovn_upgrade_utils.py b/.ci/ovn_upgrade_utils.py new file mode 100755 index 000000000..50610a79e --- /dev/null +++ b/.ci/ovn_upgrade_utils.py @@ -0,0 +1,556 @@ +#!/usr/bin/env python3 + +import os +import re +import shutil +import subprocess +from datetime import datetime +from pathlib import Path + + +def log(message): + timestamp = datetime.now().strftime("%H:%M:%S") + print(f"[{timestamp}] {message}", flush=True) + + +def run_command(cmd): + result = subprocess.run(cmd, capture_output=True, text=True, check=False) + return result.returncode == 0, result.stdout, result.stderr + + +def run_shell_command(cmd, log_file=None): + if log_file: + with open(log_file, 'a', encoding='utf-8') as f: + result = subprocess.run(["bash", "-c", cmd], stdout=f, + stderr=subprocess.STDOUT, check=False) + else: + result = subprocess.run(["bash", "-c", cmd], check=False) + return result.returncode == 0 + + +def extract_oftable_values(content): + match = re.search(r'^#define\s+OFTABLE_LOG_EGRESS_PIPELINE\s+(\d+)', + content, re.MULTILINE) + log_egress = int(match.group(1)) if match else None + match = re.search(r'^#define\s+OFTABLE_SAVE_INPORT\s+(\d+)', + content, re.MULTILINE) + save_inport = int(match.group(1)) if match else None + return log_egress, save_inport + + +def replace_block_in_file(target_file, src_file, line_prefix): + if not target_file.exists(): + return False + if not src_file.exists(): + # No src_file file means nothing to replace. + return True + with open(target_file, encoding='utf-8') as f: + lines = f.readlines() + with open(src_file, encoding='utf-8') as f: + new_content = f.read() + + # Replace all lines starting with line_prefix with new_content. 
+ output_lines = [] + inserted = False + + for line in lines: + if line.startswith(line_prefix): + if not inserted: + output_lines.append(new_content) + inserted = True + # Skip old lines with this prefix + continue + output_lines.append(line) + + with open(target_file, 'w', encoding='utf-8') as f: + f.writelines(output_lines) + + return True + + +def ovn_upgrade_build(log_file): + use_sparse = "yes" if shutil.which("sparse") else "no" + cc = os.environ.get('CC', 'gcc') + opts = os.environ.get('OPTS', '') + log(f"Rebuilding OVN with {cc}") + + build_script = f""" + set -e + export USE_SPARSE={use_sparse} + export CC={cc} + export OPTS={opts} + make $JOBS + """ + return run_shell_command(build_script, log_file) + + +def ovs_ovn_upgrade_build(log_file): + use_sparse = "yes" if shutil.which("sparse") else "no" + cc = os.environ.get('CC', 'gcc') + opts = os.environ.get('OPTS', '') + log(f"Building OVS and OVN with {cc}") + build_script = f""" + set -e + export USE_SPARSE={use_sparse} + export CC={cc} + export OPTS={opts} + . 
.ci/linux-build.sh + """ + return run_shell_command(build_script, log_file) + + +def log_binary_version(binary_path, keywords): + success, stdout, _ = run_command([binary_path, "--version"]) + if success: + for line in stdout.splitlines(): + if any(kw in line for kw in keywords): + log(f" {line}") + + +def ovn_upgrade_save_current_binaries(binaries_dir): + + files = [ + "controller/ovn-controller", + "ovs/vswitchd/ovs-vswitchd", + "ovs/ovsdb/ovsdb-server", + "ovs/utilities/ovs-vsctl", + "ovs/utilities/ovs-ofctl", + "ovs/utilities/ovs-appctl", + "ovs/utilities/ovs-dpctl", + "ovs/vswitchd/vswitch.ovsschema" + ] + + for file in files: + try: + shutil.copy(Path(file), binaries_dir) + except Exception as e: + log(f"Failed to copy {file}: {e}") + return False + + log("Saved current versions:") + log_binary_version(str(binaries_dir / "ovn-controller"), + ['ovn-controller', 'SB DB Schema']) + log_binary_version(str(binaries_dir / "ovs-vswitchd"), ['vSwitch']) + return True + + +def ovn_upgrade_extract_info(upgrade_dir): + lflow_h = Path("controller/lflow.h") + if not lflow_h.exists(): + log("controller/lflow.h not found") + return False + + # Get all ofctl defines from lflow.h. + with open(lflow_h, encoding='utf-8') as f: + oftable_defines = [ + line.strip() for line in f if line.startswith('#define OFTABLE_') + ] + + if not oftable_defines: + log("No #define OFTABLE_ found in lflow.h") + return False + + output_file = upgrade_dir / "ovn-upgrade-ofctl-defines.h" + with open(output_file, 'w', encoding='utf-8') as of: + of.write('\n'.join(oftable_defines) + '\n') + log(f" Wrote {output_file}") + + # Get all m4_define([OFTABLE_ from ovn-macros.at. 
+ macros_file = Path("tests/ovn-macros.at") + output_file = upgrade_dir / "ovn-upgrade-oftable-m4-defines.txt" + if macros_file.exists(): + with open(macros_file, encoding='utf-8') as f: + m4_defines = [ + line.strip() for line in f + if line.startswith('m4_define([OFTABLE_') + ] + + with open(output_file, 'w', encoding='utf-8') as of: + of.write('\n'.join(m4_defines) + '\n' if m4_defines else '') + log(f" Wrote {output_file}") + + # Get value of OFTABLE_LOG_EGRESS_PIPELINE. + with open(lflow_h, encoding='utf-8') as f: + content = f.read() + new_log_egress, _ = extract_oftable_values(content) + + if not new_log_egress: + log("Could not extract OFTABLE_LOG_EGRESS_PIPELINE value") + return False + + output_file = upgrade_dir / "ovn-upgrade-new-log-egress.txt" + with open(output_file, 'w', encoding='utf-8') as f: + f.write(str(new_log_egress) + '\n') + log(f" Wrote {output_file}") + + return True + + +def ovn_upgrade_checkout_local(base_version, base_dir, log_file_str): + original_dir = os.getcwd() + log(f"Running locally. 
Cloning from {original_dir} to {base_dir}") + + success, _, stderr = run_command([ + "git", "clone", "--local", "--shared", ".", str(base_dir), + "--branch", base_version + ]) + if not success: + log(f"Failed to clone to {base_dir}") + log(stderr) + return False + + try: + os.chdir(base_dir) + log(f"Checking out base version: {base_version} from {base_dir}") + success, stdout, stderr = run_command(["git", "checkout", + base_version]) + with open(log_file_str, 'a', encoding='utf-8') as f: + f.write(stdout + stderr) + + if not success: + log(f"Failed to checkout {base_version}") + log(stderr) + return False + + return True + + finally: + os.chdir(original_dir) + + +def ovn_upgrade_clone_github(base_version, base_dir, log_file_str): + original_dir = os.getcwd() + success, origin_url, _ = run_command(["git", "config", "--get", + "remote.origin.url"]) + if not success or not origin_url.strip(): + log("Could not get origin URL from working directory") + return False + + try: + origin_url = origin_url.strip() + os.chdir(base_dir) + log(f"Cloning {base_version} from {origin_url} ") + success, stdout, stderr = run_command([ + 'git', 'clone', origin_url, base_dir, '--branch', + base_version, '--depth', '1', '--no-tags' + ]) + with open(log_file_str, 'a', encoding='utf-8') as f: + f.write(stdout + stderr) + + if not success and origin_url != "https://github.com/ovn-org/ovn": + log(f"Not found in {origin_url}, trying ovn-org...") + success, stdout, stderr = run_command([ + 'git', 'clone', "https://github.com/ovn-org/ovn.git", base_dir, + '--branch', base_version, '--depth', '1', '--no-tags' + ]) + with open(log_file_str, 'a', encoding='utf-8') as f: + f.write(stdout + stderr) + + if not success: + log(f"Failed to clone {base_version}") + log(stderr) + return False + finally: + os.chdir(original_dir) + return success + + +def ovn_upgrade_checkout_base(base_version, upgrade_dir, base_dir): + is_local = True + if base_version.startswith("origin/"): + base_version = 
base_version.split('/', 1)[-1] + is_local = False + + success = False + log_file = upgrade_dir / "git.log" + if log_file.exists(): + log_file.unlink() + log_file_str = str(log_file) + + if is_local: + success = ovn_upgrade_checkout_local(base_version, base_dir, + log_file_str) + + if not success: + # Branch not requested or found in local repo. + # Get working directory's origin URL (the real remote, e.g., GitHub) + success = ovn_upgrade_clone_github(base_version, base_dir, + log_file_str) + + if not success: + log(f"Failed to fetch/checkout {base_version}") + return False + + os.chdir(base_dir) + success, stdout, stderr = run_command(["git", "checkout", base_version]) + with open(log_file_str, 'a', encoding='utf-8') as f: + f.write(stdout + stderr) + + if not success: + log(f"Failed to checkout {base_version}") + log(stderr) + return False + + log(f"Checked out {base_version}") + + log("Updating OVS submodule...") + success, stdout, stderr = run_command(["git", "submodule", "update", + "--init", "--depth", "1"]) + with open(log_file_str, 'a', encoding='utf-8') as f: + f.write(stdout + stderr) + + if not success: + log(f"Failed to update submodules: {stderr}") + return False + + return True + + +def ovn_upgrade_patch_for_ovn_debug(upgrade_dir): + return replace_block_in_file( + Path("controller/lflow.h"), + upgrade_dir / "ovn-upgrade-ofctl-defines.h", + '#define OFTABLE_') + + +def ovn_upgrade_save_ovn_debug(binaries_dir): + log("Saving hybrid ovn-debug...") + src = Path("utilities/ovn-debug") + dst = binaries_dir / "ovn-debug" + + try: + shutil.copy(src, dst) + except Exception as e: + log(f"Failed to save ovn-debug: {e}") + return False + + return True + + +def update_test(old_start, old_end, shift, test_file): + with open(test_file, encoding='utf-8') as f: + content = f.read() + + def replace_table(match): + table_num = int(match.group(1)) + if old_start <= table_num < old_end: + return f"table={table_num + shift}" + return match.group(0) + + # Replace all 
table=NUMBER patterns + updated_content = re.sub(r'table\s*=\s*(\d+)', replace_table, content) + + with open(test_file, 'w', encoding='utf-8') as f: + f.write(updated_content) + + +def ovn_upgrade_table_numbers_in_tests_patch(upgrade_dir): + new_log_egress_file = upgrade_dir / "ovn-upgrade-new-log-egress.txt" + lflow_h = Path("controller/lflow.h") + + if not new_log_egress_file.exists(): + log("No LOG_EGRESS") + return False + + if not lflow_h.exists(): + log("Controller/lflow.h not found") + return False + + with open(new_log_egress_file, encoding='utf-8') as f: + new_log_egress = int(f.read().strip()) + + # Get old values from base version's lflow.h + with open(lflow_h, encoding='utf-8') as f: + content = f.read() + + old_log_egress, old_save_inport = extract_oftable_values(content) + + if (not old_log_egress or not old_save_inport + or old_log_egress == new_log_egress): + log(f"No change in test files as old_log_egress={old_log_egress}, " + f"old_save_inport={old_save_inport} and " + f"new_log_egress={new_log_egress}") + # No change needed is success. 
+ return True + + shift = new_log_egress - old_log_egress + + log(f"Updating hardcoded table numbers in tests (shift: +{shift} for " + f"tables {old_log_egress}-{old_save_inport - 1})") + + # Update test files + for test_file in ["tests/system-ovn.at", "tests/system-ovn-kmod.at" + "tests/system-ovn-netlink.at"]: + if Path(test_file).exists(): + log(f"Updating {test_file}") + update_test(old_log_egress, old_save_inport, shift, test_file) + return True + + +def ovn_upgrade_schema_in_macros_patch(): + schema_filter = '/OVN_Southbound database lacks/d' + ovn_pattern = r'/has no network name\*/d' + + macros_file = Path("tests/ovn-macros.at") + if macros_file.exists(): + with open(macros_file, encoding='utf-8') as f: + content = f.read() + + if schema_filter not in content: + if re.search(ovn_pattern, content): + content = re.sub(f'({ovn_pattern})', + rf'\1\n{schema_filter}', content, count=1) + with open(macros_file, 'w', encoding='utf-8') as f: + f.write(content) + log("Added schema warning filter to ovn-macros.at") + else: + log("Could not find pattern in ovn-macros.at") + else: + log("Schema already updated in macro") + else: + log("tests/ovn-macros.at not found") + return False + + kmod_file = Path("tests/system-kmod-macros.at") + if kmod_file.exists(): + with open(kmod_file, encoding='utf-8') as f: + content = f.read() + + if schema_filter not in content: + ovs_pattern = r'\[OVS_VSWITCHD_STOP\(\[\$1\]\)' + + if re.search(ovs_pattern, content): + content = re.sub( + ovs_pattern, + rf'[OVS_VSWITCHD_STOP([dnl\n$1";{schema_filter}"])', + content, count=1) + with open(kmod_file, 'w', encoding='utf-8') as f: + f.write(content) + log("Added schema warning filter to system-kmod-macros.at") + else: + log("Could not find pattern in system-kmod-macros.at") + return False + + return True + + +def ovn_upgrade_oftable_ovn_macro_patch(upgrade_dir): + return replace_block_in_file( + Path("tests/ovn-macros.at"), + upgrade_dir / "ovn-upgrade-oftable-m4-defines.txt", + 
'm4_define([OFTABLE_') + + +def ovn_upgrade_apply_tests_patches(upgrade_dir): + log("Applying schema filter and table number patches...") + if not ovn_upgrade_table_numbers_in_tests_patch(upgrade_dir): + return False + if not ovn_upgrade_schema_in_macros_patch(): + return False + if not ovn_upgrade_oftable_ovn_macro_patch(upgrade_dir): + return False + return True + + +def ovn_upgrade_restore_binaries(binaries_dir): + log("Replacing binaries with current versions") + + binaries = [ + ("ovn-controller", "controller/ovn-controller"), + ("ovn-debug", "utilities/ovn-debug"), + ("ovs-vswitchd", "ovs/vswitchd/ovs-vswitchd"), + ("ovsdb-server", "ovs/ovsdb/ovsdb-server"), + ("ovs-vsctl", "ovs/utilities/ovs-vsctl"), + ("ovs-ofctl", "ovs/utilities/ovs-ofctl"), + ("ovs-appctl", "ovs/utilities/ovs-appctl"), + ("ovs-dpctl", "ovs/utilities/ovs-dpctl"), + ("vswitch.ovsschema", "ovs/vswitchd/vswitch.ovsschema"), + ] + + for src_name, dest_path in binaries: + src = binaries_dir / src_name + dest = Path(dest_path) + try: + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(src, dest) + except Exception as e: + log(f"Failed to copy {src_name} to {dest}: {e}") + return False + + log("Current versions (from current patch):") + log_binary_version("controller/ovn-controller", + ['ovn-controller', 'SB DB Schema']) + log_binary_version("ovs/vswitchd/ovs-vswitchd", ['vSwitch']) + + log("Base versions (for compatibility testing):") + log_binary_version("northd/ovn-northd", ['ovn-northd']) + log_binary_version("utilities/ovn-nbctl", ['ovn-nbctl']) + + return True + + +def run_upgrade_workflow(base_version, base_dir, upgrade_dir, binaries_dir): + original_dir = Path.cwd() + + try: + if not ovn_upgrade_checkout_base(base_version, upgrade_dir, base_dir): + log("Failed to checkout base version") + return False + + if not ovn_upgrade_apply_tests_patches(upgrade_dir): + log("Failed to apply test patches") + return False + + log("Patching lflow.h with current OFTABLE defines...") + 
ovn_upgrade_patch_for_ovn_debug(upgrade_dir) + + # Build base version with patched lflow.h + log(f"Building base version (with patched lflow.h) from {Path.cwd()}") + if not ovs_ovn_upgrade_build(str(upgrade_dir / "build-base.log")): + log("Failed to build base version") + log(f"See config.log and {upgrade_dir}/build-base.log") + return False + + # Refresh sudo timestamp after long build + run_command(["sudo", "-v"]) + + if not ovn_upgrade_save_ovn_debug(binaries_dir): + return False + + # Rebuild with original lflow.h + log("Restoring lflow.h to original...") + run_command(["git", "checkout", "controller/lflow.h"]) + + log("Rebuilding base version (clean lflow.h)...") + if not ovn_upgrade_build(str(upgrade_dir / "build-base.log")): + log("Failed to rebuild base version") + log(f"See {upgrade_dir}/build-base.log") + return False + + if not ovn_upgrade_restore_binaries(binaries_dir): + return False + + return True + + finally: + os.chdir(original_dir) + + +def remove_upgrade_test_directory(upgrade_dir, base_dir): + if upgrade_dir.exists(): + if base_dir.exists(): + test_dir = base_dir / "tests" / "system-kmod-testsuite.dir" + test_log = base_dir / "tests" / "system-kmod-testsuite.log" + + if test_dir.exists(): + run_command(["sudo", "rm", "-rf", str(test_dir)]) + if test_log.exists(): + run_command(["sudo", "rm", "-f", str(test_log)]) + + try: + shutil.rmtree(upgrade_dir) + return True + except OSError as e: + log(f"Failed to remove {upgrade_dir}: {e}") + return False + return True diff --git a/.github/workflows/ovn-upgrade-tests.yml b/.github/workflows/ovn-upgrade-tests.yml new file mode 100644 index 000000000..caec1e2f9 --- /dev/null +++ b/.github/workflows/ovn-upgrade-tests.yml @@ -0,0 +1,187 @@ +name: OVN Upgrade Tests + +on: + push: + pull_request: + schedule: + # Run Sunday at midnight + - cron: '0 0 * * 0' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }} + cancel-in-progress: true + 
+jobs: + prepare-container: + env: + DEPENDENCIES: podman + name: Prepare container + if: github.repository_owner == 'ovn-org' || github.event_name != 'schedule' + runs-on: ubuntu-24.04 + + steps: + - uses: actions/checkout@v4 + + - name: Update APT cache + run: sudo apt update + + - name: Install dependencies + run: sudo apt install -y ${{ env.DEPENDENCIES }} + + - name: Fix /etc/hosts file + run: | + . .ci/linux-util.sh + fix_etc_hosts + + - name: Disable apparmor + run: | + . .ci/linux-util.sh + disable_apparmor + + - name: Choose image distro + if: github.event_name == 'push' || github.event_name == 'pull_request' + run: | + echo "IMAGE_DISTRO=ubuntu" >> $GITHUB_ENV + + - name: Choose image distro + if: github.event_name == 'schedule' + run: | + echo "IMAGE_DISTRO=fedora" >> $GITHUB_ENV + + - name: Build container + if: github.ref_name != 'main' + run: make ${{ env.IMAGE_DISTRO }} + working-directory: utilities/containers + + - name: Download container + if: github.ref_name == 'main' + run: podman pull ghcr.io/ovn-org/ovn-tests:${{ env.IMAGE_DISTRO }} + + - name: Tag image + run: podman tag ovn-org/ovn-tests:${{ env.IMAGE_DISTRO }} ovn-org/ovn-tests + + - name: Export image + run: podman save -o /tmp/image.tar --format oci-archive ovn-org/ovn-tests + + - name: Cache image + id: image_cache + uses: actions/cache@v4 + with: + path: /tmp/image.tar + key: ${{ github.sha }}/${{ github.event_name }} + + # Job to have a different matrix pull/push (LTS and latest) and on schedule (all supported branches) + setup: + runs-on: ubuntu-24.04 + steps: + - id: define-matrix + run: | + matrix='[ + {"base_version": "origin/branch-24.03", "test_range": "-100"}, + {"base_version": "origin/branch-24.03", "test_range": "101-", "unstable": "unstable"}, + {"base_version": "origin/branch-25.09", "test_range": "-100"}, + {"base_version": "origin/branch-25.09", "test_range": "101-200"}, + {"base_version": "origin/branch-25.09", "test_range": "201-", "unstable": "unstable"} + '] + + if 
[[ "${{ github.event_name }}" == "schedule" ]]; then + matrix=$(echo "$matrix" | jq -c '. += [ + {"base_version": "origin/branch-24.09", "test_range": "-100"}, + {"base_version": "origin/branch-24.09", "test_range": "101-200"}, + {"base_version": "origin/branch-24.09", "test_range": "201-", "unstable": "unstable"}, + {"base_version": "origin/branch-25.03", "test_range": "-100"}, + {"base_version": "origin/branch-25.03", "test_range": "101-200"}, + {"base_version": "origin/branch-25.03", "test_range": "201-", "unstable": "unstable"} + ]') + fi + # Also add a 'name' as artifact names cannot contain '/'. + echo "matrix=$(echo "$matrix" | jq -c 'map(. + {name: (.base_version | split("/") | last)})')" >> $GITHUB_OUTPUT + outputs: + matrix: ${{ steps.define-matrix.outputs.matrix }} + + upgrade-tests: + needs: [setup, prepare-container] + name: upgrade-test ${{ matrix.cfg.name }} ${{ matrix.cfg.test_range }} + runs-on: ubuntu-24.04 + timeout-minutes: 120 + + strategy: + fail-fast: false + matrix: + cfg: ${{ fromJson(needs.setup.outputs.matrix) }} + + env: + CC: gcc + BASE_VERSION: ${{ matrix.cfg.base_version }} + TEST_RANGE: ${{ matrix.cfg.test_range }} + UNSTABLE: ${{ matrix.cfg.unstable }} + TESTSUITE: "upgrade-test" + + steps: + - name: system-level-dependencies + run: | + sudo apt update + sudo apt -y install linux-modules-extra-$(uname -r) + + - name: checkout + if: github.event_name == 'push' || github.event_name == 'pull_request' + uses: actions/checkout@v4 + with: + submodules: recursive + + # For weekly runs, don't update submodules + - name: checkout without submodule + if: github.event_name == 'schedule' + uses: actions/checkout@v4 + + # Weekly runs test using the tip of the most recent stable OVS branch + # instead of the submodule. + - name: checkout OVS + if: github.event_name == 'schedule' + uses: actions/checkout@v4 + with: + repository: 'openvswitch/ovs' + fetch-depth: 0 + path: 'ovs' + + - name: checkout OVS most recent stable branch. 
+ if: github.event_name == 'schedule' + run: | + git checkout \ + $(git branch -a -l '*branch-*' | sed 's/remotes\/origin\///' | \ + sort -V | tail -1) + working-directory: ovs + + - name: Fix /etc/hosts file + run: | + . .ci/linux-util.sh + fix_etc_hosts + + - name: Disable apparmor + run: | + . .ci/linux-util.sh + disable_apparmor + + - name: image cache + id: image_cache + uses: actions/cache@v4 + with: + path: /tmp/image.tar + key: ${{ github.sha }}/${{ github.event_name }} + + - name: load image + run: | + sudo podman load -i /tmp/image.tar + podman load -i /tmp/image.tar + rm -rf /tmp/image.tar + + - name: build + run: sudo -E ./.ci/ci.sh --archive-logs --timeout=2h + + - name: upload logs on failure + if: failure() || cancelled() + uses: actions/upload-artifact@v4 + with: + name: logs-upgrade-test-${{ matrix.cfg.name }}-${{ matrix.cfg.test_range }} + path: logs.tgz diff --git a/Documentation/topics/testing.rst b/Documentation/topics/testing.rst index cc928ef64..3c4d16055 100644 --- a/Documentation/topics/testing.rst +++ b/Documentation/topics/testing.rst @@ -293,3 +293,175 @@ of these cached objects, be sure to rebuild the test. The cached objects are stored under the relevant folder in ``tests/perf-testsuite.dir/cached``. + +OVN Upgrade Testing +~~~~~~~~~~~~~~~~~~~ + +Overview +++++++++ + +OVN upgrade tests validate that the system continues to function correctly +during rolling upgrades, specifically testing the intermediate state where +ovn-controller is upgraded before ovn-northd and the databases. + +The upgrade tests run the system test suite from an older OVN version using +binaries (ovn-controller, ovs-vswitchd, etc.) from the current development +version, ensuring backward compatibility. + +Running Upgrade Tests Locally ++++++++++++++++++++++++++++++ + +Basic usage:: + + $ make check-upgrade + +This will test upgrades from branch-24.03 (the default base version). 
+ +Specify a different base version:: + + $ make check-upgrade BASE_VERSION=branch-24.09 + +Run a specific range of tests:: + + $ make check-upgrade BASE_VERSION=branch-25.03 TESTSUITEFLAGS="1-100" + +Run only unstable tests:: + + $ make check-upgrade UNSTABLE=1 TESTSUITEFLAGS="-k unstable" + +Environment Variables ++++++++++++++++++++++ + +*BASE_VERSION* + Git branch or tag to use as the base version (default: ``branch-24.03``) + +*TESTSUITEFLAGS* + Test range to run, using autotest syntax (default: ``1-``, meaning all tests) + + - ``1-100`` - Run tests 1 through 100 + - ``50-`` - Run tests 50 and above + - ``-k unstable`` - Run tests with 'unstable' keyword + + Additional flags to pass to the testsuite. Use ``-d`` to keep test + directories on success for debugging. + +*UNSTABLE* + Set to ``1`` to run unstable tests (default: disabled) + +How Upgrade Tests Work +++++++++++++++++++++++ + +The upgrade test workflow: + +1. *Save Current Binaries* + + The test framework saves binaries from your current working tree: + + - ``ovn-controller`` + - ``ovs-vswitchd``, ``ovsdb-server`` + - ``ovs-vsctl``, ``ovs-ofctl``, ``ovs-appctl``, ``ovs-dpctl`` + - Flow table definitions from ``controller/lflow.h`` + +2. *Clone and Checkout Base Version* + + Creates ``upgrade-testsuite.dir/ovn-upgrade-base/`` and checks out the + specified base version. + +3. *Patch Old Tests* + + - Updates hardcoded flow table numbers if tables were renumbered + - Adds schema compatibility filters to suppress expected warnings + - Replaces OFTABLE_* m4 macros with current values + +4. *Build Base Version* + + Builds the base version twice: + + - With patched ``lflow.h`` to create hybrid ``ovn-debug`` tool + - With original ``lflow.h`` for proper ``ovn-northd`` and ``ovn-nbctl`` + +5. 
*Swap Binaries* + + Replaces the base version's binaries with current versions: + + - Base version: ``ovn-northd``, ``ovn-nbctl`` (test infrastructure) + - Current version: ``ovn-controller``, ``ovs-vswitchd``, ``ovsdb-server`` + +6. *Run Tests* + + Executes the system test suite from the base version with the mixed + binary set. + +Interpreting Test Failures +++++++++++++++++++++++++++ + +Test failures during upgrade testing can indicate: + +*Backward Compatibility Issues* + The new ovn-controller is incompatible with the old northd/databases. + This is a critical issue that must be fixed before release. + +*Flow Generation Changes* + If flow table contents changed intentionally, the (old) test may need the + ``TAG_TEST_NOT_UPGRADABLE`` tag. + +Debugging Failed Tests +++++++++++++++++++++++ + +On failure, the test directory is preserved in ``upgrade-testsuite.dir/``. + +Check the logs:: + + $ upgrade-testsuite.dir/git.log # Git operations + $ upgrade-testsuite.dir/build-base.log # Build output + $ upgrade-testsuite.dir/ovn-upgrade-base/tests/system-kmod-testsuite.log + +Keep test directory for debugging:: + + $ make check-upgrade TESTSUITEFLAGS="-d" + +Marking Tests as Non-Upgradable ++++++++++++++++++++++++++++++++ + +Some tests cannot run in upgrade scenarios: tests for features not yet +fully present in the base version. + +Mark these tests with the ``TAG_TEST_NOT_UPGRADABLE`` keyword:: + + AT_SETUP([test that checks flow details]) + AT_KEYWORDS([TAG_TEST_NOT_UPGRADABLE]) + # ... test code ... + AT_CLEANUP + +These tests will be skipped during upgrade testing but run normally otherwise. 
+ +CI Integration +++++++++++++++ + +Upgrade tests run automatically in GitHub Actions: + +*On Push/Pull Request* + - Tests upgrades from branch-24.03 (LTS) + - Tests upgrades from branch-25.09 (latest release) + +*On Schedule (Weekly)* + - Tests all supported versions (24.03, 24.09, 25.03, 25.09) + +Implementation Details +++++++++++++++++++++++ + +Test are run locally through ``check-upgrade`` Makefile target. +The flow for make check-upgrade is: + +- Makefile +- ci/ovn_upgrade_test.py: run_upgrade_workflow, run_tests +- ci/linux-build.sh(TESTSUITE=system-test) +- execute_system_tests "check-kernel" "system-kmod-testsuite.log" +- run_system_tests check-kernel + +Through the ci the flow is: + +- ci.sh: run_in_container ./.ci/linux-build.sh (TESTSUITE=upgrade-test) +- execute_system_tests "check-upgrade" "system-kmod-testsuite.log" +- run_system_tests check-upgrade +- Back to make check-upgrade-flow. diff --git a/Makefile.am b/Makefile.am index 3ad2077b3..28cbdf227 100644 --- a/Makefile.am +++ b/Makefile.am @@ -89,6 +89,8 @@ EXTRA_DIST = \ .ci/ci.sh \ .ci/linux-build.sh \ .ci/linux-util.sh \ + .ci/ovn_upgrade_test.py \ + .ci/ovn_upgrade_utils.py \ .ci/osx-build.sh \ .ci/osx-prepare.sh \ .ci/ovn-kubernetes/Dockerfile \ @@ -98,6 +100,7 @@ EXTRA_DIST = \ .github/workflows/test.yml \ .github/workflows/ovn-kubernetes.yml \ .github/workflows/ovn-fake-multinode-tests.yml \ + .github/workflows/ovn-upgrade-tests.yml \ .readthedocs.yaml \ boot.sh \ $(MAN_FRAGMENTS) \ diff --git a/tests/automake.mk b/tests/automake.mk index c8047371b..781a11e4b 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -386,3 +386,20 @@ clean-pki: rm -f tests/pki/stamp rm -rf tests/pki endif + +# Upgrade test support +# Run via: make check-upgrade BASE_VERSION=branch-24.03 TESTSUITEFLAGS="1-100" +BASE_VERSION ?= branch-24.03 + +check-upgrade: all + @mkdir -p upgrade-testsuite.dir + @echo "Running upgrade tests from $(BASE_VERSION)..." 
+ @echo "CC=$(CC) OPTS=$(OPTS) TESTSUITEFLAGS=$(TESTSUITEFLAGS) UNSTABLE=$(UNSTABLE)" + @BASE_VERSION="$(BASE_VERSION)" \ + TESTSUITEFLAGS="$(TESTSUITEFLAGS)" \ + UNSTABLE="$(UNSTABLE)" \ + PYTHONPATH="$(srcdir)/.ci:$$PYTHONPATH" \ + $(PYTHON3) "$(srcdir)/.ci/ovn_upgrade_test.py" + +.PHONY: check-upgrade + -- 2.52.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
