On Sat, Jan 17, 2026 at 03:09:12PM +0100, Lukas Straub wrote: > Add a COLO migration test for COLO migration and failover. > > COLO does not support q35 machine at this time. > > Signed-off-by: Lukas Straub <[email protected]> > --- > MAINTAINERS | 1 + > tests/qtest/meson.build | 7 ++- > tests/qtest/migration-test.c | 1 + > tests/qtest/migration/colo-tests.c | 113 > +++++++++++++++++++++++++++++++++++++ > tests/qtest/migration/framework.c | 87 +++++++++++++++++++++++++++- > tests/qtest/migration/framework.h | 10 ++++ > 6 files changed, 217 insertions(+), 2 deletions(-) > > diff --git a/MAINTAINERS b/MAINTAINERS > index > dbb217255c2cf35dc0ce971c2021b130fac5469b..92ca20c9d4186a08519d15bfe8cbd583ab061a8b > 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -3840,6 +3840,7 @@ F: migration/colo* > F: migration/multifd-colo.* > F: include/migration/colo.h > F: include/migration/failover.h > +F: tests/qtest/migration/colo-tests.c > F: docs/COLO-FT.txt > > COLO Proxy > diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build > index > 0f053fb56de5806d3c213e3a26c0b19998ae151a..d0129af4431bb08a94a918a1e40a8f657059d764 > 100644 > --- a/tests/qtest/meson.build > +++ b/tests/qtest/meson.build > @@ -367,6 +367,11 @@ if gnutls.found() > endif > endif > > +migration_colo_files = [] > +if get_option('replication').allowed() > + migration_colo_files = [files('migration/colo-tests.c')] > +endif > + > qtests = { > 'aspeed_hace-test': files('aspeed-hace-utils.c', 'aspeed_hace-test.c'), > 'aspeed_smc-test': files('aspeed-smc-utils.c', 'aspeed_smc-test.c'), > @@ -378,7 +383,7 @@ qtests = { > 'migration/migration-util.c') + dbus_vmstate1, > 'erst-test': files('erst-test.c'), > 'ivshmem-test': [rt, '../../contrib/ivshmem-server/ivshmem-server.c'], > - 'migration-test': test_migration_files + migration_tls_files, > + 'migration-test': test_migration_files + migration_tls_files + > migration_colo_files, > 'pxe-test': files('boot-sector.c'), > 'pnv-xive2-test': 
files('pnv-xive2-common.c', 'pnv-xive2-flush-sync.c', > 'pnv-xive2-nvpg_bar.c'), > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c > index > 08936871741535c926eeac40a7d7c3f461c72fd0..e582f05c7dc2673dbd05a936df8feb6c964b5bbc > 100644 > --- a/tests/qtest/migration-test.c > +++ b/tests/qtest/migration-test.c > @@ -55,6 +55,7 @@ int main(int argc, char **argv) > migration_test_add_precopy(env); > migration_test_add_cpr(env); > migration_test_add_misc(env); > + migration_test_add_colo(env); > > ret = g_test_run(); > > diff --git a/tests/qtest/migration/colo-tests.c > b/tests/qtest/migration/colo-tests.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..5004f581e4d9e4e6f54eee6d70a9307b7fd123be > --- /dev/null > +++ b/tests/qtest/migration/colo-tests.c > @@ -0,0 +1,113 @@ > +/* > + * SPDX-License-Identifier: GPL-2.0-or-later > + * > + * QTest testcases for COLO migration > + * > + * Copyright (c) 2025 Lukas Straub <[email protected]> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. 
> + * > + */ > + > +#include "qemu/osdep.h" > +#include "libqtest.h" > +#include "migration/framework.h" > +#include "migration/migration-qmp.h" > +#include "migration/migration-util.h" > +#include "qemu/module.h" > + > +static void test_colo_plain_common(MigrateCommon *args, > + bool failover_during_checkpoint, > + bool primary_failover) > +{ > + args->listen_uri = "tcp:127.0.0.1:0"; > + test_colo_common(args, failover_during_checkpoint, primary_failover); > +} > + > +static void *hook_start_multifd(QTestState *from, QTestState *to) > +{ > + return migrate_hook_start_precopy_tcp_multifd_common(from, to, "none"); > +} > + > +static void test_colo_multifd_common(MigrateCommon *args, > + bool failover_during_checkpoint, > + bool primary_failover) > +{ > + args->listen_uri = "defer"; > + args->start_hook = hook_start_multifd; > + args->start.caps[MIGRATION_CAPABILITY_MULTIFD] = true; > + test_colo_common(args, failover_during_checkpoint, primary_failover); > +} > + > +static void test_colo_plain_primary_failover(char *name, MigrateCommon *args) > +{ > + test_colo_plain_common(args, false, true); > +} > + > +static void test_colo_plain_secondary_failover(char *name, MigrateCommon > *args) > +{ > + test_colo_plain_common(args, false, false); > +} > + > +static void test_colo_multifd_primary_failover(char *name, MigrateCommon > *args) > +{ > + test_colo_multifd_common(args, false, true); > +} > + > +static void test_colo_multifd_secondary_failover(char *name, > + MigrateCommon *args) > +{ > + test_colo_multifd_common(args, false, false); > +} > + > +static void test_colo_plain_primary_failover_checkpoint(char *name, > + MigrateCommon *args) > +{ > + test_colo_plain_common(args, true, true); > +} > + > +static void test_colo_plain_secondary_failover_checkpoint(char *name, > + MigrateCommon > *args) > +{ > + test_colo_plain_common(args, true, false); > +} > + > +static void test_colo_multifd_primary_failover_checkpoint(char *name, > + MigrateCommon > *args) > +{ > + 
test_colo_multifd_common(args, true, true); > +} > + > +static void test_colo_multifd_secondary_failover_checkpoint(char *name, > + MigrateCommon > *args) > +{ > + test_colo_multifd_common(args, true, false); > +} > + > +void migration_test_add_colo(MigrationTestEnv *env) > +{ > + if (!env->full_set) { > + return; > + } > + > + migration_test_add("/migration/colo/plain/primary_failover", > + test_colo_plain_primary_failover); > + migration_test_add("/migration/colo/plain/secondary_failover", > + test_colo_plain_secondary_failover); > + > + migration_test_add("/migration/colo/multifd/primary_failover", > + test_colo_multifd_primary_failover); > + migration_test_add("/migration/colo/multifd/secondary_failover", > + test_colo_multifd_secondary_failover); > + > + migration_test_add("/migration/colo/plain/primary_failover_checkpoint", > + test_colo_plain_primary_failover_checkpoint); > + migration_test_add("/migration/colo/plain/secondary_failover_checkpoint", > + test_colo_plain_secondary_failover_checkpoint); > + > + migration_test_add("/migration/colo/multifd/primary_failover_checkpoint", > + test_colo_multifd_primary_failover_checkpoint); > + > migration_test_add("/migration/colo/multifd/secondary_failover_checkpoint", > + test_colo_multifd_secondary_failover_checkpoint); > +} > diff --git a/tests/qtest/migration/framework.c > b/tests/qtest/migration/framework.c > index > 57d3b9b7c5a269d31659971e308367bd916d28f6..fe34e7cc7a1a4eeb8d5219f54733bbd8446b0e4e > 100644 > --- a/tests/qtest/migration/framework.c > +++ b/tests/qtest/migration/framework.c > @@ -315,7 +315,7 @@ int migrate_args(char **from, char **to, const char *uri, > MigrateStart *args) > if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { > memory_size = "150M"; > > - if (g_str_equal(arch, "i386")) { > + if (g_str_equal(arch, "i386") || args->force_pc_machine) {
The naming is better, thanks. That said, force_pc_machine is still something we would rather not have, if we can drop it. I asked about this in v1: https://lore.kernel.org/qemu-devel/[email protected]/ Can we explore that possibility? > machine_alias = "pc"; > } else { > machine_alias = "q35"; > @@ -1066,6 +1066,91 @@ void > *migrate_hook_start_precopy_tcp_multifd_common(QTestState *from, > return NULL; > } > > +int test_colo_common(MigrateCommon *args, bool failover_during_checkpoint, > + bool primary_failover) > +{ > + QTestState *from, *to; > + void *data_hook = NULL; > + > + /* > + * For the COLO test, both VMs will run in parallel. Thus both VMs want > to > + * open the image read/write at the same time. Using read-only=on is not > + * possible here, because ide-hd does not support read-only backing > image. > + * > + * So use -snapshot, where each qemu instance creates its own writable > + * snapshot internally while leaving the real image read-only. > + */ > + args->start.opts_source = "-snapshot"; > + args->start.opts_target = "-snapshot"; > + > + /* > + * COLO migration code logs many errors when the migration socket > + * is shut down, these are expected so we hide them here. > + */ > + args->start.hide_stderr = true; > + > + /* > + * COLO currently does not work with Q35 machine > + */ > + args->start.force_pc_machine = true; > + > + args->start.oob = true; Just curious: is OOB required in COLO for some reason? I understand that the yank command you use below relies on OOB, so the real question behind this is: what can block the main thread here, and what about that is specific to COLO?
> + args->start.caps[MIGRATION_CAPABILITY_X_COLO] = true; > + > + if (migrate_start(&from, &to, args->listen_uri, &args->start)) { > + return -1; > + } > + > + migrate_set_parameter_int(from, "x-checkpoint-delay", 300); > + > + if (args->start_hook) { > + data_hook = args->start_hook(from, to); > + } > + > + migrate_ensure_converge(from); > + wait_for_serial("src_serial"); > + > + migrate_qmp(from, to, args->connect_uri, NULL, "{}"); > + > + wait_for_migration_status(from, "colo", NULL); > + wait_for_resume(to, &dst_state); We can move this whole function into colo-tests.c. Here you may want to use get_dst() instead. > + > + wait_for_serial("src_serial"); > + wait_for_serial("dest_serial"); > + > + /* wait for 3 checkpoints */ > + for (int i = 0; i < 3; i++) { > + qtest_qmp_eventwait(to, "RESUME"); > + wait_for_serial("src_serial"); > + wait_for_serial("dest_serial"); > + } > + > + if (failover_during_checkpoint) { > + qtest_qmp_eventwait(to, "STOP"); > + } > + if (primary_failover) { > + qtest_qmp_assert_success(from, "{'exec-oob': 'yank', 'id': > 'yank-cmd', " > + "'arguments': {'instances':" > + "[{'type': 'migration'}]}}"); > + qtest_qmp_assert_success(from, "{'execute': > 'x-colo-lost-heartbeat'}"); > + wait_for_serial("src_serial"); > + } else { > + qtest_qmp_assert_success(to, "{'exec-oob': 'yank', 'id': 'yank-cmd', > " > + "'arguments': {'instances':" > + "[{'type': 'migration'}]}}"); > + qtest_qmp_assert_success(to, "{'execute': 'x-colo-lost-heartbeat'}"); > + wait_for_serial("dest_serial"); > + } > + > + if (args->end_hook) { > + args->end_hook(from, to, data_hook); > + } > + > + migrate_end(from, to, !primary_failover); > + > + return 0; > +} > + > QTestMigrationState *get_src(void) > { > return &src_state; > diff --git a/tests/qtest/migration/framework.h > b/tests/qtest/migration/framework.h > index > 2ef0f57962605c9e3bc7b7de48e52351e5389138..75088c5fb098a0f95acb1e23585d3b6e8307451e > 100644 > --- a/tests/qtest/migration/framework.h > +++ 
b/tests/qtest/migration/framework.h > @@ -139,6 +139,9 @@ typedef struct { > /* Do not connect to target monitor and qtest sockets in qtest_init */ > bool defer_target_connect; > > + /* Use pc machine for x86_64 */ > + bool force_pc_machine; > + > /* > * Migration capabilities to be set in both source and > * destination. For unilateral capabilities, use > @@ -248,6 +251,8 @@ void test_postcopy_common(MigrateCommon *args); > void test_postcopy_recovery_common(MigrateCommon *args); > int test_precopy_common(MigrateCommon *args); > void test_file_common(MigrateCommon *args, bool stop_src); > +int test_colo_common(MigrateCommon *args, bool failover_during_checkpoint, > + bool colo_primary_failover); > void *migrate_hook_start_precopy_tcp_multifd_common(QTestState *from, > QTestState *to, > const char *method); > @@ -267,5 +272,10 @@ void migration_test_add_file(MigrationTestEnv *env); > void migration_test_add_precopy(MigrationTestEnv *env); > void migration_test_add_cpr(MigrationTestEnv *env); > void migration_test_add_misc(MigrationTestEnv *env); > +#ifdef CONFIG_REPLICATION > +void migration_test_add_colo(MigrationTestEnv *env); > +#else > +static inline void migration_test_add_colo(MigrationTestEnv *env) {}; > +#endif > > #endif /* TEST_FRAMEWORK_H */ > > -- > 2.39.5 > -- Peter Xu
