Use the return-path capability with colo and reuse the opened return path file on both primary and secondary side.
This fixes a crash in colo where migration_cancel() races with colo closing s->rp_state.from_dst_file. Signed-off-by: Lukas Straub <[email protected]> --- docs/system/qemu-colo.rst | 4 ++-- migration/colo.c | 26 +++++++++----------------- migration/options.c | 10 +++++++++- tests/qtest/migration/colo-tests.c | 1 + tests/qtest/migration/framework.c | 13 +++++++++++++ 5 files changed, 34 insertions(+), 20 deletions(-) diff --git a/docs/system/qemu-colo.rst b/docs/system/qemu-colo.rst index f7d3b6439cf3401a58c412634239d1a43999a10e..9ec3eb79b006a24e0d3a360f1c8f49a68f4d8086 100644 --- a/docs/system/qemu-colo.rst +++ b/docs/system/qemu-colo.rst @@ -225,7 +225,7 @@ any IP's here, except for the ``$primary_ip`` variable:: **3.** On Secondary VM's QEMU monitor, issue command:: {"execute":"qmp_capabilities"} - {"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "x-colo", "state": true } ] } } + {"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "return-path", "state": true }, {"capability": "x-colo", "state": true } ] } } {"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": {"host": "0.0.0.0", "port": "9999"} } } } {"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable": true } } @@ -242,7 +242,7 @@ Note: {"execute":"qmp_capabilities"} {"execute": "blockdev-add", "arguments": {"driver": "nbd", "node-name": "nbd0", "server": {"type": "inet", "host": "127.0.0.2", "port": "9999"}, "export": "parent0", "detect-zeroes": "on"} } {"execute": "x-blockdev-change", "arguments":{"parent": "colo-disk0", "node": "nbd0" } } - {"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "x-colo", "state": true } ] } } + {"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "return-path", "state": true }, {"capability": "x-colo", "state": true } ] } } {"execute": "migrate", "arguments": {"uri": "tcp:127.0.0.2:9998" } } Note: diff --git a/migration/colo.c b/migration/colo.c index dc47d03874039b686d2a4072ac4e6c77e4ff1f87..2d36f933cf155c1084565162294a61d32e28fe86 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -539,6 +539,8 @@ static void colo_process_checkpoint(MigrationState *s) Error *local_err = NULL; int ret; + assert(s->rp_state.from_dst_file); + assert(!s->rp_state.rp_thread_created); if (get_colo_mode() != COLO_MODE_PRIMARY) { error_report("COLO mode must be COLO_MODE_PRIMARY"); return; @@ -546,12 +548,6 @@ static void colo_process_checkpoint(MigrationState *s) failover_init_state(); - s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file); - if (!s->rp_state.from_dst_file) { - error_report("Open QEMUFile from_dst_file failed"); - goto out; - } - packets_compare_notifier.notify = colo_compare_notify_checkpoint; colo_compare_register_notifier(&packets_compare_notifier); @@ -636,16 +632,6 @@ out: colo_compare_unregister_notifier(&packets_compare_notifier); timer_free(s->colo_delay_timer); qemu_event_destroy(&s->colo_checkpoint_event); - - /* - * Must be called after failover BH is completed, - * Or the failover BH may shutdown the wrong fd that - * re-used by other threads after we release here. - */ - if (s->rp_state.from_dst_file) { - qemu_fclose(s->rp_state.from_dst_file); - s->rp_state.from_dst_file = NULL; - } } void migrate_start_colo_process(MigrationState *s) @@ -838,6 +824,7 @@ static void *colo_process_incoming_thread(void *opaque) migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COLO); + assert(mis->to_src_file); if (get_colo_mode() != COLO_MODE_SECONDARY) { error_report("COLO mode must be COLO_MODE_SECONDARY"); return NULL; @@ -854,7 +841,6 @@ static void *colo_process_incoming_thread(void *opaque) failover_init_state(); - mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); /* * Note: the communication between Primary side and Secondary side * should be sequential, we set the fd to unblocked in migration incoming @@ -866,6 +852,12 @@ static void *colo_process_incoming_thread(void *opaque) goto out; } + /* + * rp thread still running on primary side, shut it down to go into + * colo state. + */ + migrate_send_rp_shut(mis, 0); + colo_incoming_start_dirty_log(); bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); diff --git a/migration/options.c b/migration/options.c index 1ffe85a2d8ccc8de9cc04d3c12293538a367abf7..f33b2979290f30ecfd0ffc4654498f25750f6720 100644 --- a/migration/options.c +++ b/migration/options.c @@ -575,7 +575,15 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) ERRP_GUARD(); MigrationIncomingState *mis = migration_incoming_get_current(); -#ifndef CONFIG_REPLICATION +#ifdef CONFIG_REPLICATION + if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { + if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) { + error_setg(errp, "Capability 'x-colo' requires capability " + "'return-path'"); + return false; + } + } +#else if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { error_setg(errp, "QEMU compiled without replication module" " can't enable COLO"); diff --git a/tests/qtest/migration/colo-tests.c b/tests/qtest/migration/colo-tests.c index 598a1d3821ed0a90318732702027cebad47352fd..366b9f7ac6d56a6e74a29d3e062124ad4234608e 100644 --- a/tests/qtest/migration/colo-tests.c +++ b/tests/qtest/migration/colo-tests.c @@ -46,6 +46,7 @@ static int test_colo_common(MigrateCommon *args, * used in production. */ args->start.oob = true; + args->start.caps[MIGRATION_CAPABILITY_RETURN_PATH] = true; args->start.caps[MIGRATION_CAPABILITY_X_COLO] = true; if (migrate_start(&from, &to, args->listen_uri, &args->start)) { diff --git a/tests/qtest/migration/framework.c b/tests/qtest/migration/framework.c index 2a3efeb80740cbfe876f649f0af685c5839e00a2..0bfc241914462c023ceab2fab0386009dc5a4320 100644 --- a/tests/qtest/migration/framework.c +++ b/tests/qtest/migration/framework.c @@ -216,6 +216,19 @@ static void migrate_start_set_capabilities(QTestState *from, QTestState *to, * MigrationCapability_lookup and MIGRATION_CAPABILITY_ constants * are from qapi-types-migration.h. */ + + /* + * Enable return path first, since other features depend on it. + */ + if (args->caps[MIGRATION_CAPABILITY_RETURN_PATH]) { + if (from) { + migrate_set_capability(from, "return-path", true); + } + if (to) { + migrate_set_capability(to, "return-path", true); + } + } + for (uint8_t i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { if (!args->caps[i]) { continue; -- 2.39.5
