Signed-off-by: Md Haris Iqbal <haris.p...@gmail.com> --- hmp-commands.hx | 14 ++++--- hmp.c | 3 +- include/migration/migration.h | 3 ++ migration/migration.c | 97 ++++++++++++++++++++++++++++++++++++++++--- migration/postcopy-ram.c | 9 ++++ migration/savevm.c | 35 ++++++++++++---- qapi-schema.json | 2 +- qemu-version.h | 1 + qmp-commands.hx | 4 +- 9 files changed, 145 insertions(+), 23 deletions(-) create mode 100644 qemu-version.h
diff --git a/hmp-commands.hx b/hmp-commands.hx index 8f765fd..e468c53 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -932,17 +932,19 @@ ETEXI { .name = "migrate_incoming", - .args_type = "uri:s", - .params = "uri", - .help = "Continue an incoming migration from an -incoming defer", + .args_type = "recover:-r,uri:s", + .params = "[-r] uri", + .help = "Continue an incoming migration from an -incoming defer" + "\n\t\t\t -r to recover from a broken migration", .mhandler.cmd = hmp_migrate_incoming, }, STEXI -@item migrate_incoming @var{uri} +@item migrate_incoming [-r] @var{uri} @findex migrate_incoming -Continue an incoming migration using the @var{uri} (that has the same syntax -as the -incoming option). +Continue an incoming migration using the @var{uri} (that has the same syntax +as the -incoming option). +Use -r to recover from a broken migration. ETEXI diff --git a/hmp.c b/hmp.c index 02ed457..965e4f3 100644 --- a/hmp.c +++ b/hmp.c @@ -1186,9 +1186,10 @@ void hmp_migrate_cancel(Monitor *mon, const QDict *qdict) void hmp_migrate_incoming(Monitor *mon, const QDict *qdict) { Error *err = NULL; + bool recover = qdict_get_try_bool(qdict, "recover", false); const char *uri = qdict_get_str(qdict, "uri"); - qmp_migrate_incoming(uri, &err); + qmp_migrate_incoming(uri, !!recover, recover, &err); hmp_handle_error(mon, &err); } diff --git a/include/migration/migration.h b/include/migration/migration.h index bcaf55d..74d456e 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -82,6 +82,9 @@ typedef enum { struct MigrationIncomingState { QEMUFile *from_src_file; + /* To be used by a VM for recovery */ + bool in_recovery; + /* * Free at the start of the main state load, set as the main thread finishes * loading state. 
diff --git a/migration/migration.c b/migration/migration.c index 149cf1e..166f4f7 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -432,6 +432,8 @@ void migration_fd_process_incoming(QEMUFile *f) void migration_channel_process_incoming(MigrationState *s, QIOChannel *ioc) { + MigrationIncomingState *mis = migration_incoming_get_current(); + trace_migration_set_incoming_channel( ioc, object_get_typename(OBJECT(ioc))); @@ -445,6 +447,19 @@ void migration_channel_process_incoming(MigrationState *s, } } else { QEMUFile *f = qemu_fopen_channel_input(ioc); + + if (mis != NULL && atomic_mb_read(&mis->in_recovery)) { + mis->from_src_file = f; + + qemu_mutex_lock(&migration_recovery_mutex); + atomic_mb_set(&mis->in_recovery, false); + qemu_cond_signal(&migration_recovery_cond); + qemu_mutex_unlock(&migration_recovery_mutex); + + fprintf(stderr, "recovered\n"); + return; + } + migration_fd_process_incoming(f); } } @@ -1063,19 +1078,62 @@ void migrate_del_blocker(Error *reason) migration_blockers = g_slist_remove(migration_blockers, reason); } -void qmp_migrate_incoming(const char *uri, Error **errp) +void qmp_migrate_incoming(const char *uri, bool in_recover, bool recover, Error **errp) { Error *local_err = NULL; + bool recovery = in_recover && recover; static bool once = true; + MigrationIncomingState *mis = migration_incoming_get_current(); - if (!deferred_incoming) { - error_setg(errp, "For use with '-incoming defer'"); - return; - } - if (!once) { + if (recovery) { + if (mis != NULL) { + + if(!atomic_mb_read(&mis->in_recovery)) { + /* Recovery option was set but the VM + * Does not seem to have been in recovery + */ + error_setg(errp, "No VM to recover"); + return; + } else { + /* Recovery option was set and the VM + * needs a recovery, resetting the socket + * to NULL + */ + mis->from_src_file = NULL; + if(mis->have_fault_thread) { + /* shutdown the socket to source, causing the fault_thread to shutdown */ + uint64_t tmp64 = 1; + + fprintf(stderr, "rp 
shutdown\n"); + + if (write(mis->userfault_quit_fd, &tmp64, 8) != 8) { + error_report("%s: incrementing userfault_quit_fd: %s", + __func__, strerror(errno)); + } + close(mis->userfault_quit_fd); + close(mis->userfault_fd); + mis->have_fault_thread = false; + } + fprintf(stderr, "rp after shutdown %p\n", mis->to_src_file); + } + + } else { + /* Recovery option was set but there + * is no VM running/(in recovery) + */ + error_setg(errp, "Cannot use -r option without a VM to recover"); + return; + } + } else if (!once) { error_setg(errp, "The incoming migration has already been started"); } + if (!recover && !deferred_incoming) { + error_setg(errp, "For use with '-incoming defer'"); + return; + } + + qemu_start_incoming_migration(uri, &local_err); if (local_err) { @@ -2007,6 +2065,33 @@ int qemu_migrate_postcopy_outgoing_recovery(MigrationState* ms) } +int qemu_migrate_postcopy_incoming_recovery(QEMUFile **f, + MigrationIncomingState* mis) +{ + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_POSTCOPY_RECOVERY); + + atomic_mb_set(&mis->in_recovery, true); + /* Code for network recovery to be added here */ + qemu_mutex_lock(&migration_recovery_mutex); + while(atomic_mb_read(&mis->in_recovery) == true) { + fprintf(stderr, "Recover, not letting it fail %p\n", mis->from_src_file); + qemu_cond_wait(&migration_recovery_cond, &migration_recovery_mutex); + } + qemu_mutex_unlock(&migration_recovery_mutex); + + if(mis->from_src_file != NULL) { + *f = mis->from_src_file; + + migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVERY, + MIGRATION_STATUS_ACTIVE); + return 0; + } + + return -1; +} + + PostcopyState postcopy_state_get(void) { return atomic_mb_read(&incoming_postcopy_state); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 9b04778..d19c13a 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -393,6 +393,8 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, */ static 
void *postcopy_ram_fault_thread(void *opaque) { + fprintf(stderr, "return path thread started\n"); + MigrationIncomingState *mis = opaque; struct uffd_msg msg; int ret; @@ -481,8 +483,15 @@ static void *postcopy_ram_fault_thread(void *opaque) migrate_send_rp_req_pages(mis, NULL, rb_offset, hostpagesize); } + + ret = qemu_file_get_error(mis->to_src_file); + if (ret != 0) { + qemu_file_clear_error(mis->to_src_file); + break; + } } trace_postcopy_ram_fault_thread_exit(); + fprintf(stderr, "return path failed\n"); return NULL; } diff --git a/migration/savevm.c b/migration/savevm.c index 33a2911..79f601c 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1829,6 +1829,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) { uint8_t section_type; int ret; + PostcopyState ps; while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) { @@ -1837,28 +1838,46 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) case QEMU_VM_SECTION_START: case QEMU_VM_SECTION_FULL: ret = qemu_loadvm_section_start_full(f, mis); - if (ret < 0) { - return ret; - } break; case QEMU_VM_SECTION_PART: case QEMU_VM_SECTION_END: ret = qemu_loadvm_section_part_end(f, mis); - if (ret < 0) { - return ret; - } break; case QEMU_VM_COMMAND: ret = loadvm_process_command(f); trace_qemu_loadvm_state_section_command(ret); - if ((ret < 0) || (ret & LOADVM_QUIT)) { + if (ret & LOADVM_QUIT) { + fprintf(stderr, "LOADVM_QUIT\n"); return ret; - } + } break; default: error_report("Unknown savevm section type %d", section_type); return -EINVAL; } + + if (ret < 0) { + ps = postcopy_state_get(); + ret = qemu_file_get_error(f); + + /* This check is based on how the error is set during the network + * recv(). When recv() returns 0 (i.e. the peer closed the + * connection), the error is set to -EIO. For all other network + * errors, it is set according to the return value received. 
+ */ + if (ret == -EIO && ps == POSTCOPY_INCOMING_RUNNING) { + ret = qemu_migrate_postcopy_incoming_recovery(&f, mis); + + if (ret == 0) { + postcopy_ram_enable_notify(mis); + qemu_file_clear_error(f); + continue; + } + } + + ret = qemu_file_get_error(f); + return ret; + } } return 0; diff --git a/qapi-schema.json b/qapi-schema.json index a658462..6a4c23b 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2159,7 +2159,7 @@ # compatible with -incoming and the format of the uri is already exposed # above libvirt ## -{ 'command': 'migrate-incoming', 'data': {'uri': 'str' } } +{ 'command': 'migrate-incoming', 'data': {'uri': 'str', '*recover': 'bool' } } # @xen-save-devices-state: # diff --git a/qemu-version.h b/qemu-version.h new file mode 100644 index 0000000..9ce32a4 --- /dev/null +++ b/qemu-version.h @@ -0,0 +1 @@ +#define QEMU_PKGVERSION " (v2.6.0-1776-g689a31f-dirty)" diff --git a/qmp-commands.hx b/qmp-commands.hx index dd727bf..4234bc9 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -694,7 +694,7 @@ EQMP { .name = "migrate-incoming", - .args_type = "uri:s", + .args_type = "recover:-r,uri:s", .mhandler.cmd_new = qmp_marshal_migrate_incoming, }, @@ -703,10 +703,12 @@ migrate-incoming ---------------- Continue an incoming migration +Set "recover" to true to recover from a broken migration Arguments: - "uri": Source/listening URI (json-string) +- "recover": recover from a broken migration (json-bool, optional) Example: -- 2.7.4