VFIO_PRECOPY_INFO_REINIT is checked only during precopy, before the switchover decision. However, the switchover decision and guest stop are not atomic, so a VFIO device may want to set VFIO_PRECOPY_INFO_REINIT and request another switchover ACK in the gap after the switchover decision has been made but before the guest is stopped. This would be missed and may increase downtime.
Solve this by checking if VFIO_PRECOPY_INFO_REINIT was set during that gap, and requesting a new switchover-ack in the final save_state_pending call. Query precopy info after vCPUs are stopped but before transitioning from PRE_COPY state, when it's valid to call the ioctl. Signed-off-by: Avihai Horon <[email protected]> --- hw/vfio/migration.c | 41 +++++++++++++++++++++++++++++++++++------ hw/vfio/trace-events | 2 +- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index caf4d5e19f..f1480fc4cc 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -681,11 +681,11 @@ static void vfio_state_pending(void *opaque, MigPendingData *pending, uint64_t precopy_size, stopcopy_size; bool request_switchover_ack = false; - if (final) { - return; - } - - if (exact) { + /* + * Skip sync in final query as sync for precopy (which is needed for + * switchover-ack) was already done during guest stop. + */ + if (exact && !final) { vfio_state_pending_sync(vbasedev); } @@ -709,7 +709,7 @@ static void vfio_state_pending(void *opaque, MigPendingData *pending, trace_vfio_state_pending(vbasedev->name, migration->stopcopy_size, migration->precopy_init_size, migration->precopy_dirty_size, - request_switchover_ack, exact); + request_switchover_ack, exact, final); } static bool vfio_is_active_iterate(void *opaque) @@ -963,6 +963,26 @@ static const SaveVMHandlers savevm_vfio_handlers = { /* ---------------------------------------------------------------------- */ +static void vfio_final_precopy_reinit_check(VFIODevice *vbasedev) +{ + VFIOMigration *migration = vbasedev->migration; + int ret; + + if (!migration->precopy_info_v2_used || !migrate_switchover_ack() || + migrate_switchover_ack_legacy()) { + return; + } + + ret = vfio_query_precopy_size(migration); + if (ret) { + error_report("%s: Final precopy reinit check failed (err: %d)", + vbasedev->name, ret); + /* If query failed, assume reinit and request switchover-ack */ + 
migration->request_switchover_ack = true; + migration->initial_data_sent = false; + } +} + static void vfio_vmstate_change_prepare(void *opaque, bool running, RunState state) { @@ -976,6 +996,15 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running, VFIO_DEVICE_STATE_PRE_COPY_P2P : VFIO_DEVICE_STATE_RUNNING_P2P; + if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { + /* + * Now that vCPUs are stopped, check if new init_bytes are available + * since switchover decision, to be reported in the final + * save_query_pending. + */ + vfio_final_precopy_reinit_check(vbasedev); + } + ret = vfio_migration_set_state_or_reset(vbasedev, new_state, &local_err); if (ret) { /* diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 464c28c860..3fbc32cb3a 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -178,7 +178,7 @@ vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy vfio_save_iterate_start(const char *name) " (%s)" vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size %"PRIu64 vfio_send_init_data_flag(const char *name) " (%s)" -vfio_state_pending(const char *name, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size, bool request_switchover_ack, bool exact) " (%s) stopcopy size %"PRIu64", precopy initial size %"PRIu64", precopy dirty size %"PRIu64 ", request switchover ack %d, exact %d" +vfio_state_pending(const char *name, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size, bool request_switchover_ack, bool exact, bool final) " (%s) stopcopy size %"PRIu64", precopy initial size %"PRIu64", precopy dirty size %"PRIu64 ", request switchover ack %d, exact %d, final %d" vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running 
%d reason %s device state %s" -- 2.40.1
