On Fri, Jan 12, 2024 at 07:05:10AM -0800, Steve Sistare wrote:
> Allow cpr-reboot for vfio if the guest is in the suspended runstate.  The
> guest drivers' suspend methods flush outstanding requests and re-initialize
> the devices, and thus there is no device state to save and restore.  The
> user is responsible for suspending the guest before initiating cpr, such as
> by issuing guest-suspend-ram to the qemu guest agent.
> 
> Relax the vfio blocker so it does not apply to cpr, and add a notifier that
> verifies the guest is suspended.  Skip dirty page tracking, which is N/A for
> cpr, to avoid ioctl errors.
> 
> Signed-off-by: Steve Sistare <steven.sist...@oracle.com>
> ---
>  hw/vfio/common.c              |  2 +-
>  hw/vfio/cpr.c                 | 20 ++++++++++++++++++++
>  hw/vfio/migration.c           |  2 +-
>  include/hw/vfio/vfio-common.h |  1 +
>  migration/ram.c               |  9 +++++----
>  5 files changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 0b3352f..09af934 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -128,7 +128,7 @@ int vfio_block_multiple_devices_migration(VFIODevice 
> *vbasedev, Error **errp)
>      error_setg(&multiple_devices_migration_blocker,
>                 "Multiple VFIO devices migration is supported only if all of "
>                 "them support P2P migration");
> -    ret = migrate_add_blocker(&multiple_devices_migration_blocker, errp);
> +    ret = migrate_add_blocker_normal(&multiple_devices_migration_blocker, 
> errp);
>  
>      return ret;
>  }
> diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
> index bbd1c7a..9f4b1fe 100644
> --- a/hw/vfio/cpr.c
> +++ b/hw/vfio/cpr.c
> @@ -7,13 +7,33 @@
>  
>  #include "qemu/osdep.h"
>  #include "hw/vfio/vfio-common.h"
> +#include "migration/misc.h"
>  #include "qapi/error.h"
> +#include "sysemu/runstate.h"
> +
> +static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
> +                                    MigrationEvent *e, Error **errp)
> +{
> +    if (e->state == MIGRATION_STATUS_SETUP &&
> +        !runstate_check(RUN_STATE_SUSPENDED)) {
> +
> +        error_setg(errp,
> +            "VFIO device only supports cpr-reboot for runstate suspended");
> +
> +        return -1;
> +    }

What happens if the guest is suspended during SETUP, but then quickly waked
up before CPR migration completes?

> +    return 0;
> +}
>  
>  int vfio_cpr_register_container(VFIOContainer *container, Error **errp)
>  {
> +    migration_add_notifier_mode(&container->cpr_reboot_notifier,
> +                                vfio_cpr_reboot_notifier,
> +                                MIG_MODE_CPR_REBOOT);
>      return 0;
>  }
>  
>  void vfio_cpr_unregister_container(VFIOContainer *container)
>  {
> +    migration_remove_notifier(&container->cpr_reboot_notifier);
>  }
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 534fddf..488905d 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -895,7 +895,7 @@ static int vfio_block_migration(VFIODevice *vbasedev, 
> Error *err, Error **errp)
>      vbasedev->migration_blocker = error_copy(err);
>      error_free(err);
>  
> -    return migrate_add_blocker(&vbasedev->migration_blocker, errp);
> +    return migrate_add_blocker_normal(&vbasedev->migration_blocker, errp);
>  }
>  
>  /* ---------------------------------------------------------------------- */
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 1add5b7..7a46e24 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -78,6 +78,7 @@ struct VFIOGroup;
>  typedef struct VFIOContainer {
>      VFIOContainerBase bcontainer;
>      int fd; /* /dev/vfio/vfio, empowered by the attached groups */
> +    NotifierWithReturn cpr_reboot_notifier;
>      unsigned iommu_type;
>      QLIST_HEAD(, VFIOGroup) group_list;
>  } VFIOContainer;
> diff --git a/migration/ram.c b/migration/ram.c
> index 1923366..44ad324 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2392,8 +2392,8 @@ static void ram_save_cleanup(void *opaque)
>      RAMState **rsp = opaque;
>      RAMBlock *block;
>  
> -    /* We don't use dirty log with background snapshots */
> -    if (!migrate_background_snapshot()) {
> +    /* We don't use dirty log with background snapshots or cpr */
> +    if (!migrate_background_snapshot() && migrate_mode() == MIG_MODE_NORMAL) 
> {

Same question here, on what happens if the user resumes the VM before
migration completes?  IIUC shared-ram is not required, then it means if
that happens the cpr migration image can contain corrupted data, and that
may be a problem.

Background snapshot is special in that it relies on totally different
tracking facilities (userfault, rather than KVM_GET_DIRTY_LOG), so it
disabled dirty tracking completely.  I assume not the case for cpr.

If cpr is not going to support that use case, IIUC it should fail that
system wakeup properly.  Or perhaps when CPR mode QEMU should instead
reject a wakeup?

>          /* caller have hold BQL or is in a bh, so there is
>           * no writing race against the migration bitmap
>           */
> @@ -2804,8 +2804,9 @@ static void ram_init_bitmaps(RAMState *rs)
>  
>      WITH_RCU_READ_LOCK_GUARD() {
>          ram_list_init_bitmaps();
> -        /* We don't use dirty log with background snapshots */
> -        if (!migrate_background_snapshot()) {
> +        /* We don't use dirty log with background snapshots or cpr */
> +        if (!migrate_background_snapshot() &&
> +            migrate_mode() == MIG_MODE_NORMAL) {
>              memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
>              migration_bitmap_sync_precopy(rs, false);
>          }
> -- 
> 1.8.3.1
> 

-- 
Peter Xu


Reply via email to