[PATCH V7 07/12] migration: preserve suspended for snapshot

2023-12-06 Thread Steve Sistare
Restoring a snapshot can break a suspended guest.  Snapshots suffer from
the same suspended-state issues that affect live migration, plus they must
handle an additional problematic scenario, which is that a running vm must
remain running if it loads a suspended snapshot.

To save, the existing vm_stop call now completely stops the suspended
state.  Finish with vm_resume to leave the vm in the state it had prior
to the save, correctly restoring the suspended state.

To load, if the snapshot is not suspended, then vm_stop + vm_resume
correctly handles all states, and leaves the vm in the state it had prior
to the load.  However, if the snapshot is suspended, restoration is
trickier.  First, call vm_resume to restore the state to suspended so the
current state matches the saved state.  Then, if the pre-load state is
running, call wakeup to resume running.

Prior to these changes, the vm_stop to RUN_STATE_SAVE_VM and
RUN_STATE_RESTORE_VM did not change runstate if the current state was
suspended, but now it does, so allow these transitions.

Signed-off-by: Steve Sistare 
Reviewed-by: Peter Xu 
---
 include/migration/snapshot.h   |  7 +++
 migration/migration-hmp-cmds.c |  8 +---
 migration/savevm.c | 23 +--
 system/runstate.c  |  5 +
 system/vl.c|  2 ++
 5 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index e72083b..9e4dcaa 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -16,6 +16,7 @@
 #define QEMU_MIGRATION_SNAPSHOT_H
 
 #include "qapi/qapi-builtin-types.h"
+#include "qapi/qapi-types-run-state.h"
 
 /**
  * save_snapshot: Save an internal snapshot.
@@ -61,4 +62,10 @@ bool delete_snapshot(const char *name,
 bool has_devices, strList *devices,
 Error **errp);
 
+/**
+ * load_snapshot_resume: Restore runstate after loading snapshot.
+ * @state: state to restore
+ */
+void load_snapshot_resume(RunState state);
+
 #endif
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 86ae832..c8d70bc 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -399,15 +399,17 @@ void hmp_info_migrate_parameters(Monitor *mon, const 
QDict *qdict)
 
 void hmp_loadvm(Monitor *mon, const QDict *qdict)
 {
-int saved_vm_running  = runstate_is_running();
+RunState saved_state = runstate_get();
+
 const char *name = qdict_get_str(qdict, "name");
 Error *err = NULL;
 
 vm_stop(RUN_STATE_RESTORE_VM);
 
-if (load_snapshot(name, NULL, false, NULL, ) && saved_vm_running) {
-vm_start();
+if (load_snapshot(name, NULL, false, NULL, )) {
+load_snapshot_resume(saved_state);
 }
+
 hmp_handle_error(mon, err);
 }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index eec5503..26866f0 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -3046,7 +3046,7 @@ bool save_snapshot(const char *name, bool overwrite, 
const char *vmstate,
 QEMUSnapshotInfo sn1, *sn = 
 int ret = -1, ret2;
 QEMUFile *f;
-int saved_vm_running;
+RunState saved_state = runstate_get();
 uint64_t vm_state_size;
 g_autoptr(GDateTime) now = g_date_time_new_now_local();
 AioContext *aio_context;
@@ -3094,8 +3094,6 @@ bool save_snapshot(const char *name, bool overwrite, 
const char *vmstate,
 }
 aio_context = bdrv_get_aio_context(bs);
 
-saved_vm_running = runstate_is_running();
-
 global_state_store();
 vm_stop(RUN_STATE_SAVE_VM);
 
@@ -3163,9 +3161,7 @@ bool save_snapshot(const char *name, bool overwrite, 
const char *vmstate,
 
 bdrv_drain_all_end();
 
-if (saved_vm_running) {
-vm_start();
-}
+vm_resume(saved_state);
 return ret == 0;
 }
 
@@ -3339,6 +3335,14 @@ err_drain:
 return false;
 }
 
+void load_snapshot_resume(RunState state)
+{
+vm_resume(state);
+if (state == RUN_STATE_RUNNING && runstate_get() == RUN_STATE_SUSPENDED) {
+qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, _abort);
+}
+}
+
 bool delete_snapshot(const char *name, bool has_devices,
  strList *devices, Error **errp)
 {
@@ -3403,16 +3407,15 @@ static void snapshot_load_job_bh(void *opaque)
 {
 Job *job = opaque;
 SnapshotJob *s = container_of(job, SnapshotJob, common);
-int orig_vm_running;
+RunState orig_state = runstate_get();
 
 job_progress_set_remaining(>common, 1);
 
-orig_vm_running = runstate_is_running();
 vm_stop(RUN_STATE_RESTORE_VM);
 
 s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
-if (s->ret && orig_vm_running) {
-vm_start();
+if (s->ret) {
+load_snapshot_resume(orig_state);
 }
 
 job_progress_update(>common, 1);
diff --git a/system/runstate.c b/system/runstate.c
index e2fa204..ca9eb54 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ 

[PATCH V7 07/12] migration: preserve suspended for snapshot

2023-12-06 Thread Steve Sistare
Restoring a snapshot can break a suspended guest.  Snapshots suffer from
the same suspended-state issues that affect live migration, plus they must
handle an additional problematic scenario, which is that a running vm must
remain running if it loads a suspended snapshot.

To save, the existing vm_stop call now completely stops the suspended
state.  Finish with vm_resume to leave the vm in the state it had prior
to the save, correctly restoring the suspended state.

To load, if the snapshot is not suspended, then vm_stop + vm_resume
correctly handles all states, and leaves the vm in the state it had prior
to the load.  However, if the snapshot is suspended, restoration is
trickier.  First, call vm_resume to restore the state to suspended so the
current state matches the saved state.  Then, if the pre-load state is
running, call wakeup to resume running.

Prior to these changes, the vm_stop to RUN_STATE_SAVE_VM and
RUN_STATE_RESTORE_VM did not change runstate if the current state was
suspended, but now it does, so allow these transitions.

Signed-off-by: Steve Sistare 
Reviewed-by: Peter Xu 
---
 include/migration/snapshot.h   |  7 +++
 migration/migration-hmp-cmds.c |  8 +---
 migration/savevm.c | 23 +--
 system/runstate.c  |  5 +
 system/vl.c|  2 ++
 5 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index e72083b..9e4dcaa 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -16,6 +16,7 @@
 #define QEMU_MIGRATION_SNAPSHOT_H
 
 #include "qapi/qapi-builtin-types.h"
+#include "qapi/qapi-types-run-state.h"
 
 /**
  * save_snapshot: Save an internal snapshot.
@@ -61,4 +62,10 @@ bool delete_snapshot(const char *name,
 bool has_devices, strList *devices,
 Error **errp);
 
+/**
+ * load_snapshot_resume: Restore runstate after loading snapshot.
+ * @state: state to restore
+ */
+void load_snapshot_resume(RunState state);
+
 #endif
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 86ae832..c8d70bc 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -399,15 +399,17 @@ void hmp_info_migrate_parameters(Monitor *mon, const 
QDict *qdict)
 
 void hmp_loadvm(Monitor *mon, const QDict *qdict)
 {
-int saved_vm_running  = runstate_is_running();
+RunState saved_state = runstate_get();
+
 const char *name = qdict_get_str(qdict, "name");
 Error *err = NULL;
 
 vm_stop(RUN_STATE_RESTORE_VM);
 
-if (load_snapshot(name, NULL, false, NULL, ) && saved_vm_running) {
-vm_start();
+if (load_snapshot(name, NULL, false, NULL, )) {
+load_snapshot_resume(saved_state);
 }
+
 hmp_handle_error(mon, err);
 }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index eec5503..26866f0 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -3046,7 +3046,7 @@ bool save_snapshot(const char *name, bool overwrite, 
const char *vmstate,
 QEMUSnapshotInfo sn1, *sn = 
 int ret = -1, ret2;
 QEMUFile *f;
-int saved_vm_running;
+RunState saved_state = runstate_get();
 uint64_t vm_state_size;
 g_autoptr(GDateTime) now = g_date_time_new_now_local();
 AioContext *aio_context;
@@ -3094,8 +3094,6 @@ bool save_snapshot(const char *name, bool overwrite, 
const char *vmstate,
 }
 aio_context = bdrv_get_aio_context(bs);
 
-saved_vm_running = runstate_is_running();
-
 global_state_store();
 vm_stop(RUN_STATE_SAVE_VM);
 
@@ -3163,9 +3161,7 @@ bool save_snapshot(const char *name, bool overwrite, 
const char *vmstate,
 
 bdrv_drain_all_end();
 
-if (saved_vm_running) {
-vm_start();
-}
+vm_resume(saved_state);
 return ret == 0;
 }
 
@@ -3339,6 +3335,14 @@ err_drain:
 return false;
 }
 
+void load_snapshot_resume(RunState state)
+{
+vm_resume(state);
+if (state == RUN_STATE_RUNNING && runstate_get() == RUN_STATE_SUSPENDED) {
+qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, _abort);
+}
+}
+
 bool delete_snapshot(const char *name, bool has_devices,
  strList *devices, Error **errp)
 {
@@ -3403,16 +3407,15 @@ static void snapshot_load_job_bh(void *opaque)
 {
 Job *job = opaque;
 SnapshotJob *s = container_of(job, SnapshotJob, common);
-int orig_vm_running;
+RunState orig_state = runstate_get();
 
 job_progress_set_remaining(>common, 1);
 
-orig_vm_running = runstate_is_running();
 vm_stop(RUN_STATE_RESTORE_VM);
 
 s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
-if (s->ret && orig_vm_running) {
-vm_start();
+if (s->ret) {
+load_snapshot_resume(orig_state);
 }
 
 job_progress_update(>common, 1);
diff --git a/system/runstate.c b/system/runstate.c
index e2fa204..ca9eb54 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@