On 8/24/2021 5:36 AM, Zheng Chuan wrote: > Hi, Steve. > > I think I have found the problem, it is because the rom_reset() during the > cpr_exec will write dtb into the mach-virt.ram which cause the memory > corruption. > Also I found in x86 the memoryregion of acpi also changed during rom_rest. > Maybe we should keep it consistent and skip the rom_reset() like migration > does. > Here is the patch drafted(Also fix the cpr state missing saving problem):
Hi Chuan, thank-you very much for debugging the problem. rom_reset() is a great find. I also noticed and have a fix ready for the mode bug. I will add similar fixes to patch V7. - Steve > diff --git a/hw/core/loader.c b/hw/core/loader.c > index 5b34869a5417..1dcf0be1492f 100644 > --- a/hw/core/loader.c > +++ b/hw/core/loader.c > @@ -50,6 +50,7 @@ > #include "hw/hw.h" > #include "disas/disas.h" > #include "migration/vmstate.h" > +#include "migration/cpr.h" > #include "monitor/monitor.h" > #include "sysemu/reset.h" > #include "sysemu/sysemu.h" > @@ -1128,7 +1129,7 @@ static void rom_reset(void *unused) > * the data in during the next incoming migration in all cases. Note > * that some of those RAMs can actually be modified by the guest. > */ > - if (runstate_check(RUN_STATE_INMIGRATE)) { > + if (runstate_check(RUN_STATE_INMIGRATE) || cpr_is_active()) { > if (rom->data && rom->isrom) { > /* > * Free it so that a rom_reset after migration doesn't > diff --git a/include/migration/cpr.h b/include/migration/cpr.h > index e9b987f54319..0b7d7e9f6bf0 100644 > --- a/include/migration/cpr.h > +++ b/include/migration/cpr.h > @@ -20,9 +20,11 @@ void cpr_save_fd(const char *name, int id, int fd); > void cpr_delete_fd(const char *name, int id); > int cpr_find_fd(const char *name, int id); > int cpr_walk_fd(cpr_walk_fd_cb cb, void *handle); > -int cpr_state_save(Error **errp); > +int cpr_state_save(CprMode mode, Error **errp); > int cpr_state_load(Error **errp); > CprMode cpr_state_mode(void); > +void cpr_state_clear(void); > +bool cpr_is_active(void); > void cpr_state_print(void); > > int cpr_vfio_save(Error **errp); > diff --git a/migration/cpr-state.c b/migration/cpr-state.c > index 003b449bbcf8..4ac08539d932 100644 > --- a/migration/cpr-state.c > +++ b/migration/cpr-state.c > @@ -19,7 +19,7 @@ typedef struct CprState { > CprNameList fds; /* list of CprFd */ > } CprState; > > -static CprState cpr_state; > +static CprState cpr_state = { .mode = CPR_MODE_NONE }; > > /*************************************************************************/ > /* Generic list of names. */ > @@ -149,7 +149,7 @@ static const VMStateDescription vmstate_cpr_state = { > } > }; > > -int cpr_state_save(Error **errp) > +int cpr_state_save(CprMode mode, Error **errp) > { > int ret, mfd; > QEMUFile *f; > @@ -163,9 +163,11 @@ int cpr_state_save(Error **errp) > qemu_clear_cloexec(mfd); > f = qemu_fd_open(mfd, true, CPR_STATE_NAME); > > + cpr_state.mode = mode; > ret = vmstate_save_state(f, &vmstate_cpr_state, &cpr_state, 0); > if (ret) { > error_setg(errp, "vmstate_save_state error %d", ret); > + cpr_state.mode = CPR_MODE_NONE; > return ret; > } > > @@ -205,6 +207,16 @@ CprMode cpr_state_mode(void) > return cpr_state.mode; > } > > +void cpr_state_clear(void) > +{ > + cpr_state.mode = CPR_MODE_NONE; > +} > + > +bool cpr_is_active(void) > +{ > + return cpr_state.mode != CPR_MODE_NONE; > +} > + > void cpr_state_print(void) > { > CprName *elem; > diff --git a/migration/cpr.c b/migration/cpr.c > index d14bc5ad2678..97b2293c01e8 100644 > --- a/migration/cpr.c > +++ b/migration/cpr.c > @@ -115,7 +115,7 @@ void qmp_cpr_exec(strList *args, Error **errp) > return; > } > cpr_walk_fd(preserve_fd, 0); > - if (cpr_state_save(errp)) { > + if (cpr_state_save(cpr_active_mode, errp)) { > return; > } > vhost_dev_reset_all(); > @@ -173,4 +173,5 @@ void qmp_cpr_load(const char *filename, Error **errp) > > out: > cpr_active_mode = CPR_MODE_NONE; > + cpr_state_clear(); > } > > > On 2021/8/24 5:36, Steven Sistare wrote: >> Hi Zheng, testing aarch64 is on our todo list. We will run this case and try >> to >> reproduce the failure. Thanks for the report. >> >> - Steve >> >> On 8/21/2021 4:54 AM, Zheng Chuan wrote: >>> Hi, steve >>> >>> It seems the VM will stuck after cpr-load on AArch64 environment? >>> >>> My AArch64 environment and test steps: >>> 1. linux kernel: 5.14-rc6 >>> 2. QEMU version: v6.1.0-rc2 (patch your patchset), and configure with >>> `../configure --target-list=aarch64-softmmu --disable-werror --enable-kvm` >>> 4. Steps to live update: >>> # ./build/aarch64-softmmu/qemu-system-aarch64 -machine >>> virt,accel=kvm,gic-version=3,memfd-alloc=on -nodefaults -cpu host -m 2G >>> -smp 1 -drive >>> file=/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw,if=pflash,format=raw,readonly=on >>> -drive file=<path/to/vm.qcow2>,format=qcow2,if=none,id=drive_image1 >>> -device virtio-blk-pci,id=image1,drive=drive_image1 -vnc :10 -device >>> virtio-gpu,id=video0 -device piix3-usb-uhci,id=usb -device >>> usb-tablet,id=input0,bus=usb.0,port=1 -device >>> usb-kbd,id=input1,bus=usb.0,port=2 -monitor stdio >>> (qemu) cpr-save /tmp/qemu.save restart >>> (qemu) cpr-exec ./build/aarch64-softmmu/qemu-system-aarch64 -machine >>> virt,accel=kvm,gic-version=3,memfd-alloc=on -nodefaults -cpu host -m 2G >>> -smp 1 -drive >>> file=/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw,if=pflash,format=raw,readonly=on >>> -drive file=<path/to/vm.qcow2>,format=qcow2,if=none,id=drive_image1 >>> -device virtio-blk-pci,id=image1,drive=drive_image1 -vnc :10 -device >>> virtio-gpu,id=video0 -device piix3-usb-uhci,id=usb -device >>> usb-tablet,id=input0,bus=usb.0,port=1 -device >>> usb-kbd,id=input1,bus=usb.0,port=2 -monitor stdio -S >>> (qemu) QEMU 6.0.92 monitor - type 'help' for more information >>> (qemu) cpr-load /tmp/qemu.save >>> >>> Does I miss something? >>> >>> On 2021/8/7 5:43, Steve Sistare wrote: >>>> Provide the cpr-save, cpr-exec, and cpr-load commands for live update. >>>> These save and restore VM state, with minimal guest pause time, so that >>>> qemu may be updated to a new version in between. >>>> >>>> cpr-save stops the VM and saves vmstate to an ordinary file. It supports >>>> any type of guest image and block device, but the caller must not modify >>>> guest block devices between cpr-save and cpr-load. It supports two modes: >>>> reboot and restart. >>>> >>>> In reboot mode, the caller invokes cpr-save and then terminates qemu. >>>> The caller may then update the host kernel and system software and reboot. >>>> The caller resumes the guest by running qemu with the same arguments as the >>>> original process and invoking cpr-load. To use this mode, guest ram must >>>> be >>>> mapped to a persistent shared memory file such as /dev/dax0.0, or /dev/shm >>>> PKRAM as proposed in >>>> https://lore.kernel.org/lkml/1617140178-8773-1-git-send-email-anthony.yzn...@oracle.com. >>>> >>>> The reboot mode supports vfio devices if the caller first suspends the >>>> guest, such as by issuing guest-suspend-ram to the qemu guest agent. The >>>> guest drivers' suspend methods flush outstanding requests and re-initialize >>>> the devices, and thus there is no device state to save and restore. >>>> >>>> Restart mode preserves the guest VM across a restart of the qemu process. >>>> After cpr-save, the caller passes qemu command-line arguments to cpr-exec, >>>> which directly exec's the new qemu binary. The arguments must include -S >>>> so new qemu starts in a paused state and waits for the cpr-load command. >>>> The restart mode supports vfio devices by preserving the vfio container, >>>> group, device, and event descriptors across the qemu re-exec, and by >>>> updating DMA mapping virtual addresses using VFIO_DMA_UNMAP_FLAG_VADDR and >>>> VFIO_DMA_MAP_FLAG_VADDR as defined in >>>> https://lore.kernel.org/kvm/1611939252-7240-1-git-send-email-steven.sist...@oracle.com/ >>>> and integrated in Linux kernel 5.12. >>>> >>>> To use the restart mode, qemu must be started with the memfd-alloc option, >>>> which allocates guest ram using memfd_create. The memfd's are saved to >>>> the environment and kept open across exec, after which they are found from >>>> the environment and re-mmap'd. Hence guest ram is preserved in place, >>>> albeit with new virtual addresses in the qemu process. >>>> >>>> The caller resumes the guest by invoking cpr-load, which loads state from >>>> the file. If the VM was running at cpr-save time, then VM execution >>>> resumes. >>>> If the VM was suspended at cpr-save time (reboot mode), then the caller >>>> must >>>> issue a system_wakeup command to resume. >>>> >>>> The first patches add reboot mode: >>>> - memory: qemu_check_ram_volatile >>>> - migration: fix populate_vfio_info >>>> - migration: qemu file wrappers >>>> - migration: simplify savevm >>>> - vl: start on wakeup request >>>> - cpr: reboot mode >>>> - cpr: reboot HMP interfaces >>>> >>>> The next patches add restart mode: >>>> - memory: flat section iterator >>>> - oslib: qemu_clear_cloexec >>>> - machine: memfd-alloc option >>>> - qapi: list utility functions >>>> - vl: helper to request re-exec >>>> - cpr: preserve extra state >>>> - cpr: restart mode >>>> - cpr: restart HMP interfaces >>>> - hostmem-memfd: cpr for memory-backend-memfd >>>> >>>> The next patches add vfio support for restart mode: >>>> - pci: export functions for cpr >>>> - vfio-pci: refactor for cpr >>>> - vfio-pci: cpr part 1 (fd and dma) >>>> - vfio-pci: cpr part 2 (msi) >>>> - vfio-pci: cpr part 3 (intx) >>>> >>>> The next patches preserve various descriptor-based backend devices across >>>> cprexec: >>>> - vhost: reset vhost devices for cpr >>>> - chardev: cpr framework >>>> - chardev: cpr for simple devices >>>> - chardev: cpr for pty >>>> - chardev: cpr for sockets >>>> - cpr: only-cpr-capable option >>>> >>>> Here is an example of updating qemu from v4.2.0 to v4.2.1 using >>>> restart mode. The software update is performed while the guest is >>>> running to minimize downtime. >>>> >>>> window 1 | window 2 >>>> | >>>> # qemu-system-x86_64 ... | >>>> QEMU 4.2.0 monitor - type 'help' ... | >>>> (qemu) info status | >>>> VM status: running | >>>> | # yum update qemu >>>> (qemu) cpr-save /tmp/qemu.sav restart | >>>> (qemu) cpr-exec qemu-system-x86_64 -S ... | >>>> QEMU 4.2.1 monitor - type 'help' ... | >>>> (qemu) info status | >>>> VM status: paused (prelaunch) | >>>> (qemu) cpr-load /tmp/qemu.sav | >>>> (qemu) info status | >>>> VM status: running | >>>> >>>> >>>> Here is an example of updating the host kernel using reboot mode. >>>> >>>> window 1 | window 2 >>>> | >>>> # qemu-system-x86_64 ...mem-path=/dev/dax0.0 ...| >>>> QEMU 4.2.1 monitor - type 'help' ... | >>>> (qemu) info status | >>>> VM status: running | >>>> | # yum update kernel-uek >>>> (qemu) cpr-save /tmp/qemu.sav restart | >>>> (qemu) quit | >>>> | >>>> # systemctl kexec | >>>> kexec_core: Starting new kernel | >>>> ... | >>>> | >>>> # qemu-system-x86_64 -S mem-path=/dev/dax0.0 ...| >>>> QEMU 4.2.1 monitor - type 'help' ... | >>>> (qemu) info status | >>>> VM status: paused (prelaunch) | >>>> (qemu) cpr-load /tmp/qemu.sav | >>>> (qemu) info status | >>>> VM status: running | >>>> >>>> Changes from V1 to V2: >>>> - revert vmstate infrastructure changes >>>> - refactor cpr functions into new files >>>> - delete MADV_DOEXEC and use memfd + VFIO_DMA_UNMAP_FLAG_SUSPEND to >>>> preserve memory. >>>> - add framework to filter chardev's that support cpr >>>> - save and restore vfio eventfd's >>>> - modify cprinfo QMP interface >>>> - incorporate misc review feedback >>>> - remove unrelated and unneeded patches >>>> - refactor all patches into a shorter and easier to review series >>>> >>>> Changes from V2 to V3: >>>> - rebase to qemu 6.0.0 >>>> - use final definition of vfio ioctls (VFIO_DMA_UNMAP_FLAG_VADDR etc) >>>> - change memfd-alloc to a machine option >>>> - Use qio_channel_socket_new_fd instead of adding >>>> qio_channel_socket_new_fd >>>> - close monitor socket during cpr >>>> - fix a few unreported bugs >>>> - support memory-backend-memfd >>>> >>>> Changes from V3 to V4: >>>> - split reboot mode into separate patches >>>> - add cprexec command >>>> - delete QEMU_START_FREEZE, argv_main, and /usr/bin/qemu-exec >>>> - add more checks for vfio and cpr compatibility, and recover after >>>> errors >>>> - save vfio pci config in vmstate >>>> - rename {setenv,getenv}_event_fd to {save,load}_event_fd >>>> - use qemu_strtol >>>> - change 6.0 references to 6.1 >>>> - use strerror(), use EXIT_FAILURE, remove period from error messages >>>> - distribute MAINTAINERS additions to each patch >>>> >>>> Changes from V4 to V5: >>>> - rebase to master >>>> >>>> Changes from V5 to V6: >>>> vfio: >>>> - delete redundant bus_master_enable_region in vfio_pci_post_load >>>> - delete unmap.size warning >>>> - fix phys_config memory leak >>>> - add INTX support >>>> - add vfio_named_notifier_init() helper >>>> Other: >>>> - 6.1 -> 6.2 >>>> - rename file -> filename in qapi >>>> - delete cprinfo. qapi introspection serves the same purpose. >>>> - rename cprsave, cprexec, cprload -> cpr-save, cpr-exec, cpr-load >>>> - improve documentation in qapi/cpr.json >>>> - rename qemu_ram_volatile -> qemu_ram_check_volatile, and use >>>> qemu_ram_foreach_block >>>> - rename handle -> opaque >>>> - use ERRP_GUARD >>>> - use g_autoptr and g_autofree, and glib allocation functions >>>> - conform to error conventions for bool and int function return values >>>> and function names. >>>> - remove word "error" in error messages >>>> - rename as_flat_walk and its callback, and add comments. >>>> - rename qemu_clr_cloexec -> qemu_clear_cloexec >>>> - rename close-on-cpr -> reopen-on-cpr >>>> - add strList utility functions >>>> - factor out start on wakeup request to a separate patch >>>> - deleted unnecessary layer (cprsave etc) and squashed QMP patches >>>> - conditionally compile for CONFIG_VFIO >>>> >>>> Steve Sistare (24): >>>> memory: qemu_check_ram_volatile >>>> migration: fix populate_vfio_info >>>> migration: qemu file wrappers >>>> migration: simplify savevm >>>> vl: start on wakeup request >>>> cpr: reboot mode >>>> memory: flat section iterator >>>> oslib: qemu_clear_cloexec >>>> machine: memfd-alloc option >>>> qapi: list utility functions >>>> vl: helper to request re-exec >>>> cpr: preserve extra state >>>> cpr: restart mode >>>> cpr: restart HMP interfaces >>>> hostmem-memfd: cpr for memory-backend-memfd >>>> pci: export functions for cpr >>>> vfio-pci: refactor for cpr >>>> vfio-pci: cpr part 1 (fd and dma) >>>> vfio-pci: cpr part 2 (msi) >>>> vfio-pci: cpr part 3 (intx) >>>> chardev: cpr framework >>>> chardev: cpr for simple devices >>>> chardev: cpr for pty >>>> cpr: only-cpr-capable option >>>> >>>> Mark Kanda, Steve Sistare (3): >>>> cpr: reboot HMP interfaces >>>> vhost: reset vhost devices for cpr >>>> chardev: cpr for sockets >>>> >>>> MAINTAINERS | 12 ++ >>>> backends/hostmem-memfd.c | 21 +-- >>>> chardev/char-mux.c | 1 + >>>> chardev/char-null.c | 1 + >>>> chardev/char-pty.c | 14 +- >>>> chardev/char-serial.c | 1 + >>>> chardev/char-socket.c | 36 +++++ >>>> chardev/char-stdio.c | 8 ++ >>>> chardev/char.c | 43 +++++- >>>> gdbstub.c | 1 + >>>> hmp-commands.hx | 50 +++++++ >>>> hw/core/machine.c | 19 +++ >>>> hw/pci/msix.c | 20 ++- >>>> hw/pci/pci.c | 7 +- >>>> hw/vfio/common.c | 79 +++++++++-- >>>> hw/vfio/cpr.c | 160 ++++++++++++++++++++++ >>>> hw/vfio/meson.build | 1 + >>>> hw/vfio/pci.c | 301 >>>> +++++++++++++++++++++++++++++++++++++++--- >>>> hw/vfio/trace-events | 1 + >>>> hw/virtio/vhost.c | 11 ++ >>>> include/chardev/char.h | 6 + >>>> include/exec/memory.h | 39 ++++++ >>>> include/hw/boards.h | 1 + >>>> include/hw/pci/msix.h | 5 + >>>> include/hw/pci/pci.h | 2 + >>>> include/hw/vfio/vfio-common.h | 8 ++ >>>> include/hw/virtio/vhost.h | 1 + >>>> include/migration/cpr.h | 31 +++++ >>>> include/monitor/hmp.h | 3 + >>>> include/qapi/util.h | 28 ++++ >>>> include/qemu/osdep.h | 1 + >>>> include/sysemu/runstate.h | 2 + >>>> include/sysemu/sysemu.h | 1 + >>>> linux-headers/linux/vfio.h | 6 + >>>> migration/cpr-state.c | 215 ++++++++++++++++++++++++++++++ >>>> migration/cpr.c | 176 ++++++++++++++++++++++++ >>>> migration/meson.build | 2 + >>>> migration/migration.c | 5 + >>>> migration/qemu-file-channel.c | 36 +++++ >>>> migration/qemu-file-channel.h | 6 + >>>> migration/savevm.c | 21 +-- >>>> migration/target.c | 24 +++- >>>> migration/trace-events | 5 + >>>> monitor/hmp-cmds.c | 68 ++++++---- >>>> monitor/hmp.c | 3 + >>>> monitor/qmp.c | 3 + >>>> qapi/char.json | 7 +- >>>> qapi/cpr.json | 76 +++++++++++ >>>> qapi/meson.build | 1 + >>>> qapi/qapi-schema.json | 1 + >>>> qapi/qapi-util.c | 37 ++++++ >>>> qemu-options.hx | 40 +++++- >>>> softmmu/globals.c | 1 + >>>> softmmu/memory.c | 46 +++++++ >>>> softmmu/physmem.c | 55 ++++++-- >>>> softmmu/runstate.c | 38 +++++- >>>> softmmu/vl.c | 18 ++- >>>> stubs/cpr-state.c | 15 +++ >>>> stubs/cpr.c | 3 + >>>> stubs/meson.build | 2 + >>>> trace-events | 1 + >>>> util/oslib-posix.c | 9 ++ >>>> util/oslib-win32.c | 4 + >>>> util/qemu-config.c | 4 + >>>> 64 files changed, 1732 insertions(+), 111 deletions(-) >>>> create mode 100644 hw/vfio/cpr.c >>>> create mode 100644 include/migration/cpr.h >>>> create mode 100644 migration/cpr-state.c >>>> create mode 100644 migration/cpr.c >>>> create mode 100644 qapi/cpr.json >>>> create mode 100644 stubs/cpr-state.c >>>> create mode 100644 stubs/cpr.c >>>> >>> >> . >> >