On Sun, Jun 30, 2024 at 12:40:25PM -0700, Steve Sistare wrote:
> CPR must save state that is needed after QEMU is restarted, when devices
> are realized.  Thus the extra state cannot be saved in the migration stream,
> as objects must already exist before that stream can be loaded.  Instead,
> define auxilliary state structures and vmstate descriptions, not associated
> with any registered object, and serialize the aux state to a cpr-specific
> stream in cpr_state_save.  Deserialize in cpr_state_load after QEMU
> restarts, before devices are realized.
> 
> Provide accessors for clients to register file descriptors for saving.
> The mechanism for passing the fd's to the new process will be specific
> to each migration mode, and added in subsequent patches.
> 
> Signed-off-by: Steve Sistare <steven.sist...@oracle.com>
> ---
>  include/migration/cpr.h |  21 ++++++
>  migration/cpr.c         | 188 
> ++++++++++++++++++++++++++++++++++++++++++++++++
>  migration/meson.build   |   1 +
>  migration/migration.c   |   6 ++
>  migration/trace-events  |   5 ++
>  system/vl.c             |   3 +
>  6 files changed, 224 insertions(+)
>  create mode 100644 include/migration/cpr.h
>  create mode 100644 migration/cpr.c
> 
> diff --git a/include/migration/cpr.h b/include/migration/cpr.h
> new file mode 100644
> index 0000000..8e7e705
> --- /dev/null
> +++ b/include/migration/cpr.h
> @@ -0,0 +1,21 @@
> +/*
> + * Copyright (c) 2021, 2024 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef MIGRATION_CPR_H
> +#define MIGRATION_CPR_H
> +
> +typedef int (*cpr_walk_fd_cb)(int fd);
> +void cpr_save_fd(const char *name, int id, int fd);
> +void cpr_delete_fd(const char *name, int id);
> +int cpr_find_fd(const char *name, int id);
> +int cpr_walk_fd(cpr_walk_fd_cb cb);
> +void cpr_resave_fd(const char *name, int id, int fd);
> +
> +int cpr_state_save(Error **errp);
> +int cpr_state_load(Error **errp);
> +
> +#endif
> diff --git a/migration/cpr.c b/migration/cpr.c
> new file mode 100644
> index 0000000..313e74e
> --- /dev/null
> +++ b/migration/cpr.c
> @@ -0,0 +1,188 @@
> +/*
> + * Copyright (c) 2021-2024 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "migration/cpr.h"
> +#include "migration/misc.h"
> +#include "migration/qemu-file.h"
> +#include "migration/savevm.h"
> +#include "migration/vmstate.h"
> +#include "sysemu/runstate.h"
> +#include "trace.h"
> +
> +/*************************************************************************/
> +/* cpr state container for all information to be saved. */
> +
> +typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
> +
> +typedef struct CprState {
> +    CprFdList fds;
> +} CprState;
> +
> +static CprState cpr_state;
> +
> +/****************************************************************************/
> +
> +typedef struct CprFd {
> +    char *name;
> +    unsigned int namelen;
> +    int id;
> +    int fd;

[1]

> +    QLIST_ENTRY(CprFd) next;
> +} CprFd;
> +
> +static const VMStateDescription vmstate_cpr_fd = {
> +    .name = "cpr fd",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT32(namelen, CprFd),
> +        VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
> +        VMSTATE_INT32(id, CprFd),
> +        VMSTATE_INT32(fd, CprFd),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +void cpr_save_fd(const char *name, int id, int fd)
> +{
> +    CprFd *elem = g_new0(CprFd, 1);
> +
> +    trace_cpr_save_fd(name, id, fd);
> +    elem->name = g_strdup(name);
> +    elem->namelen = strlen(name) + 1;
> +    elem->id = id;
> +    elem->fd = fd;
> +    QLIST_INSERT_HEAD(&cpr_state.fds, elem, next);
> +}
> +
> +static CprFd *find_fd(CprFdList *head, const char *name, int id)
> +{
> +    CprFd *elem;
> +
> +    QLIST_FOREACH(elem, head, next) {
> +        if (!strcmp(elem->name, name) && elem->id == id) {
> +            return elem;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +void cpr_delete_fd(const char *name, int id)
> +{
> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
> +
> +    if (elem) {
> +        QLIST_REMOVE(elem, next);
> +        g_free(elem->name);
> +        g_free(elem);
> +    }
> +
> +    trace_cpr_delete_fd(name, id);
> +}
> +
> +int cpr_find_fd(const char *name, int id)
> +{
> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
> +    int fd = elem ? elem->fd : -1;
> +
> +    trace_cpr_find_fd(name, id, fd);
> +    return fd;
> +}
> +
> +int cpr_walk_fd(cpr_walk_fd_cb cb)
> +{
> +    CprFd *elem;
> +
> +    QLIST_FOREACH(elem, &cpr_state.fds, next) {
> +        if (elem->fd >= 0 && cb(elem->fd)) {
> +            return 1;
> +        }
> +    }
> +    return 0;
> +}
> +
> +void cpr_resave_fd(const char *name, int id, int fd)
> +{
> +    CprFd *elem = find_fd(&cpr_state.fds, name, id);
> +    int old_fd = elem ? elem->fd : -1;
> +
> +    if (old_fd < 0) {
> +        cpr_save_fd(name, id, fd);

I don't think I know well on when old_fd<0 would happen yet, as this series
doesn't look like to use this function at all.  From that POV, maybe nice
to add a comment above [1] for "fd" field.

Meanwhile, do we need to remove the old_fd<0 element here, or is it
intended to keep that and the new CprFD?

> +    } else if (old_fd != fd) {
> +        error_setg(&error_fatal,
> +                   "internal error: cpr fd '%s' id %d value %d "
> +                   "already saved with a different value %d",
> +                   name, id, fd, old_fd);
> +    }
> +}
> +/*************************************************************************/
> +#define CPR_STATE "CprState"
> +
> +static const VMStateDescription vmstate_cpr_state = {
> +    .name = CPR_STATE,
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +/*************************************************************************/
> +
> +int cpr_state_save(Error **errp)
> +{
> +    int ret;
> +    QEMUFile *f;
> +
> +    /* set f based on mode in a later patch in this series */
> +    return 0;
> +
> +    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
> +    qemu_put_be32(f, QEMU_VM_FILE_VERSION);

Having magic/version makes sense to me, though I'd suggest we use CPR new
magic/versions, so that if we see an binary dump we know what it is, and we
don't mixup a CPR image against a migration stream image.

> +
> +    ret = vmstate_save_state(f, &vmstate_cpr_state, &cpr_state, 0);

s/0/NULL/

> +    if (ret) {
> +        error_setg(errp, "vmstate_save_state error %d", ret);
> +    }

Can consider using vmstate_save_state_with_err().

> +
> +    qemu_fclose(f);
> +    return ret;
> +}
> +
> +int cpr_state_load(Error **errp)
> +{
> +    int ret;
> +    uint32_t v;
> +    QEMUFile *f;
> +
> +    /* set f based on mode in a later patch in this series */
> +    return 0;
> +
> +    v = qemu_get_be32(f);
> +    if (v != QEMU_VM_FILE_MAGIC) {
> +        error_setg(errp, "Not a migration stream (bad magic %x)", v);
> +        qemu_fclose(f);
> +        return -EINVAL;
> +    }
> +    v = qemu_get_be32(f);
> +    if (v != QEMU_VM_FILE_VERSION) {
> +        error_setg(errp, "Unsupported migration stream version %d", v);
> +        qemu_fclose(f);
> +        return -ENOTSUP;
> +    }
> +
> +    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
> +    if (ret) {
> +        error_setg(errp, "vmstate_load_state error %d", ret);
> +    }

Simiarly, can use vmstate_save_state_with_err().

> +
> +    qemu_fclose(f);
> +    return ret;
> +}
> +
> diff --git a/migration/meson.build b/migration/meson.build
> index 5ce2acb4..87feb4c 100644
> --- a/migration/meson.build
> +++ b/migration/meson.build
> @@ -13,6 +13,7 @@ system_ss.add(files(
>    'block-dirty-bitmap.c',
>    'channel.c',
>    'channel-block.c',
> +  'cpr.c',
>    'dirtyrate.c',
>    'exec.c',
>    'fd.c',
> diff --git a/migration/migration.c b/migration/migration.c
> index 3dea06d..e394ad7 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -27,6 +27,7 @@
>  #include "sysemu/cpu-throttle.h"
>  #include "rdma.h"
>  #include "ram.h"
> +#include "migration/cpr.h"
>  #include "migration/global_state.h"
>  #include "migration/misc.h"
>  #include "migration.h"
> @@ -2118,6 +2119,10 @@ void qmp_migrate(const char *uri, bool has_channels,
>          }
>      }
>  
> +    if (cpr_state_save(&local_err)) {
> +        goto out;
> +    }
> +
>      if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
>          SocketAddress *saddr = &addr->u.socket;
>          if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
> @@ -2142,6 +2147,7 @@ void qmp_migrate(const char *uri, bool has_channels,
>                            MIGRATION_STATUS_FAILED);
>      }
>  
> +out:
>      if (local_err) {
>          if (!resume_requested) {
>              yank_unregister_instance(MIGRATION_YANK_INSTANCE);
> diff --git a/migration/trace-events b/migration/trace-events
> index 0b7c332..173f2c0 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -340,6 +340,11 @@ colo_receive_message(const char *msg) "Receive '%s' 
> message"
>  # colo-failover.c
>  colo_failover_set_state(const char *new_state) "new state %s"
>  
> +# cpr.c
> +cpr_save_fd(const char *name, int id, int fd) "%s, id %d, fd %d"
> +cpr_delete_fd(const char *name, int id) "%s, id %d"
> +cpr_find_fd(const char *name, int id, int fd) "%s, id %d returns %d"
> +
>  # block-dirty-bitmap.c
>  send_bitmap_header_enter(void) ""
>  send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, 
> uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" 
> PRIu32 ", data_size: %" PRIu64
> diff --git a/system/vl.c b/system/vl.c
> index 03951be..6521ee3 100644
> --- a/system/vl.c
> +++ b/system/vl.c
> @@ -77,6 +77,7 @@
>  #include "hw/block/block.h"
>  #include "hw/i386/x86.h"
>  #include "hw/i386/pc.h"
> +#include "migration/cpr.h"
>  #include "migration/misc.h"
>  #include "migration/snapshot.h"
>  #include "sysemu/tpm.h"
> @@ -3713,6 +3714,8 @@ void qemu_init(int argc, char **argv)
>  
>      qemu_create_machine(machine_opts_dict);
>  
> +    cpr_state_load(&error_fatal);

Might be good to add a rich comment here explaining the decision on why
loading here; I think most of the tricks lie here.  E.g., it needs to be
before XXX and it needs to be after YYY.

Thanks,

> +
>      suspend_mux_open();
>  
>      qemu_disable_default_devices();
> -- 
> 1.8.3.1
> 

-- 
Peter Xu


Reply via email to