Peter Xu <[email protected]> writes:

> Introduce a new flag, VMS_ARRAY_OF_POINTER_AUTO_ALLOC, for VMSD field.  It
> must be used together with VMS_ARRAY_OF_POINTER.
>
> It can be used to allow migration of an array of pointers where the
> pointers may point to NULLs.
>
> Note that we used to allow migration of a NULL pointer within an array that
> is being migrated. That corresponds to the code around vmstate_info_nullptr
> where we may get/put one byte showing that the element of an array is NULL.
>
> That usage is fine but very limited, it's because even if it will migrate a
> NULL pointer with a marker, it still works in a way that both src and dest
> QEMUs must know exactly which elements of the array are non-NULL, so
> instead of dynamically loading an array (which can have NULL pointers), it
> actually only verifies the known NULL pointers are still NULL pointers
> after migration.
>
> Also, in that case since dest QEMU knows exactly which element is NULL,
> which is not NULL, dest QEMU's device code will manage all allocations for
> the elements before invoking vmstate_load_vmsd().
>
> That's not enough per evolving needs of new device states that may want to
> provide real dynamic array of pointers, like what Alexander proposed here
> with the NVMe device migration:
>
> https://lore.kernel.org/r/[email protected]
>
> This patch is an alternative approach to address the problem.
>
> Along with the flag, introduce two new macros:
>
>   VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT{8|32}_ALLOC()
>
> Which will be used very soon in the NVMe series.
>
> Signed-off-by: Peter Xu <[email protected]>
> ---
>  include/migration/vmstate.h |  49 ++++++++++++++++-
>  migration/savevm.c          |  31 ++++++++++-
>  migration/vmstate.c         | 101 ++++++++++++++++++++++++++++++++----
>  3 files changed, 169 insertions(+), 12 deletions(-)
>
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 2e51b5ea04..70bebc60ed 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -161,8 +161,19 @@ enum VMStateFlags {
>       * structure we are referencing to use. */
>      VMS_VSTRUCT           = 0x8000,
>  
> +    /*
> +     * This is a sub-flag for VMS_ARRAY_OF_POINTER, so VMS_ARRAY_OF_POINTER
> +     * must be set altogether.  When set, it means array elements can
> +     * contain either valid or NULL pointers, vmstate core will sync it
> +     * between the two QEMU instances via the stream protocol.  When it's a

Not sure I understand what "will sync it" means. Isn't that just what
migration does?

> +     * valid pointer, the vmstate core will be responsible to do the proper
> +     * memory allocations.  It also means user of this flag must prepare
> +     * the array to be all NULLs otherwise memory may leak.

I don't get this as well. What's prepare to be NULL?

> +     */
> +    VMS_ARRAY_OF_POINTER_AUTO_ALLOC = 0x10000,
> +
>      /* Marker for end of list */
> -    VMS_END = 0x10000
> +    VMS_END                         = 0x20000,
>  };
>  
>  typedef enum {
> @@ -580,6 +591,42 @@ extern const VMStateInfo vmstate_info_qlist;
>      .offset     = vmstate_offset_array(_s, _f, _type*, _n),          \
>  }
>  
> +/*
> + * For migrating a dynamically allocated uint{8,32}-indexed array of
> + * pointers to structures (with NULL entries and with auto memory
> + * allocation).
> + *
> + * _type: type of structure pointed to
> + * _vmsd: VMSD for structure _type (when VMS_STRUCT is set)
> + * _info: VMStateInfo for _type (when VMS_STRUCT is not set)
> + * start: size of (_type) pointed to (for auto memory allocation)
> + */
> +#define VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT8_ALLOC(\
> +    _field, _state, _field_num, _version, _vmsd, _type) {            \
> +    .name       = (stringify(_field)),                               \
> +    .version_id = (_version),                                        \
> +    .num_offset = vmstate_offset_value(_state, _field_num, uint8_t), \
> +    .vmsd       = &(_vmsd),                                          \
> +    .size       = sizeof(_type),                                     \
> +    .flags      = VMS_POINTER | VMS_VARRAY_UINT8 |                   \
> +                  VMS_ARRAY_OF_POINTER | VMS_STRUCT |                \
> +                  VMS_ARRAY_OF_POINTER_AUTO_ALLOC,                   \
> +    .offset     = vmstate_offset_pointer(_state, _field, _type *),   \
> +}
> +
> +#define VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT32_ALLOC(\
> +    _field, _state, _field_num, _version, _vmsd, _type) {             \
> +    .name       = (stringify(_field)),                                \
> +    .version_id = (_version),                                         \
> +    .num_offset = vmstate_offset_value(_state, _field_num, uint32_t), \
> +    .vmsd       = &(_vmsd),                                           \
> +    .size       = sizeof(_type),                                      \
> +    .flags      = VMS_POINTER | VMS_VARRAY_UINT32 |                   \
> +                  VMS_ARRAY_OF_POINTER | VMS_STRUCT |                 \
> +                  VMS_ARRAY_OF_POINTER_AUTO_ALLOC,                    \
> +    .offset     = vmstate_offset_pointer(_state, _field, _type *),    \
> +}
> +
>  #define VMSTATE_VARRAY_OF_POINTER_UINT32(_field, _state, _field_num, 
> _version, _info, _type) { \
>      .name       = (stringify(_field)),                                    \
>      .version_id = (_version),                                             \
> diff --git a/migration/savevm.c b/migration/savevm.c
> index f5a6fd0c66..34223de818 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -869,8 +869,37 @@ static void vmstate_check(const VMStateDescription *vmsd)
>      if (field) {
>          while (field->name) {
>              if (field->flags & VMS_ARRAY_OF_POINTER) {
> -                assert(field->size == 0);
> +                if (VMS_ARRAY_OF_POINTER_AUTO_ALLOC) {
> +                    /*
> +                     * Size must be provided because dest QEMU needs that
> +                     * info to know what to allocate
> +                     */
> +                    assert(field->size || field->size_offset);
> +                } else {
> +                    /*
> +                     * Otherwise size info isn't useful (because it's
> +                     * always the size of host pointer), enforce accidental

s/enforce/detect/

> +                     * setup of sizes in this case.
> +                     */
> +                    assert(field->size == 0 && field->size_offset == 0);
> +                }
> +                /*
> +                 * VMS_ARRAY_OF_POINTER must be used only together with one
> +                 * of VMS_(V)ARRAY* flags.
> +                 */
> +                assert(field->flags & (VMS_ARRAY | VMS_VARRAY_INT32 |
> +                                       VMS_VARRAY_UINT16 | VMS_VARRAY_UINT8 |
> +                                       VMS_VARRAY_UINT32));
>              }
> +
> +            /*
> +             * When VMS_ARRAY_OF_POINTER_ALLOW_NULL is used, we must have

The name has changed.

> +             * VMS_ARRAY_OF_POINTER set too.
> +             */
> +            if (field->flags & VMS_ARRAY_OF_POINTER_AUTO_ALLOC) {
> +                assert(field->flags & VMS_ARRAY_OF_POINTER);
> +            }
> +
>              if (field->flags & (VMS_STRUCT | VMS_VSTRUCT)) {
>                  /* Recurse to sub structures */
>                  vmstate_check(field->vmsd);
> diff --git a/migration/vmstate.c b/migration/vmstate.c
> index d65fc84dfa..7d7d9c7e18 100644
> --- a/migration/vmstate.c
> +++ b/migration/vmstate.c
> @@ -153,6 +153,12 @@ static bool vmstate_ptr_marker_load(QEMUFile *f, bool 
> *load_field,
>          return true;
>      }
>  
> +    if (byte == VMS_MARKER_PTR_VALID) {
> +        /* We need to load this field right after the marker */

s/this/the/

> +        *load_field = true;
> +        return true;
> +    }
> +
>      error_setg(errp, "Unexpected ptr marker: %d", byte);
>      return false;
>  }
> @@ -234,6 +240,22 @@ static bool vmstate_post_load(const VMStateDescription 
> *vmsd,
>      return true;
>  }
>  
> +
> +/*
> + * If we will use a ptr marker in the stream for a field?  Two use cases:
> + *
> + * (1) When used with VMS_ARRAY_OF_POINTER_ALLOW_NULL, it must always be

AUTO_ALLOC

> + *     present to imply the population status of the pointer.
> + *
> + * (2) When used with normal VMSD array fields, only emit a null ptr marker
> + *     if the pointer is NULL.
> + */
> +static inline bool
> +vmstate_use_marker_field(void *ptr, int size, bool dynamic_array)
> +{
> +    return (!ptr && size) || dynamic_array;
> +}
> +
>  bool vmstate_load_vmsd(QEMUFile *f, const VMStateDescription *vmsd,
>                         void *opaque, int version_id, Error **errp)
>  {
> @@ -271,6 +293,12 @@ bool vmstate_load_vmsd(QEMUFile *f, const 
> VMStateDescription *vmsd,
>              void *first_elem = opaque + field->offset;
>              int i, n_elems = vmstate_n_elems(opaque, field);
>              int size = vmstate_size(opaque, field);
> +            /*
> +             * When this is enabled, it means we will always push a ptr
> +             * marker first for each element saying if it's populated.

This is the load side, I'm not sure "push a ptr marker" makes sense
here.

> +             */
> +            bool use_dynamic_array =
> +                field->flags & VMS_ARRAY_OF_POINTER_AUTO_ALLOC;
>  
>              vmstate_handle_alloc(first_elem, field, opaque);
>              if (field->flags & VMS_POINTER) {
> @@ -282,18 +310,37 @@ bool vmstate_load_vmsd(QEMUFile *f, const 
> VMStateDescription *vmsd,
>                  /* If we will process the load of field? */
>                  bool load_field = true;
>                  bool ok = true;
> -                void *curr_elem = first_elem + size * i;
> +                bool use_marker_field;
> +                void *curr_elem_p = first_elem + size * i;
> +                void *curr_elem = curr_elem_p;
>  
>                  if (field->flags & VMS_ARRAY_OF_POINTER) {
> -                    curr_elem = *(void **)curr_elem;
> +                    curr_elem = *(void **)curr_elem_p;
>                  }
>  
> -                if (!curr_elem && size) {
> -                    /* Read the marker instead of VMSD itself */
> +                use_marker_field = vmstate_use_marker_field(curr_elem, size,
> +                                                            
> use_dynamic_array);
> +                if (use_marker_field) {
> +                    /* Read the marker instead of VMSD first */
>                      if (!vmstate_ptr_marker_load(f, &load_field, errp)) {
>                          trace_vmstate_load_field_error(field->name, -EINVAL);
>                          return false;
>                      }
> +
> +                    if (load_field) {
> +                        /*
> +                         * When reaching here, it means we received a
> +                         * non-NULL ptr marker, so we need to populate the
> +                         * field before loading it.
> +                         *
> +                         * NOTE: do not use vmstate_size() here, because we
> +                         * need the object size, not entry size of the
> +                         * array.
> +                         */
> +                        curr_elem = g_malloc0(field->size);
> +                        /* Remember to update the root pointer! */
> +                        *(void **)curr_elem_p = curr_elem;
> +                    }
>                  }
>  
>                  if (load_field) {
> @@ -397,6 +444,16 @@ static bool vmsd_can_compress(const VMStateField *field)
>          return false;
>      }
>  
> +    if (field->flags & VMS_ARRAY_OF_POINTER_AUTO_ALLOC) {
> +        /*
> +         * This may involve two VMSD fields to be dumped, one for the
> +         * marker to show if the pointer is NULL, followed by the real
> +         * vmstate object.  To make it simple at least for now, skip
> +         * compression for this one.
> +         */
> +        return false;
> +    }
> +
>      if (field->flags & VMS_STRUCT) {
>          const VMStateField *sfield = field->vmsd->fields;
>          while (sfield->name) {
> @@ -578,6 +635,12 @@ static bool vmstate_save_vmsd_v(QEMUFile *f, const 
> VMStateDescription *vmsd,
>              int size = vmstate_size(opaque, field);
>              JSONWriter *vmdesc_loop = vmdesc;
>              bool use_marker_field_prev = false;
> +            /*
> +             * When this is enabled, it means we will always push a ptr
> +             * marker first for each element saying if it's populated.
> +             */
> +            bool use_dynamic_array =
> +                field->flags & VMS_ARRAY_OF_POINTER_AUTO_ALLOC;
>  
>              trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems);
>              if (field->flags & VMS_POINTER) {
> @@ -596,13 +659,10 @@ static bool vmstate_save_vmsd_v(QEMUFile *f, const 
> VMStateDescription *vmsd,
>                      curr_elem = *(void **)curr_elem;
>                  }
>  
> -                use_marker_field = !curr_elem && size;
> +                use_marker_field = vmstate_use_marker_field(curr_elem, size,
> +                                                            
> use_dynamic_array);
> +
>                  if (use_marker_field) {
> -                    /*
> -                     * If null pointer found (which should only happen in
> -                     * an array of pointers), use null placeholder and do
> -                     * not follow.
> -                     */
>                      inner_field = vmsd_create_ptr_marker_field(field);
>                  } else {
>                      inner_field = field;
> @@ -652,6 +712,27 @@ static bool vmstate_save_vmsd_v(QEMUFile *f, const 
> VMStateDescription *vmsd,
>                      goto out;
>                  }
>  
> +                /*
> +                 * If we're using dynamic array and the element is
> +                 * populated, dump the real object right after the marker.
> +                 */
> +                if (use_dynamic_array && curr_elem) {
> +                    /*
> +                     * NOTE: do not use vmstate_size() here because we want
> +                     * to dump the real VMSD object now.
> +                     */
> +                    ok = vmstate_save_field_with_vmdesc(f, curr_elem,
> +                                                        field->size, vmsd,
> +                                                        field, vmdesc_loop,
> +                                                        i, max_elems, errp);
> +
> +                    if (!ok) {
> +                        error_prepend(errp, "Save of field %s/%s failed: ",
> +                                      vmsd->name, field->name);
> +                        goto out;
> +                    }
> +                }
> +
>                  /* Compressed arrays only care about the first element */
>                  if (vmdesc_loop && vmsd_can_compress(field)) {
>                      vmdesc_loop = NULL;

Reply via email to