On Fri, 11/27 10:48, Peter Xu wrote:
> This patch implements "detach" for "dump-guest-memory" command. When
> specified, one background thread is created to do the dump work.
> 
> When there is a dump running in background, the commands "stop",
> "cont" and "dump-guest-memory" will fail directly, with a hint
> telling user: "there is a dump in progress".
> 
> Although it is not quite essential for now, the new codes are trying
> to make sure the dump is thread safe. To do this, one list is added
> into GuestPhysBlockList to track all the referenced MemoryRegions
> during dump process. We should make sure each MemoryRegion would
> only be referenced for once.
> 
> One global struct "GlobalDumpState" is added to store some global
> informations for dump procedures. One mutex is used to protect the
> global dump info.

This patch doesn't handle the incoming migration case, i.e. when QEMU is
started with "-incoming", or "-incoming defer". Dump can go wrong when incoming
migration happens at the same time.

> 
> Signed-off-by: Peter Xu <pet...@redhat.com>
> ---
>  dump.c                          | 114 
> +++++++++++++++++++++++++++++++++++++---
>  include/qemu-common.h           |   4 ++
>  include/sysemu/dump.h           |  10 ++++
>  include/sysemu/memory_mapping.h |   8 +++
>  memory_mapping.c                |  46 +++++++++++++++-
>  qmp.c                           |  14 +++++
>  6 files changed, 188 insertions(+), 8 deletions(-)
> 
> diff --git a/dump.c b/dump.c
> index d79e0ed..dfd56aa 100644
> --- a/dump.c
> +++ b/dump.c
> @@ -73,6 +73,7 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
>  static int dump_cleanup(DumpState *s)
>  {
>      guest_phys_blocks_free(&s->guest_phys_blocks);
> +    guest_memory_region_free(&s->guest_phys_blocks);
>      memory_mapping_list_free(&s->list);
>      close(s->fd);
>      if (s->resume) {
> @@ -1427,6 +1428,9 @@ static void dump_init(DumpState *s, int fd, bool 
> has_format,
>      Error *err = NULL;
>      int ret;
>  
> +    s->has_format = has_format;
> +    s->format = format;
> +
>      /* kdump-compressed is conflict with paging and filter */
>      if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
>          assert(!paging && !has_filter);
> @@ -1580,6 +1584,86 @@ cleanup:
>      dump_cleanup(s);
>  }
>  
> +extern GlobalDumpState global_dump_state;

dump_state_get_global() returns a pointer to the local static variable, why do
you need this?

But what I really wonder is why a single boolean is not enough: the DumpState
pointer can be passed to dump_thread, so it doesn't need to be global, then you
don't need the mutex.

> +
> +static GlobalDumpState *dump_state_get_global(void)
> +{
> +    static bool init = false;
> +    static GlobalDumpState global_dump_state;
> +    if (unlikely(!init)) {
> +        qemu_mutex_init(&global_dump_state.gds_mutex);
> +        global_dump_state.gds_cur = NULL;
> +        init = true;
> +    }
> +    return &global_dump_state;
> +}
> +
> +/* Returns non-zero if there is existing dump in progress, otherwise
> + * zero is returned. */
> +bool dump_in_progress(void)
> +{
> +    GlobalDumpState *global = dump_state_get_global();
> +    /* we do not need to take the mutex here if we are checking the
> +     * pointer only. */
> +    return (!!global->gds_cur);
> +}
> +
> +/* Trying to create one DumpState. This will fail if there is an
> + * existing one. Return DumpState pointer if succeeded, otherwise
> + * NULL is returned. */
> +static DumpState *dump_state_create(GlobalDumpState *global)
> +{
> +    DumpState *cur = NULL;
> +    qemu_mutex_lock(&global->gds_mutex);
> +
> +    cur = global->gds_cur;
> +    if (cur) {
> +        /* one dump in progress */
> +        cur = NULL;
> +    } else {
> +        /* we are the first! */
> +        cur = g_malloc0(sizeof(*cur));
> +        global->gds_cur = cur;
> +    }
> +
> +    qemu_mutex_unlock(&global->gds_mutex);
> +    return cur;
> +}
> +
> +/* release current DumpState structure */
> +static void dump_state_release(GlobalDumpState *global)
> +{
> +    DumpState *cur = NULL;
> +    qemu_mutex_lock(&global->gds_mutex);
> +
> +    cur = global->gds_cur;
> +    /* we should never call release before having one */
> +    assert(cur);
> +    global->gds_cur = NULL;
> +
> +    qemu_mutex_unlock(&global->gds_mutex);
> +    dump_cleanup(cur);
> +    g_free(cur);
> +}
> +
> +/* this operation might be time consuming. */
> +static void dump_process(DumpState *s, Error **errp)
> +{
> +    if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
> +        create_kdump_vmcore(s, errp);
> +    } else {
> +        create_vmcore(s, errp);
> +    }
> +}
> +
> +static void *dump_thread(void *data)
> +{
> +    GlobalDumpState *global = (GlobalDumpState *)data;
> +    dump_process(global->gds_cur, NULL);
> +    dump_state_release(global);
> +    return NULL;
> +}
> +
>  void qmp_dump_guest_memory(bool paging, const char *file,
>                             bool has_detach, bool detach,
>                             bool has_begin, int64_t begin, bool has_length,
> @@ -1590,6 +1674,14 @@ void qmp_dump_guest_memory(bool paging, const char 
> *file,
>      int fd = -1;
>      DumpState *s;
>      Error *local_err = NULL;
> +    /* by default, we are keeping the old style, which is sync dump */
> +    bool detach_p = false;
> +    GlobalDumpState *global = dump_state_get_global();
> +
> +    if (runstate_check(RUN_STATE_INMIGRATE)) {
> +        error_setg(errp, "Dump not allowed during incoming migration.");
> +        return;
> +    }
>  
>      /*
>       * kdump-compressed format need the whole memory dumped, so paging or
> @@ -1609,6 +1701,9 @@ void qmp_dump_guest_memory(bool paging, const char 
> *file,
>          error_setg(errp, QERR_MISSING_PARAMETER, "begin");
>          return;
>      }
> +    if (has_detach) {
> +        detach_p = detach;
> +    }
>  
>      /* check whether lzo/snappy is supported */
>  #ifndef CONFIG_LZO
> @@ -1647,7 +1742,11 @@ void qmp_dump_guest_memory(bool paging, const char 
> *file,
>          return;
>      }
>  
> -    s = g_malloc0(sizeof(DumpState));
> +    s = dump_state_create(global);
> +    if (!s) {
> +        error_setg(errp, "One dump is in progress.");
> +        return;
> +    }
>  
>      dump_init(s, fd, has_format, format, paging, has_begin,
>                begin, length, &local_err);
> @@ -1657,14 +1756,15 @@ void qmp_dump_guest_memory(bool paging, const char 
> *file,
>          return;
>      }
>  
> -    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
> -        create_kdump_vmcore(s, errp);
> +    if (!detach_p) {
> +        /* sync dump */
> +        dump_process(s, errp);
> +        dump_state_release(global);
>      } else {
> -        create_vmcore(s, errp);
> +        qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
> +                           global, QEMU_THREAD_DETACHED);
> +        /* DumpState is freed in dump thread */
>      }
> -
> -    dump_cleanup(s);
> -    g_free(s);
>  }
>  
>  DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error 
> **errp)
> diff --git a/include/qemu-common.h b/include/qemu-common.h
> index 405364f..7b74961 100644
> --- a/include/qemu-common.h
> +++ b/include/qemu-common.h
> @@ -501,4 +501,8 @@ int parse_debug_env(const char *name, int max, int 
> initial);
>  const char *qemu_ether_ntoa(const MACAddr *mac);
>  void page_size_init(void);
>  
> +/* returns non-zero if dump is in progress, otherwise zero is
> + * returned. */
> +bool dump_in_progress(void);
> +
>  #endif
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index 7e4ec5c..13c913e 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -183,9 +183,19 @@ typedef struct DumpState {
>      off_t offset_page;          /* offset of page part in vmcore */
>      size_t num_dumpable;        /* number of page that can be dumped */
>      uint32_t flag_compress;     /* indicate the compression format */
> +
> +    QemuThread dump_thread;     /* only used when do async dump */
> +    bool has_format;            /* whether format is provided */
> +    DumpGuestMemoryFormat format; /* valid only if has_format == true */
>  } DumpState;
>  
> +typedef struct GlobalDumpState {
> +    QemuMutex gds_mutex;  /* protector for GlobalDumpState itself */
> +    DumpState *gds_cur;   /* current DumpState (might be NULL) */
> +} GlobalDumpState;
> +
>  uint16_t cpu_to_dump16(DumpState *s, uint16_t val);
>  uint32_t cpu_to_dump32(DumpState *s, uint32_t val);
>  uint64_t cpu_to_dump64(DumpState *s, uint64_t val);
> +
>  #endif
> diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h
> index a75d59a..dd56fc7 100644
> --- a/include/sysemu/memory_mapping.h
> +++ b/include/sysemu/memory_mapping.h
> @@ -30,10 +30,17 @@ typedef struct GuestPhysBlock {
>      QTAILQ_ENTRY(GuestPhysBlock) next;
>  } GuestPhysBlock;
>  
> +typedef struct GuestMemoryRegion {
> +    MemoryRegion *gmr_mr;
> +    QTAILQ_ENTRY(GuestMemoryRegion) next;
> +} GuestMemoryRegion;
> +
>  /* point-in-time snapshot of guest-visible physical mappings */
>  typedef struct GuestPhysBlockList {
>      unsigned num;
>      QTAILQ_HEAD(GuestPhysBlockHead, GuestPhysBlock) head;
> +    /* housekeep all the MemoryRegion that is referenced */
> +    QTAILQ_HEAD(GuestMemRegionHead, GuestMemoryRegion) head_mr;
>  } GuestPhysBlockList;
>  
>  /* The physical and virtual address in the memory mapping are contiguous. */
> @@ -65,6 +72,7 @@ void memory_mapping_list_free(MemoryMappingList *list);
>  void memory_mapping_list_init(MemoryMappingList *list);
>  
>  void guest_phys_blocks_free(GuestPhysBlockList *list);
> +void guest_memory_region_free(GuestPhysBlockList *list);
>  void guest_phys_blocks_init(GuestPhysBlockList *list);
>  void guest_phys_blocks_append(GuestPhysBlockList *list);
>  
> diff --git a/memory_mapping.c b/memory_mapping.c
> index 36d6b26..a279552 100644
> --- a/memory_mapping.c
> +++ b/memory_mapping.c
> @@ -186,6 +186,7 @@ void guest_phys_blocks_init(GuestPhysBlockList *list)
>  {
>      list->num = 0;
>      QTAILQ_INIT(&list->head);
> +    QTAILQ_INIT(&list->head_mr);
>  }
>  
>  typedef struct GuestPhysListener {
> @@ -193,6 +194,39 @@ typedef struct GuestPhysListener {
>      MemoryListener listener;
>  } GuestPhysListener;
>  
> +static void guest_memory_region_add(GuestPhysBlockList *list,
> +                                    MemoryRegion *mr)
> +{
> +    GuestMemoryRegion *gmr = NULL;
> +    QTAILQ_FOREACH(gmr, &list->head_mr, next) {
> +        if (gmr->gmr_mr == mr) {
> +            /* we already refererenced the MemoryRegion */
> +            return;
> +        }

This is O(N^2). I think it should be fine to skip this check and just grab the
reference. If that is the case, we can probably embed the mr pointer in
GuestPhysBlock.  Paolo?

> +    }
> +    /* reference the MemoryRegion to make sure it's valid during dump */
> +#ifdef DEBUG_DUMP_GUEST_MEMORY
> +    fprintf(stderr, "DUMP: ref mem region: %s\n", mr->name);
> +#endif
> +    memory_region_ref(mr);
> +    gmr = g_malloc0(sizeof(*gmr));
> +    gmr->gmr_mr = mr;
> +    QTAILQ_INSERT_TAIL(&list->head_mr, gmr, next);
> +}
> +
> +void guest_memory_region_free(GuestPhysBlockList *list)
> +{
> +    GuestMemoryRegion *gmr = NULL, *q = NULL;
> +    QTAILQ_FOREACH_SAFE(gmr, &list->head_mr, next, q) {
> +#ifdef DEBUG_DUMP_GUEST_MEMORY
> +        fprintf(stderr, "DUMP: unref mem region: %s\n", gmr->gmr_mr->name);
> +#endif
> +        QTAILQ_REMOVE(&list->head_mr, gmr, next);
> +        memory_region_unref(gmr->gmr_mr);
> +        g_free(gmr);
> +    }
> +}
> +
>  static void guest_phys_blocks_region_add(MemoryListener *listener,
>                                           MemoryRegionSection *section)
>  {
> @@ -241,6 +275,10 @@ static void guest_phys_blocks_region_add(MemoryListener 
> *listener,
>          block->target_end   = target_end;
>          block->host_addr    = host_addr;
>  
> +        /* keep all the MemoryRegion that is referenced by the dump
> +         * process */
> +        guest_memory_region_add(g->list, section->mr);
> +
>          QTAILQ_INSERT_TAIL(&g->list->head, block, next);
>          ++g->list->num;
>      } else {
> @@ -250,7 +288,7 @@ static void guest_phys_blocks_region_add(MemoryListener 
> *listener,
>          predecessor->target_end = target_end;
>      }
>  
> -#ifdef DEBUG_GUEST_PHYS_REGION_ADD
> +#ifdef DEBUG_DUMP_GUEST_MEMORY
>      fprintf(stderr, "%s: target_start=" TARGET_FMT_plx " target_end="
>              TARGET_FMT_plx ": %s (count: %u)\n", __FUNCTION__, target_start,
>              target_end, predecessor ? "joined" : "added", g->list->num);
> @@ -263,8 +301,14 @@ void guest_phys_blocks_append(GuestPhysBlockList *list)
>  
>      g.list = list;
>      g.listener.region_add = &guest_phys_blocks_region_add;
> +    /* We should make sure all the memory objects are valid during
> +     * add dump regions. Before releasing it, we should also make
> +     * sure all the MemoryRegions to be used during dump is
> +     * referenced. */
> +    rcu_read_lock();
>      memory_listener_register(&g.listener, &address_space_memory);
>      memory_listener_unregister(&g.listener);
> +    rcu_read_unlock();

I don't think this is necessary if VM is stopped and incoming migration is
not running - address_space_memory will be safe to access as usual. Otherwise
this is a separate problem.

>  }
>  
>  static CPUState *find_paging_enabled_cpu(CPUState *start_cpu)
> diff --git a/qmp.c b/qmp.c
> index 0a1fa19..055586d 100644
> --- a/qmp.c
> +++ b/qmp.c
> @@ -102,6 +102,13 @@ void qmp_quit(Error **errp)
>  
>  void qmp_stop(Error **errp)
>  {
> +    /* if there is a dump in background, we should wait until the dump
> +     * finished */
> +    if (dump_in_progress()) {
> +        error_setg(errp, "There is a dump in process, please wait.");
> +        return;
> +    }
> +
>      if (runstate_check(RUN_STATE_INMIGRATE)) {
>          autostart = 0;
>      } else {
> @@ -174,6 +181,13 @@ void qmp_cont(Error **errp)
>      BlockBackend *blk;
>      BlockDriverState *bs;
>  
> +    /* if there is a dump in background, we should wait until the dump
> +     * finished */
> +    if (dump_in_progress()) {
> +        error_setg(errp, "There is a dump in process, please wait.");
> +        return;
> +    }
> +
>      if (runstate_needs_reset()) {
>          error_setg(errp, "Resetting the Virtual Machine is required");
>          return;
> -- 
> 2.4.3
> 
> 

Reply via email to