On Fri, 11/27 10:48, Peter Xu wrote: > This patch implements "detach" for "dump-guest-memory" command. When > specified, one background thread is created to do the dump work. > > When there is a dump running in background, the commands "stop", > "cont" and "dump-guest-memory" will fail directly, with a hint > telling user: "there is a dump in progress". > > Although it is not quite essential for now, the new codes are trying > to make sure the dump is thread safe. To do this, one list is added > into GuestPhysBlockList to track all the referenced MemoryRegions > during dump process. We should make sure each MemoryRegion would > only be referenced for once. > > One global struct "GlobalDumpState" is added to store some global > informations for dump procedures. One mutex is used to protect the > global dump info.
This patch doesn't handle the incoming migration case, i.e. when QEMU is started with "-incoming", or "-incoming defer". Dump can go wrong when incoming migration happens at the same time. > > Signed-off-by: Peter Xu <pet...@redhat.com> > --- > dump.c | 114 > +++++++++++++++++++++++++++++++++++++--- > include/qemu-common.h | 4 ++ > include/sysemu/dump.h | 10 ++++ > include/sysemu/memory_mapping.h | 8 +++ > memory_mapping.c | 46 +++++++++++++++- > qmp.c | 14 +++++ > 6 files changed, 188 insertions(+), 8 deletions(-) > > diff --git a/dump.c b/dump.c > index d79e0ed..dfd56aa 100644 > --- a/dump.c > +++ b/dump.c > @@ -73,6 +73,7 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val) > static int dump_cleanup(DumpState *s) > { > guest_phys_blocks_free(&s->guest_phys_blocks); > + guest_memory_region_free(&s->guest_phys_blocks); > memory_mapping_list_free(&s->list); > close(s->fd); > if (s->resume) { > @@ -1427,6 +1428,9 @@ static void dump_init(DumpState *s, int fd, bool > has_format, > Error *err = NULL; > int ret; > > + s->has_format = has_format; > + s->format = format; > + > /* kdump-compressed is conflict with paging and filter */ > if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { > assert(!paging && !has_filter); > @@ -1580,6 +1584,86 @@ cleanup: > dump_cleanup(s); > } > > +extern GlobalDumpState global_dump_state; dump_state_get_global() returns a pointer to the local static variable, why do you need this? But what I really wonder is why a single boolean is not enough: the DumpState pointer can be passed to dump_thread, so it doesn't need to be global, then you don't need the mutex. > + > +static GlobalDumpState *dump_state_get_global(void) > +{ > + static bool init = false; > + static GlobalDumpState global_dump_state; > + if (unlikely(!init)) { > + qemu_mutex_init(&global_dump_state.gds_mutex); > + global_dump_state.gds_cur = NULL; > + init = true; > + } > + return &global_dump_state; > +} > + > +/* Returns non-zero if there is existing dump in progress, otherwise > + * zero is returned. */ > +bool dump_in_progress(void) > +{ > + GlobalDumpState *global = dump_state_get_global(); > + /* we do not need to take the mutex here if we are checking the > + * pointer only. */ > + return (!!global->gds_cur); > +} > + > +/* Trying to create one DumpState. This will fail if there is an > + * existing one. Return DumpState pointer if succeeded, otherwise > + * NULL is returned. */ > +static DumpState *dump_state_create(GlobalDumpState *global) > +{ > + DumpState *cur = NULL; > + qemu_mutex_lock(&global->gds_mutex); > + > + cur = global->gds_cur; > + if (cur) { > + /* one dump in progress */ > + cur = NULL; > + } else { > + /* we are the first! */ > + cur = g_malloc0(sizeof(*cur)); > + global->gds_cur = cur; > + } > + > + qemu_mutex_unlock(&global->gds_mutex); > + return cur; > +} > + > +/* release current DumpState structure */ > +static void dump_state_release(GlobalDumpState *global) > +{ > + DumpState *cur = NULL; > + qemu_mutex_lock(&global->gds_mutex); > + > + cur = global->gds_cur; > + /* we should never call release before having one */ > + assert(cur); > + global->gds_cur = NULL; > + > + qemu_mutex_unlock(&global->gds_mutex); > + dump_cleanup(cur); > + g_free(cur); > +} > + > +/* this operation might be time consuming. */ > +static void dump_process(DumpState *s, Error **errp) > +{ > + if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) { > + create_kdump_vmcore(s, errp); > + } else { > + create_vmcore(s, errp); > + } > +} > + > +static void *dump_thread(void *data) > +{ > + GlobalDumpState *global = (GlobalDumpState *)data; > + dump_process(global->gds_cur, NULL); > + dump_state_release(global); > + return NULL; > +} > + > void qmp_dump_guest_memory(bool paging, const char *file, > bool has_detach, bool detach, > bool has_begin, int64_t begin, bool has_length, > @@ -1590,6 +1674,14 @@ void qmp_dump_guest_memory(bool paging, const char > *file, > int fd = -1; > DumpState *s; > Error *local_err = NULL; > + /* by default, we are keeping the old style, which is sync dump */ > + bool detach_p = false; > + GlobalDumpState *global = dump_state_get_global(); > + > + if (runstate_check(RUN_STATE_INMIGRATE)) { > + error_setg(errp, "Dump not allowed during incoming migration."); > + return; > + } > > /* > * kdump-compressed format need the whole memory dumped, so paging or > @@ -1609,6 +1701,9 @@ void qmp_dump_guest_memory(bool paging, const char > *file, > error_setg(errp, QERR_MISSING_PARAMETER, "begin"); > return; > } > + if (has_detach) { > + detach_p = detach; > + } > > /* check whether lzo/snappy is supported */ > #ifndef CONFIG_LZO > @@ -1647,7 +1742,11 @@ void qmp_dump_guest_memory(bool paging, const char > *file, > return; > } > > - s = g_malloc0(sizeof(DumpState)); > + s = dump_state_create(global); > + if (!s) { > + error_setg(errp, "One dump is in progress."); > + return; > + } > > dump_init(s, fd, has_format, format, paging, has_begin, > begin, length, &local_err); > @@ -1657,14 +1756,15 @@ void qmp_dump_guest_memory(bool paging, const char > *file, > return; > } > > - if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { > - create_kdump_vmcore(s, errp); > + if (!detach_p) { > + /* sync dump */ > + dump_process(s, errp); > + dump_state_release(global); > } else { > - create_vmcore(s, errp); > + qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread, > + global, QEMU_THREAD_DETACHED); > + /* DumpState is freed in dump thread */ > } > - > - dump_cleanup(s); > - g_free(s); > } > > DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error > **errp) > diff --git a/include/qemu-common.h b/include/qemu-common.h > index 405364f..7b74961 100644 > --- a/include/qemu-common.h > +++ b/include/qemu-common.h > @@ -501,4 +501,8 @@ int parse_debug_env(const char *name, int max, int > initial); > const char *qemu_ether_ntoa(const MACAddr *mac); > void page_size_init(void); > > +/* returns non-zero if dump is in progress, otherwise zero is > + * returned. */ > +bool dump_in_progress(void); > + > #endif > diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h > index 7e4ec5c..13c913e 100644 > --- a/include/sysemu/dump.h > +++ b/include/sysemu/dump.h > @@ -183,9 +183,19 @@ typedef struct DumpState { > off_t offset_page; /* offset of page part in vmcore */ > size_t num_dumpable; /* number of page that can be dumped */ > uint32_t flag_compress; /* indicate the compression format */ > + > + QemuThread dump_thread; /* only used when do async dump */ > + bool has_format; /* whether format is provided */ > + DumpGuestMemoryFormat format; /* valid only if has_format == true */ > } DumpState; > > +typedef struct GlobalDumpState { > + QemuMutex gds_mutex; /* protector for GlobalDumpState itself */ > + DumpState *gds_cur; /* current DumpState (might be NULL) */ > +} GlobalDumpState; > + > uint16_t cpu_to_dump16(DumpState *s, uint16_t val); > uint32_t cpu_to_dump32(DumpState *s, uint32_t val); > uint64_t cpu_to_dump64(DumpState *s, uint64_t val); > + > #endif > diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h > index a75d59a..dd56fc7 100644 > --- a/include/sysemu/memory_mapping.h > +++ b/include/sysemu/memory_mapping.h > @@ -30,10 +30,17 @@ typedef struct GuestPhysBlock { > QTAILQ_ENTRY(GuestPhysBlock) next; > } GuestPhysBlock; > > +typedef struct GuestMemoryRegion { > + MemoryRegion *gmr_mr; > + QTAILQ_ENTRY(GuestMemoryRegion) next; > +} GuestMemoryRegion; > + > /* point-in-time snapshot of guest-visible physical mappings */ > typedef struct GuestPhysBlockList { > unsigned num; > QTAILQ_HEAD(GuestPhysBlockHead, GuestPhysBlock) head; > + /* housekeep all the MemoryRegion that is referenced */ > + QTAILQ_HEAD(GuestMemRegionHead, GuestMemoryRegion) head_mr; > } GuestPhysBlockList; > > /* The physical and virtual address in the memory mapping are contiguous. */ > @@ -65,6 +72,7 @@ void memory_mapping_list_free(MemoryMappingList *list); > void memory_mapping_list_init(MemoryMappingList *list); > > void guest_phys_blocks_free(GuestPhysBlockList *list); > +void guest_memory_region_free(GuestPhysBlockList *list); > void guest_phys_blocks_init(GuestPhysBlockList *list); > void guest_phys_blocks_append(GuestPhysBlockList *list); > > diff --git a/memory_mapping.c b/memory_mapping.c > index 36d6b26..a279552 100644 > --- a/memory_mapping.c > +++ b/memory_mapping.c > @@ -186,6 +186,7 @@ void guest_phys_blocks_init(GuestPhysBlockList *list) > { > list->num = 0; > QTAILQ_INIT(&list->head); > + QTAILQ_INIT(&list->head_mr); > } > > typedef struct GuestPhysListener { > @@ -193,6 +194,39 @@ typedef struct GuestPhysListener { > MemoryListener listener; > } GuestPhysListener; > > +static void guest_memory_region_add(GuestPhysBlockList *list, > + MemoryRegion *mr) > +{ > + GuestMemoryRegion *gmr = NULL; > + QTAILQ_FOREACH(gmr, &list->head_mr, next) { > + if (gmr->gmr_mr == mr) { > + /* we already refererenced the MemoryRegion */ > + return; > + } This is O(N^2). I think it should be fine to skip this check and just grab the reference. If that is the case, we can probably embed the mr pointer in GuestPhysBlock. Paolo? > + } > + /* reference the MemoryRegion to make sure it's valid during dump */ > +#ifdef DEBUG_DUMP_GUEST_MEMORY > + fprintf(stderr, "DUMP: ref mem region: %s\n", mr->name); > +#endif > + memory_region_ref(mr); > + gmr = g_malloc0(sizeof(*gmr)); > + gmr->gmr_mr = mr; > + QTAILQ_INSERT_TAIL(&list->head_mr, gmr, next); > +} > + > +void guest_memory_region_free(GuestPhysBlockList *list) > +{ > + GuestMemoryRegion *gmr = NULL, *q = NULL; > + QTAILQ_FOREACH_SAFE(gmr, &list->head_mr, next, q) { > +#ifdef DEBUG_DUMP_GUEST_MEMORY > + fprintf(stderr, "DUMP: unref mem region: %s\n", gmr->gmr_mr->name); > +#endif > + QTAILQ_REMOVE(&list->head_mr, gmr, next); > + memory_region_unref(gmr->gmr_mr); > + g_free(gmr); > + } > +} > + > static void guest_phys_blocks_region_add(MemoryListener *listener, > MemoryRegionSection *section) > { > @@ -241,6 +275,10 @@ static void guest_phys_blocks_region_add(MemoryListener > *listener, > block->target_end = target_end; > block->host_addr = host_addr; > > + /* keep all the MemoryRegion that is referenced by the dump > + * process */ > + guest_memory_region_add(g->list, section->mr); > + > QTAILQ_INSERT_TAIL(&g->list->head, block, next); > ++g->list->num; > } else { > @@ -250,7 +288,7 @@ static void guest_phys_blocks_region_add(MemoryListener > *listener, > predecessor->target_end = target_end; > } > > -#ifdef DEBUG_GUEST_PHYS_REGION_ADD > +#ifdef DEBUG_DUMP_GUEST_MEMORY > fprintf(stderr, "%s: target_start=" TARGET_FMT_plx " target_end=" > TARGET_FMT_plx ": %s (count: %u)\n", __FUNCTION__, target_start, > target_end, predecessor ? "joined" : "added", g->list->num); > @@ -263,8 +301,14 @@ void guest_phys_blocks_append(GuestPhysBlockList *list) > > g.list = list; > g.listener.region_add = &guest_phys_blocks_region_add; > + /* We should make sure all the memory objects are valid during > + * add dump regions. Before releasing it, we should also make > + * sure all the MemoryRegions to be used during dump is > + * referenced. */ > + rcu_read_lock(); > memory_listener_register(&g.listener, &address_space_memory); > memory_listener_unregister(&g.listener); > + rcu_read_unlock(); I don't think this is necessary if VM is stopped and incoming migration is not running - address_space_memory will be safe to access as usual. Otherwise this is a separate problem. > } > > static CPUState *find_paging_enabled_cpu(CPUState *start_cpu) > diff --git a/qmp.c b/qmp.c > index 0a1fa19..055586d 100644 > --- a/qmp.c > +++ b/qmp.c > @@ -102,6 +102,13 @@ void qmp_quit(Error **errp) > > void qmp_stop(Error **errp) > { > + /* if there is a dump in background, we should wait until the dump > + * finished */ > + if (dump_in_progress()) { > + error_setg(errp, "There is a dump in process, please wait."); > + return; > + } > + > if (runstate_check(RUN_STATE_INMIGRATE)) { > autostart = 0; > } else { > @@ -174,6 +181,13 @@ void qmp_cont(Error **errp) > BlockBackend *blk; > BlockDriverState *bs; > > + /* if there is a dump in background, we should wait until the dump > + * finished */ > + if (dump_in_progress()) { > + error_setg(errp, "There is a dump in process, please wait."); > + return; > + } > + > if (runstate_needs_reset()) { > error_setg(errp, "Resetting the Virtual Machine is required"); > return; > -- > 2.4.3 > >