Re: [Qemu-devel] [RFC][PATCH 11/14 v9] introduce a new monitor command 'dump' to dump guest's memory

Luiz Capitulino Wed, 14 Mar 2012 10:19:25 -0700

On Wed, 14 Mar 2012 10:11:35 +0800
Wen Congyang <we...@cn.fujitsu.com> wrote:


> The command's usage:
>    dump [-p] file
> file should be start with "file:"(the file's path) or "fd:"(the fd's name).
> 
> Note:
>   1. If you want to use gdb to analyse the core, please specify -p option.
>   2. This command doesn't support the fd that is is associated with a pipe,
>      socket, or FIFO(lseek will fail with such fd).
> 
> Signed-off-by: Wen Congyang <we...@cn.fujitsu.com>
> ---
>  Makefile.target  |    2 +-
>  dump.c           |  714 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  elf.h            |    5 +
>  hmp-commands.hx  |   21 ++
>  hmp.c            |   10 +
>  hmp.h            |    1 +
>  qapi-schema.json |   14 +
>  qmp-commands.hx  |   34 +++
>  8 files changed, 800 insertions(+), 1 deletions(-)
>  create mode 100644 dump.c
> 
> diff --git a/Makefile.target b/Makefile.target
> index c81c4fa..287fbe7 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -213,7 +213,7 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
>  obj-$(CONFIG_VGA) += vga.o
>  obj-y += memory.o savevm.o
>  obj-y += memory_mapping.o
> -obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o
> +obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o dump.o
>  LIBS+=-lz
>  
>  obj-i386-$(CONFIG_KVM) += hyperv.o
> diff --git a/dump.c b/dump.c
> new file mode 100644
> index 0000000..42e1681
> --- /dev/null
> +++ b/dump.c
> @@ -0,0 +1,714 @@
> +/*
> + * QEMU dump
> + *
> + * Copyright Fujitsu, Corp. 2011
> + *
> + * Authors:
> + *     Wen Congyang <we...@cn.fujitsu.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include <unistd.h>
> +#include "elf.h"
> +#include <sys/procfs.h>
> +#include <glib.h>
> +#include "cpu.h"
> +#include "cpu-all.h"
> +#include "targphys.h"
> +#include "monitor.h"
> +#include "kvm.h"
> +#include "dump.h"
> +#include "sysemu.h"
> +#include "bswap.h"
> +#include "memory_mapping.h"
> +#include "error.h"
> +#include "qmp-commands.h"
> +#include "gdbstub.h"
> +
> +static inline uint16_t cpu_convert_to_target16(uint16_t val, int endian)
> +{
> +    if (endian == ELFDATA2LSB) {
> +        val = cpu_to_le16(val);
> +    } else {
> +        val = cpu_to_be16(val);
> +    }
> +
> +    return val;
> +}
> +
> +static inline uint32_t cpu_convert_to_target32(uint32_t val, int endian)
> +{
> +    if (endian == ELFDATA2LSB) {
> +        val = cpu_to_le32(val);
> +    } else {
> +        val = cpu_to_be32(val);
> +    }
> +
> +    return val;
> +}
> +
> +static inline uint64_t cpu_convert_to_target64(uint64_t val, int endian)
> +{
> +    if (endian == ELFDATA2LSB) {
> +        val = cpu_to_le64(val);
> +    } else {
> +        val = cpu_to_be64(val);
> +    }
> +
> +    return val;
> +}
> +
> +enum {
> +    DUMP_STATE_ERROR,
> +    DUMP_STATE_SETUP,
> +    DUMP_STATE_CANCELLED,
> +    DUMP_STATE_ACTIVE,
> +    DUMP_STATE_COMPLETED,
> +};
> +
> +typedef struct DumpState {
> +    ArchDumpInfo dump_info;
> +    MemoryMappingList list;
> +    uint16_t phdr_num;
> +    uint32_t sh_info;
> +    bool have_section;
> +    int state;
> +    bool resume;
> +    char *error;
> +    target_phys_addr_t memory_offset;
> +    write_core_dump_function f;
> +    void (*cleanup)(void *opaque);
> +    void *opaque;
> +} DumpState;
> +
> +static DumpState *dump_get_current(void)
> +{
> +    static DumpState current_dump = {
> +        .state = DUMP_STATE_SETUP,
> +    };
> +
> +    return &current_dump;
> +}

You just dropped a few asynchronous bits and resent this as a synchronous
command, letting all the asynchronous infrastructure in. This is bad, as the
command is more complex then it should be and doesn't make full use of the
added infrastructure.

For example, does the synchronous version really uses DumpState? If it doesn't,
let's just drop it and everything else which is not necessary.

*However*, note that while it's fine with me to have this as a synchronous
command we need a few more ACKs (from libvirt and Anthony and/or Jan). So, I
wouldn't go too far on making changes before we get those ACKs.

> +
> +static int dump_cleanup(DumpState *s)
> +{
> +    int ret = 0;
> +
> +    memory_mapping_list_free(&s->list);
> +    s->cleanup(s->opaque);
> +    if (s->resume) {
> +        vm_start();
> +    }
> +
> +    return ret;
> +}
> +
> +static void dump_error(DumpState *s, const char *reason)
> +{
> +    s->state = DUMP_STATE_ERROR;
> +    s->error = g_strdup(reason);
> +    dump_cleanup(s);
> +}
> +
> +static int write_elf64_header(DumpState *s)
> +{
> +    Elf64_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);
> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
> +                                                   endian);
> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), 
> endian);
> +    elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
> +                                                     endian);
> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
> +    if (s->have_section) {
> +        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * 
> s->sh_info;
> +
> +        elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
> +                                                         endian);
> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
> +    }
> +
> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_header(DumpState *s)
> +{
> +    Elf32_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);
> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
> +    elf_header.e_ident[EI_DATA] = endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
> +                                                   endian);
> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), 
> endian);
> +    elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
> +                                                     endian);
> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
> +    if (s->have_section) {
> +        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * 
> s->sh_info;
> +
> +        elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
> +                                                         endian);
> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
> +    }
> +
> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
> +    phdr.p_offset = cpu_convert_to_target64(offset, endian);
> +    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, 
> endian);
> +    if (offset == -1) {
> +        phdr.p_filesz = 0;
> +    } else {
> +        phdr.p_filesz = cpu_convert_to_target64(memory_mapping->length, 
> endian);
> +    }
> +    phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
> +    phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, 
> endian);
> +
> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf64_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf32_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
> +    phdr.p_offset = cpu_convert_to_target32(offset, endian);
> +    phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, 
> endian);
> +    if (offset == -1) {
> +        phdr.p_filesz = 0;
> +    } else {
> +        phdr.p_filesz = cpu_convert_to_target32(memory_mapping->length, 
> endian);
> +    }
> +    phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
> +    phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, 
> endian);
> +
> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf32_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf64_notes(DumpState *s, int phdr_index,
> +                             target_phys_addr_t *offset)
> +{
> +    CPUState *env;
> +    int ret;
> +    target_phys_addr_t begin = *offset;
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int id;
> +    int endian = s->dump_info.d_endian;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        id = gdb_id(env);
> +        ret = cpu_write_elf64_note(s->f, env, id, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write elf notes.\n");
> +            return -1;
> +        }
> +    }
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        ret = cpu_write_elf64_qemunote(s->f, env, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write CPU status.\n");
> +            return -1;
> +        }
> +    }
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
> +    phdr.p_offset = cpu_convert_to_target64(begin, endian);
> +    phdr.p_paddr = 0;
> +    phdr.p_filesz = cpu_convert_to_target64(*offset - begin, endian);
> +    phdr.p_memsz = cpu_convert_to_target64(*offset - begin, endian);
> +    phdr.p_vaddr = 0;
> +
> +    phdr_offset = sizeof(Elf64_Ehdr);
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf64_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_notes(DumpState *s, int phdr_index,
> +                             target_phys_addr_t *offset)
> +{
> +    CPUState *env;
> +    int ret;
> +    target_phys_addr_t begin = *offset;
> +    Elf32_Phdr phdr;
> +    off_t phdr_offset;
> +    int id;
> +    int endian = s->dump_info.d_endian;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        id = gdb_id(env);
> +        ret = cpu_write_elf32_note(s->f, env, id, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write elf notes.\n");
> +            return -1;
> +        }
> +    }
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        ret = cpu_write_elf32_qemunote(s->f, env, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write CPU status.\n");
> +            return -1;
> +        }
> +    }
> +
> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
> +    phdr.p_offset = cpu_convert_to_target32(begin, endian);
> +    phdr.p_paddr = 0;
> +    phdr.p_filesz = cpu_convert_to_target32(*offset - begin, endian);
> +    phdr.p_memsz = cpu_convert_to_target32(*offset - begin, endian);
> +    phdr.p_vaddr = 0;
> +
> +    phdr_offset = sizeof(Elf32_Ehdr);
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf32_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf_section(DumpState *s, target_phys_addr_t *offset, int 
> type)
> +{
> +    Elf32_Shdr shdr32;
> +    Elf64_Shdr shdr64;
> +    int endian = s->dump_info.d_endian;
> +    int shdr_size;
> +    void *shdr;
> +    int ret;
> +
> +    if (type == 0) {
> +        shdr_size = sizeof(Elf32_Shdr);
> +        memset(&shdr32, 0, shdr_size);
> +        shdr32.sh_info = cpu_convert_to_target32(s->sh_info, endian);
> +        shdr = &shdr32;
> +    } else {
> +        shdr_size = sizeof(Elf64_Shdr);
> +        memset(&shdr64, 0, shdr_size);
> +        shdr64.sh_info = cpu_convert_to_target32(s->sh_info, endian);
> +        shdr = &shdr64;
> +    }
> +
> +    ret = s->f(*offset, &shdr, shdr_size, s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write section header table.\n");
> +        return -1;
> +    }
> +
> +    *offset += shdr_size;
> +    return 0;
> +}
> +
> +static int write_data(DumpState *s, void *buf, int length,
> +                      target_phys_addr_t *offset)
> +{
> +    int ret;
> +
> +    ret = s->f(*offset, buf, length, s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to save memory.\n");
> +        return -1;
> +    }
> +
> +    *offset += length;
> +    return 0;
> +}
> +
> +/* write the memroy to vmcore. 1 page per I/O. */
> +static int write_memory(DumpState *s, RAMBlock *block,
> +                        target_phys_addr_t *offset)
> +{
> +    int i, ret;
> +
> +    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
> +                         TARGET_PAGE_SIZE, offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    if ((block->length % TARGET_PAGE_SIZE) != 0) {
> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
> +                         block->length % TARGET_PAGE_SIZE, offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/* get the memory's offset in the vmcore */
> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
> +                                     target_phys_addr_t memory_offset)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = memory_offset;
> +
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        if (phys_addr >= block->offset &&
> +            phys_addr < block->offset + block->length) {
> +            return phys_addr - block->offset + offset;
> +        }
> +        offset += block->length;
> +    }
> +
> +    return -1;
> +}
> +
> +/* write elf header, PT_NOTE and elf note to vmcore. */
> +static int dump_begin(DumpState *s)
> +{
> +    target_phys_addr_t offset;
> +    int ret;
> +
> +    s->state = DUMP_STATE_ACTIVE;
> +
> +    /*
> +     * the vmcore's format is:
> +     *   --------------
> +     *   |  elf header |
> +     *   --------------
> +     *   |  PT_NOTE    |
> +     *   --------------
> +     *   |  PT_LOAD    |
> +     *   --------------
> +     *   |  ......     |
> +     *   --------------
> +     *   |  PT_LOAD    |
> +     *   --------------
> +     *   |  sec_hdr    |
> +     *   --------------
> +     *   |  elf note   |
> +     *   --------------
> +     *   |  memory     |
> +     *   --------------
> +     *
> +     * we only know where the memory is saved after we write elf note into
> +     * vmcore.
> +     */
> +
> +    /* write elf header to vmcore */
> +    if (s->dump_info.d_class == ELFCLASS64) {
> +        ret = write_elf64_header(s);
> +    } else {
> +        ret = write_elf32_header(s);
> +    }
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    /* write elf section and notes to vmcore */
> +    if (s->dump_info.d_class == ELFCLASS64) {
> +        if (s->have_section) {
> +            offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->sh_info;
> +            if (write_elf_section(s, &offset, 1) < 0) {
> +                return -1;
> +            }
> +        } else {
> +            offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
> +        }
> +        ret = write_elf64_notes(s, 0, &offset);
> +    } else {
> +        if (s->have_section) {
> +            offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->sh_info;
> +            if (write_elf_section(s, &offset, 0) < 0) {
> +                return -1;
> +            }
> +        } else {
> +            offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
> +        }
> +        ret = write_elf32_notes(s, 0, &offset);
> +    }
> +
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    s->memory_offset = offset;
> +    return 0;
> +}
> +
> +/* write PT_LOAD to vmcore */
> +static int dump_completed(DumpState *s)
> +{
> +    target_phys_addr_t offset;
> +    MemoryMapping *memory_mapping;
> +    int phdr_index = 1, ret;
> +
> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
> +        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
> +        if (s->dump_info.d_class == ELFCLASS64) {
> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
> +        } else {
> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
> +        }
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    s->state = DUMP_STATE_COMPLETED;
> +    dump_cleanup(s);
> +    return 0;
> +}
> +
> +/* write all memory to vmcore */
> +static int dump_iterate(DumpState *s)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = s->memory_offset;
> +    int ret;
> +
> +    /* write all memory to vmcore */
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        ret = write_memory(s, block, &offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    return dump_completed(s);
> +}
> +
> +static int create_vmcore(DumpState *s)
> +{
> +    int ret;
> +
> +    ret = dump_begin(s);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    ret = dump_iterate(s);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static DumpState *dump_init(bool paging, Error **errp)
> +{
> +    CPUState *env;
> +    DumpState *s = dump_get_current();
> +    int ret;
> +
> +    if (runstate_is_running()) {
> +        vm_stop(RUN_STATE_PAUSED);
> +        s->resume = true;

Hmm, you actually stop the VM. Seems obvious now, but when people talked about
making this asynchronous I automatically assumed that what we didn't want was
having the global mutex held for too much time (ie. while this command was
running).

The only disadvantage of having this as a synchronous command is that libvirt
won't be able to cancel it and won't be able to run other commands in parallel.
Doesn't seem that serious to me.

Btw, RUN_STATE_PAUSED is not a good one. Doesn't matter that much, as this
is unlikely to be visible, but you should use RUN_STATE_SAVE_VM or
RUN_STATE_DEBUG.

> +    } else {
> +        s->resume = false;
> +    }
> +    s->state = DUMP_STATE_SETUP;
> +    if (s->error) {
> +        g_free(s->error);
> +        s->error = NULL;
> +    }
> +
> +    /*
> +     * get dump info: endian, class and architecture.
> +     * If the target architecture is not supported, cpu_get_dump_info() will
> +     * return -1.
> +     *
> +     * if we use kvm, we should synchronize the register before we get dump
> +     * info.
> +     */
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        cpu_synchronize_state(env);
> +    }
> +
> +    ret = cpu_get_dump_info(&s->dump_info);
> +    if (ret < 0) {
> +        error_set(errp, QERR_UNSUPPORTED);

This will let the VM paused.

> +        return NULL;
> +    }
> +
> +    /* get memory mapping */
> +    memory_mapping_list_init(&s->list);
> +    if (paging) {
> +        qemu_get_guest_memory_mapping(&s->list);
> +    } else {
> +        qemu_get_guest_simple_memory_mapping(&s->list);
> +    }
> +
> +    /*
> +     * calculate phdr_num
> +     *
> +     * the type of phdr->num is uint16_t, so we should avoid overflow
> +     */
> +    s->phdr_num = 1; /* PT_NOTE */
> +    if (s->list.num < (1 << 16) - 2) {
> +        s->phdr_num += s->list.num;
> +        s->have_section = false;
> +    } else {
> +        s->have_section = true;
> +        s->phdr_num = PN_XNUM;
> +
> +        /* the type of shdr->sh_info is uint32_t, so we should avoid 
> overflow */
> +        if (s->list.num > (1ULL << 32) - 2) {
> +            s->sh_info = 0xffffffff;
> +        } else {
> +            s->sh_info += s->list.num;
> +        }
> +    }
> +
> +    return s;
> +}
> +
> +static int fd_write_vmcore(target_phys_addr_t offset, void *buf, size_t size,
> +                           void *opaque)
> +{
> +    int fd = (int)(intptr_t)opaque;
> +    int ret;
> +
> +    ret = lseek(fd, offset, SEEK_SET);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    ret = write(fd, buf, size);
> +    if (ret != size) {
> +        return -1;
> +    }

I think you should use send_all() instead of plain write().

> +
> +    return 0;
> +}
> +
> +static void fd_cleanup(void *opaque)
> +{
> +    int fd = (int)(intptr_t)opaque;
> +
> +    if (fd != -1) {
> +        close(fd);
> +    }
> +}
> +
> +static DumpState *dump_init_fd(int fd, bool paging, Error **errp)
> +{
> +    DumpState *s = dump_init(paging, errp);
> +
> +    if (s == NULL) {
> +        return NULL;
> +    }
> +
> +    s->f = fd_write_vmcore;
> +    s->cleanup = fd_cleanup;
> +    s->opaque = (void *)(intptr_t)fd;

Do we really need all these indirections?

> +
> +    return s;
> +}
> +
> +void qmp_dump(bool paging, const char *file, Error **errp)
> +{
> +    const char *p;
> +    int fd = -1;
> +    DumpState *s;
> +
> +#if !defined(WIN32)
> +    if (strstart(file, "fd:", &p)) {
> +        fd = qemu_get_fd(p);

qemu_get_fd() won't be merged, you should use monitor_get_fd(cur_mon, p);

> +        if (fd == -1) {
> +            error_set(errp, QERR_FD_NOT_FOUND, p);
> +            return;
> +        }
> +    }
> +#endif
> +
> +    if  (strstart(file, "file:", &p)) {
> +        fd = open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);

This is minor, but I'd use qemu_open() here.

> +        if (fd < 0) {
> +            error_set(errp, QERR_OPEN_FILE_FAILED, p);
> +            return;
> +        }
> +    }
> +
> +    if (fd == -1) {
> +        error_set(errp, QERR_INVALID_PARAMETER, "file");
> +        return;
> +    }
> +
> +    s = dump_init_fd(fd, paging, errp);
> +    if (!s) {
> +        return;
> +    }
> +
> +    if (create_vmcore(s) < 0) {
> +        error_set(errp, QERR_IO_ERROR);
> +    }
> +}
> diff --git a/elf.h b/elf.h
> index 2e05d34..6a10657 100644
> --- a/elf.h
> +++ b/elf.h
> @@ -1000,6 +1000,11 @@ typedef struct elf64_sym {
>  
>  #define EI_NIDENT    16
>  
> +/* Special value for e_phnum.  This indicates that the real number of
> +   program headers is too large to fit into e_phnum.  Instead the real
> +   value is in the field sh_info of section 0.  */
> +#define PN_XNUM         0xffff
> +
>  typedef struct elf32_hdr{
>    unsigned char      e_ident[EI_NIDENT];
>    Elf32_Half e_type;
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index 6980214..d4cf2e5 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -880,6 +880,27 @@ server will ask the spice/vnc client to automatically 
> reconnect using the
>  new parameters (if specified) once the vm migration finished successfully.
>  ETEXI
>  
> +#if defined(CONFIG_HAVE_CORE_DUMP)
> +    {
> +        .name       = "dump",
> +        .args_type  = "paging:-p,file:s",
> +        .params     = "[-p] file",
> +        .help       = "dump to file",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd = hmp_dump,
> +    },
> +
> +
> +STEXI
> +@item dump [-p] @var{file}
> +@findex dump
> +Dump to @var{file}. The file can be processed with crash or gdb.
> +    file: destination file(started with "file:") or destination file 
> descriptor
> +          (started with "fd:")
> +  paging: do paging to get guest's memory mapping
> +ETEXI
> +#endif
> +
>      {
>          .name       = "snapshot_blkdev",
>          .args_type  = "reuse:-n,device:B,snapshot-file:s?,format:s?",
> diff --git a/hmp.c b/hmp.c
> index 290c43d..e13b793 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -860,3 +860,13 @@ void hmp_block_job_cancel(Monitor *mon, const QDict 
> *qdict)
>  
>      hmp_handle_error(mon, &error);
>  }
> +
> +void hmp_dump(Monitor *mon, const QDict *qdict)
> +{
> +    Error *errp = NULL;
> +    int paging = qdict_get_try_bool(qdict, "paging", 0);
> +    const char *file = qdict_get_str(qdict, "file");
> +
> +    qmp_dump(!!paging, file, &errp);

Why the double negation on 'paging'?

> +    hmp_handle_error(mon, &errp);
> +}
> diff --git a/hmp.h b/hmp.h
> index 5409464..b055e50 100644
> --- a/hmp.h
> +++ b/hmp.h
> @@ -59,5 +59,6 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict 
> *qdict);
>  void hmp_block_stream(Monitor *mon, const QDict *qdict);
>  void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
>  void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
> +void hmp_dump(Monitor *mon, const QDict *qdict);
>  
>  #endif
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 04fa84f..81b8c7c 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -1663,3 +1663,17 @@
>  { 'command': 'qom-list-types',
>    'data': { '*implements': 'str', '*abstract': 'bool' },
>    'returns': [ 'ObjectTypeInfo' ] }
> +
> +##
> +# @dump

'dump' is too generic, please call this dump-guest-memory-vmcore or something
more descriptive.

> +#
> +# Dump guest's memory to vmcore.
> +#
> +# @paging: if true, do paging to get guest's memory mapping
> +# @file: the filename or file descriptor of the vmcore.

'file' is not a good name because it can also dump to an fd, maybe 'protocol'?

> +#
> +# Returns: nothing on success
> +#
> +# Since: 1.1
> +##
> +{ 'command': 'dump', 'data': { 'paging': 'bool', 'file': 'str' } }
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index dfe8a5b..9e39bd9 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -586,6 +586,40 @@ Example:
>  
>  EQMP
>  
> +#if defined(CONFIG_HAVE_CORE_DUMP)
> +    {
> +        .name       = "dump",
> +        .args_type  = "paging:-p,file:s",
> +        .params     = "[-p] file",
> +        .help       = "dump to file",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = qmp_marshal_input_dump,
> +    },
> +
> +SQMP
> +dump
> +
> +
> +Dump to file. The file can be processed with crash or gdb.
> +
> +Arguments:
> +
> +- "paging": do paging to get guest's memory mapping (json-bool)
> +- "file": destination file(started with "file:") or destination file 
> descriptor
> +          (started with "fd:") (json-string)
> +
> +Example:
> +
> +-> { "execute": "dump", "arguments": { "file": "fd:dump" } }
> +<- { "return": {} }
> +
> +Notes:
> +
> +(1) All boolean arguments default to false
> +
> +EQMP
> +#endif
> +
>      {
>          .name       = "netdev_add",
>          .args_type  = "netdev:O",

Re: [Qemu-devel] [RFC][PATCH 11/14 v9] introduce a new monitor command 'dump' to dump guest's memory

Reply via email to