In a cloud environment, when a DRAM-emulated PMEM DAX device (created with the kernel parameter 'memmap=nn!ss') is used to mmap guest memory, multiple VMs may need to share the same DAX device. This is because multiple namespaces cannot be created dynamically within one DRAM-emulated PMEM region, and it is hard to know at kernel boot time how many 'memmap=nn!ss' regions will need to be created.
This patch introduces a parameter 'offset' for HostMemoryBackendFile to specify the offset in the file this HostMemoryBackendFile will mmap to, so that different HostMemoryBackendFiles can mmap to different address ranges of the backing file. Signed-off-by: Jason Zeng <jason.z...@intel.com> --- backends/hostmem-file.c | 46 ++++++++++++++++++++++++++++++++++++++- exec.c | 20 ++++++++++------- hw/core/numa.c | 2 +- include/exec/memory.h | 1 + include/exec/ram_addr.h | 4 ++-- include/qemu/mmap-alloc.h | 1 + memory.c | 6 +++-- util/mmap-alloc.c | 5 +++-- util/oslib-posix.c | 2 +- 9 files changed, 70 insertions(+), 17 deletions(-) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index be64020746..1fe814d52e 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -34,6 +34,7 @@ struct HostMemoryBackendFile { HostMemoryBackend parent_obj; char *mem_path; + uint64_t offset; uint64_t align; bool discard_data; bool is_pmem; @@ -57,6 +58,10 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) error_setg(errp, "mem-path property not set"); return; } + if (fb->align && fb->offset && (fb->offset % fb->align)) { + error_setg(errp, "offset doesn't match align"); + return; + } backend->force_prealloc = mem_prealloc; name = host_memory_backend_get_name(backend); @@ -65,7 +70,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) backend->size, fb->align, (backend->share ? RAM_SHARED : 0) | (fb->is_pmem ? 
RAM_PMEM : 0), - fb->mem_path, errp); + fb->mem_path, fb->offset, errp); g_free(name); #endif } @@ -137,6 +142,41 @@ static void file_memory_backend_set_align(Object *o, Visitor *v, error_propagate(errp, local_err); } +static void file_memory_backend_get_offset(Object *o, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + uint64_t val = fb->offset; + + visit_type_size(v, name, &val, errp); +} + +static void file_memory_backend_set_offset(Object *o, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + Error *local_err = NULL; + uint64_t val; + + if (host_memory_backend_mr_inited(backend)) { + error_setg(&local_err, "cannot change property '%s' of %s", + name, object_get_typename(o)); + goto out; + } + + visit_type_size(v, name, &val, &local_err); + if (local_err) { + goto out; + } + fb->offset = val; + + out: + error_propagate(errp, local_err); +} + static bool file_memory_backend_get_pmem(Object *o, Error **errp) { return MEMORY_BACKEND_FILE(o)->is_pmem; @@ -197,6 +237,10 @@ file_backend_class_init(ObjectClass *oc, void *data) object_class_property_add_str(oc, "mem-path", get_mem_path, set_mem_path, &error_abort); + object_class_property_add(oc, "offset", "uint64", + file_memory_backend_get_offset, + file_memory_backend_set_offset, + NULL, NULL, &error_abort); object_class_property_add(oc, "align", "int", file_memory_backend_get_align, file_memory_backend_set_align, diff --git a/exec.c b/exec.c index ffdb518535..7018ef343f 100644 --- a/exec.c +++ b/exec.c @@ -1838,6 +1838,7 @@ static int file_ram_open(const char *path, static void *file_ram_alloc(RAMBlock *block, ram_addr_t memory, int fd, + off_t offset, bool truncate, Error **errp) { @@ -1889,7 +1890,7 @@ static void *file_ram_alloc(RAMBlock *block, perror("ftruncate"); } - area = qemu_ram_mmap(fd, memory, block->mr->align, + 
area = qemu_ram_mmap(fd, memory, block->mr->align, offset, block->flags & RAM_SHARED, block->flags & RAM_PMEM); if (area == MAP_FAILED) { error_setg_errno(errp, errno, @@ -2277,7 +2278,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared) #ifdef CONFIG_POSIX RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, uint32_t ram_flags, int fd, - Error **errp) + off_t offset, Error **errp) { RAMBlock *new_block; Error *local_err = NULL; @@ -2309,11 +2310,13 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, } size = HOST_PAGE_ALIGN(size); + offset = HOST_PAGE_ALIGN(offset); file_size = get_file_size(fd); - if (file_size > 0 && file_size < size) { + if (file_size > 0 && file_size < offset + size) { error_setg(errp, "backing store %s size 0x%" PRIx64 - " does not match 'size' option 0x" RAM_ADDR_FMT, - mem_path, file_size, size); + " does not match 'size' option 0x" RAM_ADDR_FMT + " and 'offset' option 0x" RAM_ADDR_FMT, + mem_path, file_size, size, offset); return NULL; } @@ -2322,7 +2325,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, new_block->used_length = size; new_block->max_length = size; new_block->flags = ram_flags; - new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp); + new_block->host = file_ram_alloc(new_block, size, fd, offset, + !file_size, errp); if (!new_block->host) { g_free(new_block); return NULL; @@ -2341,7 +2345,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, uint32_t ram_flags, const char *mem_path, - Error **errp) + off_t offset, Error **errp) { int fd; bool created; @@ -2352,7 +2356,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return NULL; } - block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, errp); + block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp); if (!block) { if (created) { unlink(mem_path); 
diff --git a/hw/core/numa.c b/hw/core/numa.c index e3332a984f..be310fc0cc 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -494,7 +494,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, #ifdef __linux__ Error *err = NULL; memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, 0, - mem_path, &err); + mem_path, 0, &err); if (err) { error_report_err(err); if (mem_prealloc) { diff --git a/include/exec/memory.h b/include/exec/memory.h index e499dc215b..eba2fc894e 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -690,6 +690,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, uint64_t align, uint32_t ram_flags, const char *path, + off_t offset, Error **errp); /** diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index bed0554f4d..721c26a4c0 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -156,10 +156,10 @@ long qemu_maxrampagesize(void); */ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, uint32_t ram_flags, const char *mem_path, - Error **errp); + off_t offset, Error **errp); RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, uint32_t ram_flags, int fd, - Error **errp); + off_t offset, Error **errp); RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr, Error **errp); diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h index e786266b92..80b8df7f70 100644 --- a/include/qemu/mmap-alloc.h +++ b/include/qemu/mmap-alloc.h @@ -24,6 +24,7 @@ size_t qemu_mempath_getpagesize(const char *mem_path); void *qemu_ram_mmap(int fd, size_t size, size_t align, + off_t offset_in_fd, bool shared, bool is_pmem); diff --git a/memory.c b/memory.c index 06484c2bff..62e8d8acbe 100644 --- a/memory.c +++ b/memory.c @@ -1563,6 +1563,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, uint64_t align, uint32_t ram_flags, const char *path, + off_t offset, Error **errp) { Error *err = NULL; @@ -1571,7 +1572,8 @@ 
void memory_region_init_ram_from_file(MemoryRegion *mr, mr->terminates = true; mr->destructor = memory_region_destructor_ram; mr->align = align; - mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, &err); + mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, + path, offset, &err); mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; if (err) { mr->size = int128_zero(); @@ -1595,7 +1597,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, mr->destructor = memory_region_destructor_ram; mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share ? RAM_SHARED : 0, - fd, &err); + fd, 0, &err); mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; if (err) { mr->size = int128_zero(); diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c index 27dcccd8ec..e99e63c5fe 100644 --- a/util/mmap-alloc.c +++ b/util/mmap-alloc.c @@ -85,6 +85,7 @@ size_t qemu_mempath_getpagesize(const char *mem_path) void *qemu_ram_mmap(int fd, size_t size, size_t align, + off_t offset_in_fd, bool shared, bool is_pmem) { @@ -147,7 +148,7 @@ void *qemu_ram_mmap(int fd, offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr; ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, - flags | map_sync_flags, fd, 0); + flags | map_sync_flags, fd, offset_in_fd); if (ptr == MAP_FAILED && map_sync_flags) { if (errno == ENOTSUP) { @@ -172,7 +173,7 @@ void *qemu_ram_mmap(int fd, * we will remove these flags to handle compatibility. 
*/ ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, - flags, fd, 0); + flags, fd, offset_in_fd); } if (ptr == MAP_FAILED) { diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 5a291cc982..f8c13ada9a 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -205,7 +205,7 @@ void *qemu_memalign(size_t alignment, size_t size) void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared) { size_t align = QEMU_VMALLOC_ALIGN; - void *ptr = qemu_ram_mmap(-1, size, align, shared, false); + void *ptr = qemu_ram_mmap(-1, size, align, 0, shared, false); if (ptr == MAP_FAILED) { return NULL; -- 2.20.1