On 03/06/2015 14:22, Igor Mammedov wrote: > Patch adds memory_region_init_rsvd_hva() and > memory_region_find_rsvd_hva() API to allocate and lookup > reserved HVA MemoryRegion. > > MemoryRegion with reserved HVA range will be used for > providing linear 1:1 HVA->GVA mapping for RAM MemoryRegion-s > that is added as subregions inside it. > > It will be used for memory hotplug and vhost integration > reducing all hotplugged MemoryRegions down to one > memory range descriptor, which allows to overcome > vhost's limitation on number of allowed memory ranges. > > Signed-off-by: Igor Mammedov <imamm...@redhat.com>
Very nice patches. Really. About the naming, I would use memory_region_init_hva_range and memory_region_find_hva_range. Also, it has to be Linux-only (due to MAP_NORESERVE and mremap). Finally, instead of overloading ram_addr, just add "void *rsvd_hva" and make it NULL for other kinds of regions. This matches what we do for e.g. iommu_ops. One question: > void *memory_region_get_ram_ptr(MemoryRegion *mr) > { > + if (mr->rsvd_hva) { > + return (void *)mr->ram_addr; > + } > + I see how this is "nice" and I don't object to it, but is it also necessary? Paolo > --- > exec.c | 13 +++++++++++ > include/exec/cpu-common.h | 1 + > include/exec/memory.h | 42 ++++++++++++++++++++++++++++++++++-- > memory.c | 55 > +++++++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 109 insertions(+), 2 deletions(-) > > diff --git a/exec.c b/exec.c > index e19ab22..e01cd8f 100644 > --- a/exec.c > +++ b/exec.c > @@ -1325,6 +1325,19 @@ static int memory_try_enable_merging(void *addr, > size_t len) > return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE); > } > > +void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva) > +{ > + RAMBlock *block = find_ram_block(addr); > + > + assert(block); > + block->host = mremap(block->host, block->used_length, > + block->used_length, > + MREMAP_MAYMOVE | MREMAP_FIXED, new_hva); > + memory_try_enable_merging(block->host, block->used_length); > + qemu_ram_setup_dump(block->host, block->used_length); > +} > + > + > /* Only legal before guest might have detected the memory size: e.g. on > * incoming migration, or right after reset. > * > diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h > index 43428bd..a9f73d6 100644 > --- a/include/exec/cpu-common.h > +++ b/include/exec/cpu-common.h > @@ -60,6 +60,7 @@ typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, > uint32_t value); > typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr); > > void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); > +void qemu_ram_remap_hva(ram_addr_t addr, void *new_hva); > /* This should not be used by devices. */ > MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr); > void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev); > diff --git a/include/exec/memory.h b/include/exec/memory.h > index b61c84f..9b0858e 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -174,6 +174,7 @@ struct MemoryRegion { > bool terminates; > bool romd_mode; > bool ram; > + bool rsvd_hva; > bool skip_dump; > bool readonly; /* For RAM regions */ > bool enabled; > @@ -281,6 +282,23 @@ void memory_region_init(MemoryRegion *mr, > struct Object *owner, > const char *name, > uint64_t size); > +/** > + * memory_region_init_rsvd_hva: Initialize a reserved HVA memory region > + * > + * The container for RAM memory regions. > + * When adding subregion with memory_region_add_subregion(), subregion's > + * backing host memory will be remapped to inside the reserved by this > + * region HVA. > + * > + * @mr: the #MemoryRegion to be initialized > + * @owner: the object that tracks the region's reference count > + * @name: used for debugging; not visible to the user or ABI > + * @size: size of the region; any subregions beyond this size will be clipped > + */ > +void memory_region_init_rsvd_hva(MemoryRegion *mr, > + struct Object *owner, > + const char *name, > + uint64_t size); > > /** > * memory_region_ref: Add 1 to a memory region's reference count > @@ -620,8 +638,8 @@ int memory_region_get_fd(MemoryRegion *mr); > * memory_region_get_ram_ptr: Get a pointer into a RAM memory region. > * > * Returns a host pointer to a RAM memory region (created with > - * memory_region_init_ram() or memory_region_init_ram_ptr()). Use with > - * care. > + * memory_region_init_ram() or memory_region_init_ram_ptr()) or > + * memory_region_init_rsvd_hva(). Use with care. > * > * @mr: the memory region being queried. > */ > @@ -1014,6 +1032,26 @@ MemoryRegionSection memory_region_find(MemoryRegion > *mr, > hwaddr addr, uint64_t size); > > /** > + * memory_region_find_rsvd_hva: finds a parent MemoryRegion with > + * reserved HVA and translates it into a #MemoryRegionSection. > + * > + * Locates the first parent #MemoryRegion of @mr that is > + * of reserved HVA type. > + * > + * Returns a #MemoryRegionSection that describes a reserved HVA > + * memory region. > + * .@offset_within_address_space is offset of found > + * (in the .@mr field) memory region relative to the address > + * space that contains it. > + * .@offset_within_region is offset of @mr relative > + * to the returned region (in the .@mr field). > + * .@size is size of found memory region > + * > + * @mr: a MemoryRegion whose HVA pernt is looked up > + */ > +MemoryRegionSection memory_region_find_rsvd_hva(MemoryRegion *mr); > + > +/** > * address_space_sync_dirty_bitmap: synchronize the dirty log for all memory > * > * Synchronizes the dirty page log for an entire address space. > diff --git a/memory.c b/memory.c > index 03c536b..70564dc 100644 > --- a/memory.c > +++ b/memory.c > @@ -25,6 +25,7 @@ > #include "exec/memory-internal.h" > #include "exec/ram_addr.h" > #include "sysemu/sysemu.h" > +#include <sys/mman.h> > > //#define DEBUG_UNASSIGNED > > @@ -939,6 +940,17 @@ void memory_region_init(MemoryRegion *mr, > } > } > > +void memory_region_init_rsvd_hva(MemoryRegion *mr, > + Object *owner, > + const char *name, > + uint64_t size) > +{ > + memory_region_init(mr, owner, name, size); > + mr->rsvd_hva = true; > + mr->ram_addr = (ram_addr_t)mmap(0, memory_region_size(mr), > + PROT_NONE, MAP_NORESERVE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > +} > + > static void memory_region_get_addr(Object *obj, Visitor *v, void *opaque, > const char *name, Error **errp) > { > @@ -1514,6 +1526,10 @@ int memory_region_get_fd(MemoryRegion *mr) > > void *memory_region_get_ram_ptr(MemoryRegion *mr) > { > + if (mr->rsvd_hva) { > + return (void *)mr->ram_addr; > + } > + > if (mr->alias) { > return memory_region_get_ram_ptr(mr->alias) + mr->alias_offset; > } > @@ -1742,6 +1758,17 @@ static void > memory_region_add_subregion_common(MemoryRegion *mr, > assert(!subregion->container); > subregion->container = mr; > subregion->addr = offset; > + > + if (mr->ram) { > + MemoryRegionSection rsvd_hva = memory_region_find_rsvd_hva(mr); > + > + if (rsvd_hva.mr) { > + qemu_ram_remap_hva(mr->ram_addr, > + memory_region_get_ram_ptr(rsvd_hva.mr) + > + rsvd_hva.offset_within_region); > + } > + } > + > memory_region_update_container_subregions(subregion); > } > > @@ -1884,6 +1911,34 @@ bool memory_region_is_mapped(MemoryRegion *mr) > return mr->container ? true : false; > } > > +MemoryRegionSection memory_region_find_rsvd_hva(MemoryRegion *mr) > +{ > + MemoryRegionSection ret = { .mr = NULL }; > + MemoryRegion *hva_container = NULL; > + hwaddr addr = mr->addr; > + MemoryRegion *root; > + > + addr += mr->addr; > + for (root = mr; root->container; ) { > + if (!hva_container && root->rsvd_hva) { > + hva_container = root; > + ret.offset_within_region = addr; > + } > + root = root->container; > + addr += root->addr; > + } > + > + ret.address_space = memory_region_to_address_space(root); > + if (!ret.address_space || !hva_container) { > + return ret; > + } > + > + ret.mr = hva_container; > + ret.offset_within_address_space = addr; > + ret.size = int128_make64(memory_region_size(ret.mr)); > + return ret; > +} > + > MemoryRegionSection memory_region_find(MemoryRegion *mr, > hwaddr addr, uint64_t size) > { >