Peter Xu <pet...@redhat.com> wrote: > Add a RAMBlock.host_mirror for all the hugetlbfs backed guest memories. > It'll be used to remap the same region twice and it'll be used to service > page faults using UFFDIO_CONTINUE. > > To make sure all accesses to these ranges will generate minor page faults > not missing page faults, we need to pre-allocate the files to make sure > page cache exist start from the beginning. > > Signed-off-by: Peter Xu <pet...@redhat.com>
Reviewed-by: Juan Quintela <quint...@redhat.com> but what about the changes suggested below? > --- > include/exec/ramblock.h | 7 +++++ > migration/ram.c | 59 +++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 66 insertions(+) > > diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h > index 3f31ce1591..c76683c3c8 100644 > --- a/include/exec/ramblock.h > +++ b/include/exec/ramblock.h > @@ -28,6 +28,13 @@ struct RAMBlock { > struct rcu_head rcu; > struct MemoryRegion *mr; > uint8_t *host; > + /* > + * This is only used for hugetlbfs ramblocks where doublemap is > + * enabled. The pointer is managed by dest host migration code, and > + * should be NULL when migration is finished. On src host, it should > + * always be NULL. > + */ > + uint8_t *host_mirror; I would consider naming this: uint8_t *host_doublemap; as I can't come up with a short name that means uint8_t *host_map_smaller_size_pages; which would explain why we need it. > uint8_t *colo_cache; /* For colo, VM's ram cache */ > ram_addr_t offset; > ram_addr_t used_length; > diff --git a/migration/ram.c b/migration/ram.c > index 2ebf414f5f..37d7b3553a 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -3879,6 +3879,57 @@ void colo_release_ram_cache(void) > ram_state_cleanup(&ram_state); > } > > +static int migrate_hugetlb_doublemap_init(void) > +{ > + RAMBlock *rb; > + void *addr; > + int ret; These variables are not initialized; remove the last two declarations. > + if (!migrate_hugetlb_doublemap()) { > + return 0; > + } > + I would move the declaration of the RAMBlock here. > + RAMBLOCK_FOREACH_NOT_IGNORED(rb) { > + if (qemu_ram_is_hugetlb(rb)) { > + /* > + * Firstly, we remap the same ramblock into another range of > + * virtual address, so that we can write to the pages without > + * touching the page tables that directly mapped for the guest. 
> + */ > + addr = ramblock_file_map(rb); void *addr = ramblock_file_map(rb); > + if (addr == MAP_FAILED) { > + ret = -errno; int ret = -errno; > + error_report("%s: Duplicate mapping for hugetlb ramblock > '%s'" > + "failed: %s", __func__, qemu_ram_get_idstr(rb), > + strerror(errno)); > + return ret; > + } > + rb->host_mirror = addr; > + > + /* > + * We need to make sure we pre-allocate the range with > + * hugetlbfs pages before hand, so that all the page fault will > + * be trapped as MINOR faults always, rather than MISSING > + * faults in userfaultfd. > + */ > + ret = qemu_madvise(addr, rb->mmap_length, > QEMU_MADV_POPULATE_WRITE); int ret = qemu_madvise(addr, rb->mmap_length, QEMU_MADV_POPULATE_WRITE); > + if (ret) { > + error_report("Failed to populate hugetlb ramblock '%s': " > + "%s", qemu_ram_get_idstr(rb), strerror(-ret)); > + return ret; > + } > + } > + } > + > + /* > + * When reach here, it means we've setup the mirror mapping for all the > + * hugetlbfs pages. Hence when page fault happens, we'll be able to > + * resolve page faults using UFFDIO_CONTINUE for hugetlbfs pages, but > + * we'll keep using UFFDIO_COPY for anonymous pages. > + */ > + return 0; > +}