From: "Dr. David Alan Gilbert" <dgilb...@redhat.com> Provide functions to be called before the start of a postcopy-enabled migration (even if postcopy is not eventually used) and at the end of that migration.
During the init we must disable huge pages in the RAM that we will receive postcopy data into, since if they start off as hugepage and get a 4k page written to them, the rest of the hugepage won't get userfault'd and won't work as a destination for remap. Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com> --- include/migration/postcopy-ram.h | 12 +++++ postcopy-ram.c | 101 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h index e06cb45..ca2045b 100644 --- a/include/migration/postcopy-ram.h +++ b/include/migration/postcopy-ram.h @@ -30,6 +30,18 @@ int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, /* + * Initialise postcopy-ram, setting the RAM to a state where we can go into + * postcopy later; must be called prior to any precopy. + * called from arch_init's similarly named ram_postcopy_incoming_init + */ +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages); + +/* + * At the end of a migration where postcopy_ram_incoming_init was called. + */ +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis); + +/* * Called back from arch_init's ram_postcopy_each_ram_discard to handle * discarding one RAMBlock's pre-postcopy dirty pages */ diff --git a/postcopy-ram.c b/postcopy-ram.c index b41fec8..b1fbabd 100644 --- a/postcopy-ram.c +++ b/postcopy-ram.c @@ -263,6 +263,95 @@ int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, return 0; } +/* + * Setup an area of RAM so that it *can* be used for postcopy later; this + * must be done right at the start prior to pre-copy. + * opaque should be the MIS. 
+ */ +static int init_area(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + MigrationIncomingState *mis = opaque; + + DPRINTF("init_area: %s: %p offset=%zx length=%zd(%zx)", + block_name, host_addr, offset, length, length); + /* + * We need the whole of RAM to be truly empty for postcopy, so things + * like ROMs and any data tables built during init must be zero'd + * - we're going to get the copy from the source anyway. + */ + if (postcopy_ram_discard_range(mis, host_addr, (host_addr + length - 1))) { + return -1; + } + + /* + * We also need the area to be normal 4k pages, not huge pages + * (otherwise we can't be sure we can use remap_anon_pages to put + * a 4k page in later). THP might come along and map a 2MB page + * and when it's partially accessed in precopy it might not break + * it down, but leave a 2MB zero'd page. + */ + if (madvise(host_addr, length, MADV_NOHUGEPAGE)) { + perror("init_area: NOHUGEPAGE"); + return -1; + } + + return 0; +} + +/* + * At the end of migration, undo the effects of init_area + * opaque should be the MIS. + */ +static int cleanup_area(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + /* Turn off userfault here as well? */ + + DPRINTF("cleanup_area: %s: %p offset=%zx length=%zd(%zx)", + block_name, host_addr, offset, length, length); + /* + * We turned off hugepage for the precopy stage with postcopy enabled + * we can turn it back on now. + */ + if (madvise(host_addr, length, MADV_HUGEPAGE)) { + perror("init_area: HUGEPAGE"); + return -1; + } + + return 0; +} + +/* + * Initialise postcopy-ram, setting the RAM to a state where we can go into + * postcopy later; must be called prior to any precopy. 
+ * called from arch_init's similarly named ram_postcopy_incoming_init + */ +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +{ + postcopy_pmi_init(mis, ram_pages); + + if (qemu_ram_foreach_block(init_area, mis)) { + return -1; + } + + return 0; +} + +/* + * At the end of a migration where postcopy_ram_incoming_init was called. + */ +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) +{ + /* TODO: Join the fault thread once we're sure it will exit */ + close(mis->userfault_fd); + if (qemu_ram_foreach_block(cleanup_area, mis)) { + return -1; + } + + return 0; +} + #else /* No target OS support, stubs just fail */ @@ -286,6 +375,18 @@ void postcopy_hook_early_receive(MigrationIncomingState *mis, /* We don't support postcopy so don't care */ } +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +{ + error_report("postcopy_ram_incoming_init: No OS support\n"); + return -1; +} + +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) +{ + error_report("postcopy_ram_incoming_cleanup: No OS support\n"); + return -1; +} + #endif /* ------------------------------------------------------------------------- */ -- 1.9.3