Signed-off-by: Isaku Yamahata <yamah...@valinux.co.jp>
---
Review notes (ignored by git-am):
 * RAM block names are now matched exactly instead of by prefix.
 * Short bitmap reads and stream errors are reported instead of being
   interpreted as bitmap data or as the terminator record.
 * The page index in the bitmap walk is 64-bit like its bounds.
 * Fixed the "precpoy" typo in the debug message; documented new functions.
 * Line breaks and context-line whitespace were reconstructed from the hunk
   counts, and the blob ids on the index line are those of the original
   posting; regenerate against the tree before applying.

 migration-postcopy.c |  250 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 247 insertions(+), 3 deletions(-)

diff --git a/migration-postcopy.c b/migration-postcopy.c
index 421fb39..9298cd4 100644
--- a/migration-postcopy.c
+++ b/migration-postcopy.c
@@ -274,6 +274,9 @@ static void postcopy_outgoing_free_req(struct qemu_umem_req *req)
 #define QEMU_VM_POSTCOPY_INIT 0
 #define QEMU_VM_POSTCOPY_SECTION_FULL 1
 
+/* options in QEMU_VM_POSTCOPY_INIT section */
+#define POSTCOPY_OPTION_PRECOPY 1ULL
+
 /***************************************************************************
  * outgoing part
  */
@@ -739,6 +742,7 @@ struct PostcopyIncomingUMemDaemon {
     int nr_target_pages_per_host_page;
     int target_to_host_page_shift;
     int version_id; /* save/load format version id */
+    bool precopy_enabled; /* INIT section carried POSTCOPY_OPTION_PRECOPY */
 
     QemuThread thread;
     QLIST_HEAD(, UMemBlock) blocks;
@@ -784,6 +788,7 @@ static PostcopyIncomingState state = {
 
 static PostcopyIncomingUMemDaemon umemd = {
     .state = 0,
+    .precopy_enabled = false,
     .to_qemu_fd = -1,
     .to_qemu = NULL,
     .from_qemu_fd = -1,
@@ -797,6 +802,8 @@ static PostcopyIncomingUMemDaemon umemd = {
 
 static void *postcopy_incoming_umemd(void*);
 static void postcopy_incoming_qemu_handle_req(void *opaque);
+static UMemBlock *postcopy_incoming_umem_block_from_stream(
+    QEMUFile *f, int flags);
 
 /* protected by qemu_mutex_lock_ramlist() */
 void postcopy_incoming_ram_free(RAMBlock *ram_block)
@@ -875,6 +882,31 @@ int postcopy_incoming_ram_load(QEMUFile *f, void *opaque, int version_id)
     return -EINVAL;
 }
 
+/*
+ * Address callback handed to ram_load(): look up the UMemBlock with
+ * postcopy_incoming_umem_block_from_stream() and return the address at
+ * @offset inside its shmem.  Returns NULL when no block is found.
+ */
+static void *
+postcopy_incoming_shmem_from_stream_offset(QEMUFile *f, ram_addr_t offset,
+                                           int flags)
+{
+    UMemBlock *block = postcopy_incoming_umem_block_from_stream(f, flags);
+    if (block == NULL) {
+        DPRINTF("error block = NULL\n");
+        return NULL;
+    }
+    return block->umem->shmem + offset;
+}
+
+/* load_state handler installed when the INIT section enabled precopy */
+static int postcopy_incoming_ram_load_precopy(QEMUFile *f, void *opaque,
+                                              int version_id)
+{
+    return ram_load(f, opaque, version_id,
+                    &postcopy_incoming_shmem_from_stream_offset);
+}
+
 static void postcopy_incoming_umem_block_free(void)
 {
     UMemBlock *block;
@@ -982,6 +1014,12 @@ static int postcopy_incoming_loadvm_init(QEMUFile *f, uint32_t size)
         return -EINVAL;
     }
     options = qemu_get_be64(f);
+    if (options & POSTCOPY_OPTION_PRECOPY) {
+        options &= ~POSTCOPY_OPTION_PRECOPY;
+        umemd.precopy_enabled = true;
+    } else {
+        umemd.precopy_enabled = false;
+    }
     if (options) {
         fprintf(stderr, "unknown options 0x%"PRIx64, options);
         return -ENOSYS;
@@ -999,12 +1037,17 @@ static int postcopy_incoming_loadvm_init(QEMUFile *f, uint32_t size)
         return -ENOSYS;
     }
 
-    DPRINTF("detected POSTCOPY\n");
+    DPRINTF("detected POSTCOPY precopy %d\n", umemd.precopy_enabled);
     error = postcopy_incoming_prepare();
     if (error) {
         return error;
     }
-    savevm_ram_handlers.load_state = postcopy_incoming_ram_load;
+    if (umemd.precopy_enabled) {
+        savevm_ram_handlers.load_state = postcopy_incoming_ram_load_precopy;
+    } else {
+        savevm_ram_handlers.load_state = postcopy_incoming_ram_load;
+    }
+
     incoming_postcopy = true;
     return 0;
 }
@@ -1515,6 +1558,206 @@ static int postcopy_incoming_umem_ram_load(void)
     return 0;
 }
 
+/*
+ * Read one RAM block's dirty bitmap from @f and apply it to umemd.
+ *
+ * The bitmap is a sequence of big-endian 64-bit words; bit i of word n
+ * stands for target page (bit_start & ~63) + 64 * n + i.  Pages whose bit
+ * is clear are marked received and requested.  Host pages that end up
+ * fully received are marked cached with umem_mark_page_cached() and handed
+ * to postcopy_incoming_umem_page_fault().
+ *
+ * Returns 0 on success, -EINVAL for a malformed record or unknown block,
+ * -EIO when the bitmap cannot be read completely.
+ */
+static int postcopy_incoming_umemd_read_dirty_bitmap(
+    QEMUFile *f, const char *idstr, uint8_t idlen,
+    uint64_t block_offset, uint64_t block_length, uint64_t bitmap_length)
+{
+    UMemBlock *block;
+    uint64_t bit_start = block_offset >> TARGET_PAGE_BITS;
+    uint64_t bit_end = (block_offset + block_length) >> TARGET_PAGE_BITS;
+    uint64_t bit_offset;
+    uint8_t *buffer;
+    uint64_t index;
+
+    /* the length comes from the wire; qemu_get_buffer() takes an int size */
+    if ((bitmap_length % sizeof(uint64_t)) != 0 || bitmap_length > INT_MAX) {
+        return -EINVAL;
+    }
+    QLIST_FOREACH(block, &umemd.blocks, next) {
+        /* compare the whole name: a prefix match can pick the wrong block */
+        if (strlen(block->idstr) == idlen &&
+            !memcmp(block->idstr, idstr, idlen)) {
+            break;
+        }
+    }
+    if (block == NULL) {
+        return -EINVAL;
+    }
+
+    DPRINTF("bitmap %s 0x%"PRIx64" 0x%"PRIx64" 0x%"PRIx64"\n",
+            block->idstr, block_offset, block_length, bitmap_length);
+    buffer = g_malloc(bitmap_length);
+    if (qemu_get_buffer(f, buffer, bitmap_length) != (int)bitmap_length) {
+        /* a short read would leave stale heap bytes deciding page state */
+        g_free(buffer);
+        return -EIO;
+    }
+
+    bit_offset = bit_start & ~63;
+    index = 0;
+    while (index < bitmap_length) {
+        uint64_t bitmap;
+        uint64_t bit;   /* 64-bit: compared against bit_start/bit_end */
+        int i;
+        int j;
+
+        bitmap = be64_to_cpup((uint64_t*)(buffer + index));
+        for (i = 0; i < 64; i++) {
+            bit = bit_offset + i;
+            if (bit < bit_start) {
+                continue;
+            }
+            if (bit >= bit_end) {
+                break;
+            }
+            if (!(bitmap & (1ULL << i))) {
+                set_bit(bit, umemd.phys_received);
+
+                /* this is racy, but write side just sends redundant request */
+                set_bit(bit, umemd.phys_requested);
+            }
+        }
+
+        umemd.page_cached->nr = 0;
+        if (TARGET_PAGE_SIZE >= umemd.host_page_size) {
+            for (i = 0; i < 64; i++) {
+                uint64_t pgoff;
+                bit = bit_offset + i;
+                if (bit < bit_start) {
+                    continue;
+                }
+                if (bit >= bit_end) {
+                    break;
+                }
+                if (!test_bit(bit, umemd.phys_received)) {
+                    continue;
+                }
+                pgoff = (bit - bit_start) << umemd.target_to_host_page_shift;
+                for (j = 0; j < umemd.nr_host_pages_per_target_page; j++) {
+                    umemd.page_cached->pgoffs[umemd.page_cached->nr] =
+                        pgoff + j;
+                    umemd.page_cached->nr++;
+                }
+            }
+        } else {
+            for (i = 0; i < 64; i += umemd.nr_target_pages_per_host_page) {
+                bool mark_cache = true;
+                bit = bit_offset + i;
+                if (bit < bit_start) {
+                    continue;
+                }
+                if (bit >= bit_end) {
+                    break;
+                }
+                if (!test_bit(bit, umemd.phys_received)) {
+                    continue;
+                }
+                for (j = 0; j < umemd.nr_target_pages_per_host_page; j++) {
+                    if (!test_bit(bit + j, umemd.phys_received)) {
+                        mark_cache = false;
+                        break;
+                    }
+                }
+                if (mark_cache) {
+                    umemd.page_cached->pgoffs[umemd.page_cached->nr] =
+                        (bit - bit_start) >>
+                        (umemd.host_page_shift - TARGET_PAGE_BITS);
+                    umemd.page_cached->nr++;
+                }
+            }
+        }
+
+        if (umemd.page_cached->nr > 0) {
+            umem_mark_page_cached(block->umem, umemd.page_cached);
+            postcopy_incoming_umem_page_fault(block, umemd.page_cached);
+        }
+
+        bit_offset += 64;
+        index += sizeof(bitmap);
+    }
+
+    g_free(buffer);
+    return 0;
+}
+
+/*
+ * Init hook of the mig_read thread.  With precopy enabled, consume the
+ * per-block dirty bitmap records from umemd.mig_read up to the all-zero
+ * terminator record, then call postcopy_incoming_umem_done() if
+ * postcopy_incoming_umem_check_umem_done() reports completion.  Without
+ * precopy this does nothing.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int postcopy_incoming_umemd_mig_read_init(void)
+{
+    QEMUFile *f = umemd.mig_read;
+#ifdef DEBUG_POSTCOPY
+    uint64_t start = qemu_get_clock_ns(rt_clock);
+    uint64_t end;
+#endif
+
+    if (!umemd.precopy_enabled) {
+        return 0;
+    }
+
+    for (;;) {
+        uint8_t idlen;
+        char idstr[256];
+        uint64_t block_offset;
+        uint64_t block_length;
+        uint64_t bitmap_length;
+        int ret;
+
+        idlen = qemu_get_byte(f);
+        qemu_get_buffer(f, (uint8_t*)idstr, idlen);
+        idstr[idlen] = 0;
+        block_offset = qemu_get_be64(f);
+        block_length = qemu_get_be64(f);
+        bitmap_length = qemu_get_be64(f);
+
+        /*
+         * A failed or truncated read yields zeros, which must not be taken
+         * for the terminator record.
+         */
+        ret = qemu_file_get_error(f);
+        if (ret < 0) {
+            return ret;
+        }
+
+        if (idlen == 0 && block_offset == 0 && block_length == 0 &&
+            bitmap_length == 0) {
+            DPRINTF("bitmap done\n");
+            break;
+        }
+        ret = postcopy_incoming_umemd_read_dirty_bitmap(
+            f, idstr, idlen, block_offset, block_length, bitmap_length);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+    if (postcopy_incoming_umem_check_umem_done()) {
+        postcopy_incoming_umem_done();
+    }
+#ifdef DEBUG_POSTCOPY
+    end = qemu_get_clock_ns(rt_clock);
+    DPRINTF("bitmap %"PRIu64" nsec\n", end - start);
+#endif
+    return 0;
+}
+
 static int postcopy_incoming_umemd_mig_read_loop(void)
 {
     int error;
@@ -1704,7 +1947,8 @@ static void *postcopy_incoming_umemd(void* unused)
     qemu_thread_create(&umemd.mig_read_thread,
                        &postcopy_incoming_umemd_thread,
                        &(IncomingThread) {
-                           NULL, &postcopy_incoming_umemd_mig_read_loop,},
+                           &postcopy_incoming_umemd_mig_read_init,
+                           &postcopy_incoming_umemd_mig_read_loop,},
                        QEMU_THREAD_JOINABLE);
     qemu_thread_create(&umemd.mig_write_thread,
                        &postcopy_incoming_umemd_thread,
-- 
1.7.10.4