From: William Roche <william.ro...@oracle.com> A memory page that has been poisoned at the hypervisor level is no longer readable. It is therefore now treated as a zero page during the RAM-saving phase of migration.
Migrating a VM crashes QEMU when it tries to read the memory address space and stumbles on the poisoned page, with a stack trace similar to: Program terminated with signal SIGBUS, Bus error. #0 _mm256_loadu_si256 #1 buffer_zero_avx2 #2 select_accel_fn #3 buffer_is_zero #4 save_zero_page #5 ram_save_target_page_legacy #6 ram_save_host_page #7 ram_find_and_save_block #8 ram_save_iterate #9 qemu_savevm_state_iterate #10 migration_iteration_run #11 migration_thread #12 qemu_thread_start Fix it by treating poisoned pages as zero pages for the migration copy. This fix also works with underlying large pages, because it takes the RAMBlock segment "page-size" into account. Standard migration and compressed transfers are handled by this code. RDMA transfer isn't touched. Reviewed-by: Peter Xu <pet...@redhat.com> Tested-by: Li Zhijian <lizhij...@fujitsu.com> # RDMA Signed-off-by: William Roche <william.ro...@oracle.com> --- accel/kvm/kvm-all.c | 14 ++++++++++++++ accel/stubs/kvm-stub.c | 5 +++++ include/sysemu/kvm.h | 10 ++++++++++ migration/ram-compress.c | 3 ++- migration/ram.c | 24 ++++++++++++++++++++++-- migration/ram.h | 2 ++ 6 files changed, 55 insertions(+), 3 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index e39a810a4e..64c0b37823 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1149,6 +1149,20 @@ static void kvm_unpoison_all(void *param) } } +bool kvm_hwpoisoned_page(RAMBlock *block, void *offset) +{ + HWPoisonPage *pg; + ram_addr_t ram_addr = (ram_addr_t) offset; + + QLIST_FOREACH(pg, &hwpoison_page_list, list) { + if ((ram_addr >= pg->ram_addr) && + (ram_addr - pg->ram_addr < block->page_size)) { + return true; + } + } + return false; +} + void kvm_hwpoison_page_add(ram_addr_t ram_addr) { HWPoisonPage *page; diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 1b37d9a302..17774fa5ef 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -124,3 +124,8 @@ uint32_t kvm_dirty_ring_size(void)
{ return 0; } + +bool kvm_hwpoisoned_page(RAMBlock *block, void *ram_addr) +{ + return false; +} diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 80b69d88f6..66937f9dfe 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -539,4 +539,14 @@ bool kvm_arch_cpu_check_are_resettable(void); bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); + +/** + * kvm_hwpoisoned_page - indicate if the given page is poisoned + * @block: memory block of the given page + * @ram_addr: offset of the page + * + * Returns: true: page is poisoned + * false: page not yet poisoned + */ +bool kvm_hwpoisoned_page(RAMBlock *block, void *ram_addr); #endif diff --git a/migration/ram-compress.c b/migration/ram-compress.c index fa4388f6a6..a7772a08a2 100644 --- a/migration/ram-compress.c +++ b/migration/ram-compress.c @@ -35,6 +35,7 @@ #include "qemu/stats64.h" #include "migration.h" #include "options.h" +#include "ram.h" #include "io/channel-null.h" #include "exec/target_page.h" #include "exec/ramblock.h" @@ -214,7 +215,7 @@ static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, assert(qemu_file_buffer_empty(f)); - if (buffer_is_zero(p, page_size)) { + if (migration_buffer_is_zero(block, offset, page_size)) { return RES_ZEROPAGE; } diff --git a/migration/ram.c b/migration/ram.c index 8c7886ab79..5fd4d27854 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1107,6 +1107,26 @@ void ram_release_page(const char *rbname, uint64_t offset) ram_discard_range(rbname, offset, TARGET_PAGE_SIZE); } +/** + * migration_buffer_is_zero: indicate if the page at the given + * location is entirely filled with zero, or is a poisoned page. 
+ * + * @block: block that contains the page + * @offset: offset inside the block for the page + * @len: size to consider + */ +bool migration_buffer_is_zero(RAMBlock *block, ram_addr_t offset, + size_t len) +{ + uint8_t *p = block->host + offset; + + if (kvm_enabled() && kvm_hwpoisoned_page(block, (void *)offset)) { + return true; + } + + return buffer_is_zero(p, len); +} + /** * save_zero_page: send the zero page to the stream * @@ -1119,11 +1139,10 @@ void ram_release_page(const char *rbname, uint64_t offset) static int save_zero_page(RAMState *rs, PageSearchStatus *pss, ram_addr_t offset) { - uint8_t *p = pss->block->host + offset; QEMUFile *file = pss->pss_channel; int len = 0; - if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) { + if (!migration_buffer_is_zero(pss->block, offset, TARGET_PAGE_SIZE)) { return 0; } @@ -1154,6 +1173,7 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, * > 0 - number of pages written * * Return true if the pages has been saved, otherwise false is returned. + * TODO: hwpoison pages fail RDMA migration, should be handled. */ static bool control_save_page(PageSearchStatus *pss, ram_addr_t offset, int *pages) diff --git a/migration/ram.h b/migration/ram.h index 9b937a446b..d34ba79d36 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -65,6 +65,8 @@ void ram_handle_zero(void *host, uint64_t size); void ram_transferred_add(uint64_t bytes); void ram_release_page(const char *rbname, uint64_t offset); +bool migration_buffer_is_zero(RAMBlock *block, ram_addr_t offset, size_t len); + int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr); bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset); void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); -- 2.39.3