From: "Michael R. Hines" <mrhi...@us.ibm.com> Just as RDMA has custom routines for saving memory, this provides RDMA with custom routines for loading and copying memory as well.
Micro-checkpointing needs this support so that arch_init.c is modified as little as possible while still being able to load RDMA-based memory from checkpoints in a performance-optimal way as they are received from the network. Signed-off-by: Michael R. Hines <mrhi...@us.ibm.com> --- arch_init.c | 9 +++-- include/migration/migration.h | 33 ++++++++++++++++-- include/migration/qemu-file.h | 54 +++++++++++++++++++++++++++-- qemu-file.c | 80 +++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 167 insertions(+), 9 deletions(-) diff --git a/arch_init.c b/arch_init.c index b8364b0..db75120 100644 --- a/arch_init.c +++ b/arch_init.c @@ -540,7 +540,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage) /* In doubt sent page as normal */ bytes_sent = -1; ret = ram_control_save_page(f, block->offset, - offset, TARGET_PAGE_SIZE, &bytes_sent); + block->host, offset, TARGET_PAGE_SIZE, &bytes_sent); if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { if (ret != RAM_SAVE_CONTROL_DELAYED) { @@ -1004,13 +1004,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); } else if (flags & RAM_SAVE_FLAG_PAGE) { void *host; + int r; host = host_from_stream_offset(f, addr, flags); if (!host) { return -EINVAL; } - qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + r = ram_control_load_page(f, host, TARGET_PAGE_SIZE); + + if (r == RAM_LOAD_CONTROL_NOT_SUPP) { + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + } } else if (flags & RAM_SAVE_FLAG_XBZRLE) { void *host = host_from_stream_offset(f, addr, flags); if (!host) { diff --git a/include/migration/migration.h b/include/migration/migration.h index 9c62e2f..5c1a574 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -190,9 +190,38 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags); #define RAM_SAVE_CONTROL_NOT_SUPP -1000 #define RAM_SAVE_CONTROL_DELAYED -2000 +#define RAM_LOAD_CONTROL_NOT_SUPP -3000 +#define RAM_LOAD_CONTROL_DELAYED -4000 
+#define RAM_COPY_CONTROL_NOT_SUPP -5000 +#define RAM_COPY_CONTROL_DELAYED -6000 -size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, - ram_addr_t offset, size_t size, +#define RDMA_CONTROL_VERSION_CURRENT 1 + +int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, + uint8_t *host_addr, + ram_addr_t offset, long size, int *bytes_sent); +int ram_control_load_page(QEMUFile *f, + void *host_addr, + long size); + +int ram_control_copy_page(QEMUFile *f, + ram_addr_t block_offset_dest, + ram_addr_t offset_dest, + ram_addr_t block_offset_source, + ram_addr_t offset_source, + long size); + +int migrate_use_mc(void); +int migrate_use_mc_net(void); +int migrate_use_mc_rdma_copy(void); + +#define MC_VERSION 1 + +int mc_info_load(QEMUFile *f, void *opaque, int version_id); +void mc_info_save(QEMUFile *f, void *opaque); + +void qemu_rdma_info_save(QEMUFile *f, void *opaque); +int qemu_rdma_info_load(QEMUFile *f, void *opaque, int version_id); #endif diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index a191fb6..c50de0d 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -71,17 +71,63 @@ typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags); #define RAM_CONTROL_ROUND 1 #define RAM_CONTROL_HOOK 2 #define RAM_CONTROL_FINISH 3 +#define RAM_CONTROL_FLUSH 4 /* * This function allows override of where the RAM page * is saved (such as RDMA, for example.) */ -typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque, +typedef int (QEMURamSaveFunc)(QEMUFile *f, void *opaque, ram_addr_t block_offset, + uint8_t *host_addr, ram_addr_t offset, - size_t size, + long size, int *bytes_sent); +/* + * This function allows override of where the RAM page + * is loaded from (such as RDMA, for example.) 
+ */ +typedef int (QEMURamLoadFunc)(QEMUFile *f, + void *opaque, + void *host_addr, + long size); + +/* + * This function allows *local* RDMA copying of memory between two registered + * RAMBlocks, both real ones as well as private memory areas independently + * registered by external callers (such as MC). If RDMA is not available, + * then this function does nothing and the caller should just use memcpy(). + */ +typedef int (QEMURamCopyFunc)(QEMUFile *f, void *opaque, + ram_addr_t block_offset_dest, + ram_addr_t offset_dest, + ram_addr_t block_offset_source, + ram_addr_t offset_source, + long size); + +/* + * Inform the underlying transport of a new virtual memory area. + * If this area is an actual RAMBlock, then pass the corresponding + * parameters of that block. + * If this area is an arbitrary virtual memory address, then + * pass the same value for both @host_addr and @block_offset. + */ +typedef int (QEMURamAddFunc)(QEMUFile *f, void *opaque, + void *host_addr, + ram_addr_t block_offset, + uint64_t length); + +/* + * Remove a virtual memory area from the underlying transport. + * If this area is an actual RAMBlock, then pass the corresponding + * @block_offset of that block. + * If this area is an arbitrary virtual memory address, then + * pass the same @block_offset that was given when it was added. 
+ */ +typedef int (QEMURamRemoveFunc)(QEMUFile *f, void *opaque, + ram_addr_t block_offset); + typedef struct QEMUFileOps { QEMUFilePutBufferFunc *put_buffer; QEMUFileGetBufferFunc *get_buffer; @@ -92,6 +138,10 @@ typedef struct QEMUFileOps { QEMURamHookFunc *after_ram_iterate; QEMURamHookFunc *hook_ram_load; QEMURamSaveFunc *save_page; + QEMURamLoadFunc *load_page; + QEMURamCopyFunc *copy_page; + QEMURamAddFunc *add; + QEMURamRemoveFunc *remove; } QEMUFileOps; QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops); diff --git a/qemu-file.c b/qemu-file.c index 9473b67..3d7428f 100644 --- a/qemu-file.c +++ b/qemu-file.c @@ -501,14 +501,17 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags) } } -size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, - ram_addr_t offset, size_t size, int *bytes_sent) +int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, + uint8_t *host_addr, + ram_addr_t offset, long size, int *bytes_sent) { if (f->ops->save_page) { int ret = f->ops->save_page(f, f->opaque, block_offset, + host_addr, offset, size, bytes_sent); - if (ret != RAM_SAVE_CONTROL_DELAYED) { + if (ret != RAM_SAVE_CONTROL_DELAYED + && ret != RAM_SAVE_CONTROL_NOT_SUPP) { if (bytes_sent && *bytes_sent > 0) { qemu_update_position(f, *bytes_sent); } else if (ret < 0) { @@ -522,6 +525,77 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, return RAM_SAVE_CONTROL_NOT_SUPP; } +int ram_control_load_page(QEMUFile *f, void *host_addr, long size) +{ + if (f->ops->load_page) { + int ret = f->ops->load_page(f, f->opaque, host_addr, size); + + if (ret != RAM_LOAD_CONTROL_DELAYED + && ret != RAM_LOAD_CONTROL_NOT_SUPP) { + if (ret < 0) { + qemu_file_set_error(f, ret); + } + } + + return ret; + } + + return RAM_LOAD_CONTROL_NOT_SUPP; +} + +int ram_control_copy_page(QEMUFile *f, + ram_addr_t block_offset_dest, + ram_addr_t offset_dest, + ram_addr_t block_offset_source, + ram_addr_t offset_source, + long size) +{ + if (f->ops->copy_page) { 
+ int ret = f->ops->copy_page(f, f->opaque, + block_offset_dest, + offset_dest, + block_offset_source, + offset_source, + size); + + if (ret != RAM_COPY_CONTROL_DELAYED) { + if (ret < 0) { + qemu_file_set_error(f, ret); + } + } + + return ret; + } + + return RAM_COPY_CONTROL_NOT_SUPP; +} + + +void ram_control_add(QEMUFile *f, void *host_addr, + ram_addr_t block_offset, uint64_t length) +{ + int ret = 0; + + if (f->ops->add) { + ret = f->ops->add(f, f->opaque, host_addr, block_offset, length); + if (ret < 0) { + qemu_file_set_error(f, ret); + } + } +} + +void ram_control_remove(QEMUFile *f, ram_addr_t block_offset) +{ + int ret = 0; + + if (f->ops->remove) { + ret = f->ops->remove(f, f->opaque, block_offset); + if (ret < 0) { + qemu_file_set_error(f, ret); + } + } +} + static void qemu_fill_buffer(QEMUFile *f) { int len; -- 1.8.1.2