From: "Michael R. Hines" <mrhi...@us.ibm.com>

Just as RDMA has custom routines for saving memory,
this provides RDMA with custom routines for loading
and copying memory as well.

Micro-checkpointing needs this support to modify
arch_init.c as little as possible while still being
able to load RDMA-based memory from checkpoints in a
performance-optimal way as they are received from the network.

Signed-off-by: Michael R. Hines <mrhi...@us.ibm.com>
---
 arch_init.c                   |  9 +++--
 include/migration/migration.h | 33 ++++++++++++++++--
 include/migration/qemu-file.h | 54 +++++++++++++++++++++++++++--
 qemu-file.c                   | 80 +++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 167 insertions(+), 9 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index b8364b0..db75120 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -540,7 +540,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             /* In doubt sent page as normal */
             bytes_sent = -1;
             ret = ram_control_save_page(f, block->offset,
-                               offset, TARGET_PAGE_SIZE, &bytes_sent);
+                       block->host, offset, TARGET_PAGE_SIZE, &bytes_sent);
 
             if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                 if (ret != RAM_SAVE_CONTROL_DELAYED) {
@@ -1004,13 +1004,18 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
         } else if (flags & RAM_SAVE_FLAG_PAGE) {
             void *host;
+            int r;
 
             host = host_from_stream_offset(f, addr, flags);
             if (!host) {
                 return -EINVAL;
             }
 
-            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+            r = ram_control_load_page(f, host, TARGET_PAGE_SIZE);
+
+            if (r == RAM_LOAD_CONTROL_NOT_SUPP) {
+                qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+            }
         } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
             void *host = host_from_stream_offset(f, addr, flags);
             if (!host) {
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 9c62e2f..5c1a574 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -190,9 +190,38 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags);
 
 #define RAM_SAVE_CONTROL_NOT_SUPP -1000
 #define RAM_SAVE_CONTROL_DELAYED  -2000
+#define RAM_LOAD_CONTROL_NOT_SUPP -3000
+#define RAM_LOAD_CONTROL_DELAYED  -4000
+#define RAM_COPY_CONTROL_NOT_SUPP -5000
+#define RAM_COPY_CONTROL_DELAYED  -6000
 
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                             ram_addr_t offset, size_t size,
+#define RDMA_CONTROL_VERSION_CURRENT 1
+
+int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                             uint8_t *host_addr,
+                             ram_addr_t offset, long size,
                              int *bytes_sent);
 
+int ram_control_load_page(QEMUFile *f,
+                             void *host_addr,
+                             long size);
+
+int ram_control_copy_page(QEMUFile *f, 
+                             ram_addr_t block_offset_dest,
+                             ram_addr_t offset_dest,
+                             ram_addr_t block_offset_source,
+                             ram_addr_t offset_source,
+                             long size);
+
+int migrate_use_mc(void);
+int migrate_use_mc_net(void);
+int migrate_use_mc_rdma_copy(void);
+
+#define MC_VERSION 1
+
+int mc_info_load(QEMUFile *f, void *opaque, int version_id);
+void mc_info_save(QEMUFile *f, void *opaque);
+
+void qemu_rdma_info_save(QEMUFile *f, void *opaque);
+int qemu_rdma_info_load(QEMUFile *f, void *opaque, int version_id);
 #endif
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index a191fb6..c50de0d 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -71,17 +71,63 @@ typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, 
uint64_t flags);
 #define RAM_CONTROL_ROUND    1
 #define RAM_CONTROL_HOOK     2
 #define RAM_CONTROL_FINISH   3
+#define RAM_CONTROL_FLUSH    4
 
 /*
  * This function allows override of where the RAM page
  * is saved (such as RDMA, for example.)
  */
-typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
+typedef int (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
                                ram_addr_t block_offset,
+                               uint8_t *host_addr,
                                ram_addr_t offset,
-                               size_t size,
+                               long size,
                                int *bytes_sent);
 
+/*
+ * This function allows override of where the RAM page
+ * is loaded from (such as RDMA, for example.)
+ */
+typedef int (QEMURamLoadFunc)(QEMUFile *f,
+                               void *opaque,
+                               void *host_addr,
+                               long size);
+
+/*
+ * This function allows *local* RDMA copying of memory between two registered
+ * RAMBlocks, both real ones as well as private memory areas independently
+ * registered by external callers (such as MC). If RDMA is not available,
+ * then this function does nothing and the caller should just use memcpy().
+ */
+typedef int (QEMURamCopyFunc)(QEMUFile *f, void *opaque,
+                               ram_addr_t block_offset_dest,
+                               ram_addr_t offset_dest,
+                               ram_addr_t block_offset_source,
+                               ram_addr_t offset_source,
+                               long size);
+
+/* 
+ * Inform the underlying transport of a new virtual memory area.
+ * If this area is an actual RAMBlock, then pass the corresponding
+ * parameters of that block.
+ * If this area is an arbitrary virtual memory address, then
+ * pass the same value for both @host_addr and @block_offset.
+ */
+typedef int (QEMURamAddFunc)(QEMUFile *f, void *opaque,
+                               void *host_addr,
+                               ram_addr_t block_offset,
+                               uint64_t length);
+
+/*
+ * Remove a virtual memory area from the underlying transport.
+ * If this area is an actual RAMBlock, then pass the corresponding
+ * @block_offset of that block.
+ * If this area is an arbitrary virtual memory address, then
+ * pass that address as @block_offset.
+ */
+typedef int (QEMURamRemoveFunc)(QEMUFile *f, void *opaque,
+                               ram_addr_t block_offset);
+
 typedef struct QEMUFileOps {
     QEMUFilePutBufferFunc *put_buffer;
     QEMUFileGetBufferFunc *get_buffer;
@@ -92,6 +138,10 @@ typedef struct QEMUFileOps {
     QEMURamHookFunc *after_ram_iterate;
     QEMURamHookFunc *hook_ram_load;
     QEMURamSaveFunc *save_page;
+    QEMURamLoadFunc *load_page;
+    QEMURamCopyFunc *copy_page;
+    QEMURamAddFunc *add;
+    QEMURamRemoveFunc *remove;
 } QEMUFileOps;
 
 QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
diff --git a/qemu-file.c b/qemu-file.c
index 9473b67..3d7428f 100644
--- a/qemu-file.c
+++ b/qemu-file.c
@@ -501,14 +501,17 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags)
     }
 }
 
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                         ram_addr_t offset, size_t size, int *bytes_sent)
+int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                         uint8_t *host_addr,
+                         ram_addr_t offset, long size, int *bytes_sent)
 {
     if (f->ops->save_page) {
         int ret = f->ops->save_page(f, f->opaque, block_offset,
+                                    host_addr,
                                     offset, size, bytes_sent);
 
-        if (ret != RAM_SAVE_CONTROL_DELAYED) {
+        if (ret != RAM_SAVE_CONTROL_DELAYED
+                && ret != RAM_SAVE_CONTROL_NOT_SUPP) {
             if (bytes_sent && *bytes_sent > 0) {
                 qemu_update_position(f, *bytes_sent);
             } else if (ret < 0) {
@@ -522,6 +525,77 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t 
block_offset,
     return RAM_SAVE_CONTROL_NOT_SUPP;
 }
 
+int ram_control_load_page(QEMUFile *f, void *host_addr, long size)
+{
+    if (f->ops->load_page) {
+        int ret = f->ops->load_page(f, f->opaque, host_addr, size);
+
+        if (ret != RAM_LOAD_CONTROL_DELAYED 
+                && ret != RAM_LOAD_CONTROL_NOT_SUPP) {
+            if (ret < 0) {
+                qemu_file_set_error(f, ret);
+            }
+        }
+
+        return ret;
+    }
+
+    return RAM_LOAD_CONTROL_NOT_SUPP;
+}
+
+int ram_control_copy_page(QEMUFile *f, 
+                             ram_addr_t block_offset_dest,
+                             ram_addr_t offset_dest,
+                             ram_addr_t block_offset_source,
+                             ram_addr_t offset_source,
+                             long size)
+{
+    if (f->ops->copy_page) {
+        int ret = f->ops->copy_page(f, f->opaque,
+                                    block_offset_dest,
+                                    offset_dest,
+                                    block_offset_source,
+                                    offset_source,
+                                    size);
+
+        if (ret != RAM_COPY_CONTROL_DELAYED) {
+            if (ret < 0) {
+                qemu_file_set_error(f, ret);
+            }
+        }
+
+        return ret;
+    }
+
+    return RAM_COPY_CONTROL_NOT_SUPP;
+}
+
+
+void ram_control_add(QEMUFile *f, void *host_addr,
+                         ram_addr_t block_offset, uint64_t length)
+{
+    int ret = 0;
+
+    if (f->ops->add) {
+        ret = f->ops->add(f, f->opaque, host_addr, block_offset, length);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    }
+}
+
+void ram_control_remove(QEMUFile *f, ram_addr_t block_offset)
+{
+    int ret = 0;
+
+    if (f->ops->remove) {
+        ret = f->ops->remove(f, f->opaque, block_offset);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    }
+}
+
 static void qemu_fill_buffer(QEMUFile *f)
 {
     int len;
-- 
1.8.1.2


Reply via email to