This patch implements the outgoing part of postcopy live migration.

Signed-off-by: Isaku Yamahata <yamah...@valinux.co.jp>
---
Changes v1 -> v2:
- fix parameter to qemu_fdopen()
- handle QEMU_UMEM_REQ_EOC properly
  In PO_STATE_ALL_PAGES_SENT, a QEMU_UMEM_REQ_EOC request was ignored;
  handle it properly.
- flush on-demand page unconditionally
- improve postcopy_outgoing_ram_save_live and postcopy_outgoing_begin()
- use qemu_fopen_fd
- use memory api instead of obsolete api
- fix segv in postcopy_outgoing_check_all_ram_sent()
- catch up with qapi change
---
 arch_init.c               |   19 ++-
 migration-exec.c          |    4 +
 migration-fd.c            |   17 ++
 migration-postcopy-stub.c |   22 +++
 migration-postcopy.c      |  450 +++++++++++++++++++++++++++++++++++++++++++++
 migration-tcp.c           |   25 ++-
 migration-unix.c          |   26 ++-
 migration.c               |   32 +++-
 migration.h               |   12 ++
 savevm.c                  |   22 ++-
 sysemu.h                  |    2 +-
 11 files changed, 614 insertions(+), 17 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 22d9691..3599e5c 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -154,6 +154,13 @@ static int is_dup_page(uint8_t *page)
     return 1;
 }
 
+static bool outgoing_postcopy = false;
+
+void ram_save_set_params(const MigrationParams *params, void *opaque)
+{
+    outgoing_postcopy = params->postcopy;   /* checked by ram_save_live() */
+}
+
 static RAMBlock *last_block_sent = NULL;
 static uint64_t bytes_transferred;
 
@@ -343,6 +350,15 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
     uint64_t expected_time = 0;
     int ret;
 
+    if (stage == 1) {
+        bytes_transferred = 0;
+        last_block_sent = NULL;
+        ram_save_set_last_block(NULL, 0);
+    }
+    if (outgoing_postcopy) {
+        return postcopy_outgoing_ram_save_live(f, stage, opaque);
+    }
+
     if (stage < 0) {
         memory_global_dirty_log_stop();
         return 0;
@@ -351,9 +367,6 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
     memory_global_sync_dirty_bitmap(get_system_memory());
 
     if (stage == 1) {
-        bytes_transferred = 0;
-        last_block_sent = NULL;
-        ram_save_set_last_block(NULL, 0);
         sort_ram_list();
 
         /* Make sure all dirty bits are set */
diff --git a/migration-exec.c b/migration-exec.c
index 7f08b3b..a90da5c 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -64,6 +64,10 @@ int exec_start_outgoing_migration(MigrationState *s, const 
char *command)
 {
     FILE *f;
 
+    if (s->params.postcopy) {
+        return -ENOSYS;
+    }
+
     f = popen(command, "w");
     if (f == NULL) {
         DPRINTF("Unable to popen exec target\n");
diff --git a/migration-fd.c b/migration-fd.c
index 42b8162..83b5f18 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -90,6 +90,23 @@ int fd_start_outgoing_migration(MigrationState *s, const 
char *fdname)
     s->write = fd_write;
     s->close = fd_close;
 
+    if (s->params.postcopy) {
+        int flags = fcntl(s->fd, F_GETFL);
+        if ((flags & O_ACCMODE) != O_RDWR) {
+            goto err_after_open;
+        }
+
+        s->fd_read = dup(s->fd);
+        if (s->fd_read == -1) {
+            goto err_after_open;
+        }
+        s->file_read = qemu_fopen_fd(s->fd_read);
+        if (s->file_read == NULL) {
+            close(s->fd_read);
+            goto err_after_open;
+        }
+    }
+
     migrate_fd_connect(s);
     return 0;
 
diff --git a/migration-postcopy-stub.c b/migration-postcopy-stub.c
index f9ebcbe..9c64827 100644
--- a/migration-postcopy-stub.c
+++ b/migration-postcopy-stub.c
@@ -24,6 +24,28 @@
 #include "sysemu.h"
 #include "migration.h"
 
+int postcopy_outgoing_create_read_socket(MigrationState *s)
+{
+    return s->params.postcopy ? -ENOSYS : 0;    /* precopy must still work */
+}
+
+int postcopy_outgoing_ram_save_live(QEMUFile *f,
+                                    int stage, void *opaque)
+{
+    return -ENOSYS;     /* stub: outgoing postcopy is not available */
+}
+
+void *postcopy_outgoing_begin(MigrationState *ms)
+{
+    return NULL;        /* stub: no outgoing postcopy state is created */
+}
+
+int postcopy_outgoing_ram_save_background(QEMUFile *f,
+                                          void *postcopy)
+{
+    return -ENOSYS;     /* stub: outgoing postcopy is not available */
+}
+
 int postcopy_incoming_init(const char *incoming, bool incoming_postcopy)
 {
     return -ENOSYS;
diff --git a/migration-postcopy.c b/migration-postcopy.c
index 5913e05..eb37094 100644
--- a/migration-postcopy.c
+++ b/migration-postcopy.c
@@ -177,6 +177,456 @@ static void postcopy_incoming_send_req(QEMUFile *f,
     }
 }
 
+static int postcopy_outgoing_recv_req_idstr(QEMUFile *f,
+                                            struct qemu_umem_req *req,
+                                            size_t *offset)
+{
+    int ret;    /* byte count returned by qemu_peek_buffer() */
+
+    req->len = qemu_peek_byte(f, *offset);      /* length prefix of idstr */
+    *offset += 1;
+    if (req->len == 0) {
+        return -EAGAIN;         /* the recv handler maps this to "no error" */
+    }
+    req->idstr = g_malloc((int)req->len + 1);   /* +1 for terminating NUL */
+    ret = qemu_peek_buffer(f, (uint8_t*)req->idstr, req->len, *offset);
+    *offset += ret;
+    if (ret != req->len) {      /* short peek: drop the partial string */
+        g_free(req->idstr);
+        req->idstr = NULL;
+        return -EAGAIN;
+    }
+    req->idstr[req->len] = 0;
+    return 0;
+}
+
+static int postcopy_outgoing_recv_req_pgoffs(QEMUFile *f,
+                                             struct qemu_umem_req *req,
+                                             size_t *offset)
+{
+    int ret;
+    uint32_t be32;      /* big-endian count of page offsets */
+    uint32_t i;
+
+    ret = qemu_peek_buffer(f, (uint8_t*)&be32, sizeof(be32), *offset);
+    *offset += sizeof(be32);
+    if (ret != sizeof(be32)) {
+        return -EAGAIN;         /* count not fully available */
+    }
+
+    req->nr = be32_to_cpu(be32);
+    req->pgoffs = g_new(uint64_t, req->nr);     /* caller frees on success */
+    for (i = 0; i < req->nr; i++) {
+        uint64_t be64;          /* one big-endian page offset */
+        ret = qemu_peek_buffer(f, (uint8_t*)&be64, sizeof(be64), *offset);
+        *offset += sizeof(be64);
+        if (ret != sizeof(be64)) {      /* short peek: drop partial array */
+            g_free(req->pgoffs);
+            req->pgoffs = NULL;
+            return -EAGAIN;
+        }
+        req->pgoffs[i] = be64_to_cpu(be64);
+    }
+    return 0;
+}
+
+static int postcopy_outgoing_recv_req(QEMUFile *f, struct qemu_umem_req *req)
+{
+    int size;
+    int ret;
+    size_t offset = 0;  /* bytes peeked so far; skipped only on success */
+
+    size = qemu_peek_buffer(f, (uint8_t*)&req->cmd, 1, offset);
+    if (size <= 0) {
+        return -EAGAIN;
+    }
+    offset += 1;
+
+    switch (req->cmd) {
+    case QEMU_UMEM_REQ_INIT:
+    case QEMU_UMEM_REQ_EOC:
+        /* no payload */
+        break;
+    case QEMU_UMEM_REQ_ON_DEMAND:
+    case QEMU_UMEM_REQ_BACKGROUND:
+    case QEMU_UMEM_REQ_REMOVE:
+        ret = postcopy_outgoing_recv_req_idstr(f, req, &offset);
+        if (ret == 0) {
+            ret = postcopy_outgoing_recv_req_pgoffs(f, req, &offset);
+        }
+        if (ret < 0) {
+            g_free(req->idstr);     /* don't leak idstr if pgoffs failed */
+            req->idstr = NULL;
+            return ret;
+        }
+        break;
+    case QEMU_UMEM_REQ_ON_DEMAND_CONT:
+    case QEMU_UMEM_REQ_BACKGROUND_CONT:
+        ret = postcopy_outgoing_recv_req_pgoffs(f, req, &offset);
+        if (ret < 0) {
+            return ret;
+        }
+        break;
+    default:
+        return -EINVAL;     /* unknown command from the peer: fail, no abort */
+    }
+    qemu_file_skip(f, offset);
+    DPRINTF("cmd %d\n", req->cmd);
+    return 0;
+}
+
+static void postcopy_outgoing_free_req(struct qemu_umem_req *req)
+{
+    g_free(req->idstr);         /* either pointer may be NULL */
+    g_free(req->pgoffs);
+}
+
+/***************************************************************************
+ * outgoing part
+ */
+
+#define QEMU_SAVE_LIVE_STAGE_START      0x01    /* = QEMU_VM_SECTION_START */
+#define QEMU_SAVE_LIVE_STAGE_PART       0x02    /* = QEMU_VM_SECTION_PART */
+#define QEMU_SAVE_LIVE_STAGE_END        0x03    /* = QEMU_VM_SECTION_END */
+
+enum POState {
+    PO_STATE_ERROR_RECEIVE,     /* receiving a request failed while ACTIVE */
+    PO_STATE_ACTIVE,            /* set by postcopy_outgoing_begin() */
+    PO_STATE_EOC_RECEIVED,      /* EOC arrived before all pages were sent */
+    PO_STATE_ALL_PAGES_SENT,    /* final RAM_SAVE_FLAG_EOS has been sent */
+    PO_STATE_COMPLETED,         /* outgoing postcopy is finished */
+};
+typedef enum POState POState;
+
+struct PostcopyOutgoingState {
+    POState state;
+    QEMUFile *mig_read;         /* requests from the destination */
+    int fd_read;                /* fd watched for incoming requests */
+    RAMBlock *last_block_read;  /* block of the latest non-CONT request */
+
+    QEMUFile *mig_buffered_write;   /* ms->file; NULL after all pages sent */
+    MigrationState *ms;
+
+    /* For nobg mode: where the last scan for dirty pages stopped */
+    RAMBlock *block;
+    ram_addr_t offset;
+};
+typedef struct PostcopyOutgoingState PostcopyOutgoingState;
+
+int postcopy_outgoing_create_read_socket(MigrationState *s)
+{
+    if (!s->params.postcopy) {
+        return 0;               /* precopy needs no read channel */
+    }
+    s->fd_read = dup(s->fd);    /* separate fd for requests from the peer */
+    if (s->fd_read == -1) {
+        int ret = -errno;       /* save errno: perror() may clobber it */
+        perror("dup");
+        return ret;
+    }
+    s->file_read = qemu_fopen_socket(s->fd_read);
+    if (s->file_read == NULL) {
+        close(s->fd_read);      /* don't leak the duplicated descriptor */
+        return -EINVAL;
+    }
+    return 0;
+}
+
+int postcopy_outgoing_ram_save_live(QEMUFile *f, int stage, void *opaque)
+{
+    int ret = 0;        /* only the PART stage returns 1 */
+    DPRINTF("stage %d\n", stage);
+    switch (stage) {
+    case QEMU_SAVE_LIVE_STAGE_START:
+        sort_ram_list();
+        ram_save_live_mem_size(f);
+        break;
+    case QEMU_SAVE_LIVE_STAGE_PART:
+        ret = 1;        /* NOTE(review): presumably "iteration done"; confirm */
+        break;
+    case QEMU_SAVE_LIVE_STAGE_END:
+        break;
+    default:
+        abort();
+    }
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);        /* every stage ends with EOS */
+    return ret;
+}
+
+/*
+ * Handle one request from the destination.  Return value:
+ *   0: continue postcopy mode
+ * > 0: completed postcopy mode.
+ * < 0: error
+ */
+static int postcopy_outgoing_handle_req(PostcopyOutgoingState *s,
+                                        const struct qemu_umem_req *req,
+                                        bool *written)
+{
+    int i;
+    RAMBlock *block;
+
+    DPRINTF("cmd %d state %d\n", req->cmd, s->state);
+    switch(req->cmd) {
+    case QEMU_UMEM_REQ_INIT:
+        /* nothing */
+        break;
+    case QEMU_UMEM_REQ_EOC:
+        /* the destination asks us to finish migration */
+        if (s->state == PO_STATE_ALL_PAGES_SENT) {
+            s->state = PO_STATE_COMPLETED;
+            DPRINTF("-> PO_STATE_COMPLETED\n");
+        } else {
+            s->state = PO_STATE_EOC_RECEIVED;
+            DPRINTF("-> PO_STATE_EOC_RECEIVED\n");
+        }
+        return 1;
+    case QEMU_UMEM_REQ_ON_DEMAND:
+    case QEMU_UMEM_REQ_BACKGROUND:
+        DPRINTF("idstr: %s\n", req->idstr);
+        block = ram_find_block(req->idstr, strlen(req->idstr));
+        if (block == NULL) {
+            return -EINVAL;
+        }
+        s->last_block_read = block;     /* reused by following *_CONT */
+        /* fall through */
+    case QEMU_UMEM_REQ_ON_DEMAND_CONT:
+    case QEMU_UMEM_REQ_BACKGROUND_CONT:
+        DPRINTF("nr %d\n", req->nr);
+        if (s->mig_buffered_write == NULL) {    /* write side already closed */
+            assert(s->state == PO_STATE_ALL_PAGES_SENT);
+            break;
+        }
+        for (i = 0; i < req->nr; i++) {
+            DPRINTF("offs[%d] 0x%"PRIx64"\n", i, req->pgoffs[i]);
+            int ret = ram_save_page(s->mig_buffered_write, s->last_block_read,
+                                    req->pgoffs[i] << TARGET_PAGE_BITS);
+            if (ret > 0) {
+                *written = true;        /* caller must drain the file */
+            }
+        }
+        break;
+    case QEMU_UMEM_REQ_REMOVE:
+        block = ram_find_block(req->idstr, strlen(req->idstr));
+        if (block == NULL) {
+            return -EINVAL;
+        }
+        for (i = 0; i < req->nr; i++) {
+            ram_addr_t offset = req->pgoffs[i] << TARGET_PAGE_BITS;
+            memory_region_reset_dirty(block->mr, offset, TARGET_PAGE_SIZE,
+                                      DIRTY_MEMORY_MIGRATION);
+        }
+        break;
+    default:
+        return -EINVAL;
+    }
+    return 0;
+}
+
+static void postcopy_outgoing_close_mig_read(PostcopyOutgoingState *s)
+{
+    if (s->mig_read != NULL) {  /* no-op once the read side is closed */
+        qemu_set_fd_handler(s->fd_read, NULL, NULL, NULL);
+        qemu_fclose(s->mig_read);
+        s->mig_read = NULL;
+        fd_close(&s->fd_read);
+
+        s->ms->file_read = NULL;    /* MigrationState held the same handles */
+        s->ms->fd_read = -1;
+    }
+}
+
+static void postcopy_outgoing_completed(PostcopyOutgoingState *s)
+{
+    postcopy_outgoing_close_mig_read(s);
+    s->ms->postcopy = NULL;     /* s is freed below; drop the reference */
+    g_free(s);                  /* callers must not touch s afterwards */
+}
+
+static void postcopy_outgoing_recv_handler(void *opaque)
+{
+    PostcopyOutgoingState *s = opaque;
+    bool written = false;       /* set by handle_req when a page was written */
+    int ret = 0;
+
+    assert(s->state == PO_STATE_ACTIVE ||
+           s->state == PO_STATE_ALL_PAGES_SENT);
+
+    do {    /* process every complete request that is currently buffered */
+        struct qemu_umem_req req = {.idstr = NULL,
+                                    .pgoffs = NULL};
+
+        ret = postcopy_outgoing_recv_req(s->mig_read, &req);
+        if (ret < 0) {
+            if (ret == -EAGAIN) {
+                ret = 0;        /* an incomplete request is not an error */
+            }
+            break;
+        }
+
+        /* Even when s->state == PO_STATE_ALL_PAGES_SENT,
+           some request can be received like QEMU_UMEM_REQ_EOC */
+        ret = postcopy_outgoing_handle_req(s, &req, &written);
+        postcopy_outgoing_free_req(&req);
+    } while (ret == 0);
+
+    /*
+     * Flush the buffered file.
+     * Although mig_buffered_write is a rate-limited buffered file, the pages
+     * just written were requested on demand by the destination, so push
+     * them out immediately, ignoring the rate limit.
+     */
+    if (written) {
+        qemu_buffered_file_drain(s->mig_buffered_write);
+    }
+
+    if (ret < 0) {
+        switch (s->state) {
+        case PO_STATE_ACTIVE:
+            s->state = PO_STATE_ERROR_RECEIVE;
+            DPRINTF("-> PO_STATE_ERROR_RECEIVE\n");
+            break;
+        case PO_STATE_ALL_PAGES_SENT:
+            s->state = PO_STATE_COMPLETED;
+            DPRINTF("-> PO_STATE_COMPLETED\n");
+            break;
+        default:
+            abort();
+        }
+    }
+    if (s->state == PO_STATE_ERROR_RECEIVE || s->state == PO_STATE_COMPLETED) {
+        postcopy_outgoing_close_mig_read(s);
+    }
+    if (s->state == PO_STATE_COMPLETED) {
+        DPRINTF("PO_STATE_COMPLETED\n");
+        MigrationState *ms = s->ms;     /* s is freed by the next call */
+        postcopy_outgoing_completed(s);
+        migrate_fd_completed(ms);
+    }
+}
+
+void *postcopy_outgoing_begin(MigrationState *ms)
+{
+    PostcopyOutgoingState *s = g_new0(PostcopyOutgoingState, 1);
+    DPRINTF("outgoing begin\n");
+    qemu_fflush(ms->file);
+
+    s->ms = ms;                 /* last_block_read stays NULL (g_new0) */
+    s->state = PO_STATE_ACTIVE;
+    s->fd_read = ms->fd_read;
+    s->mig_read = ms->file_read;
+    s->mig_buffered_write = ms->file;
+    s->block = NULL;            /* nobg scan starts from the first block */
+    s->offset = 0;
+
+    /* Make sure all dirty bits are set */
+    cpu_physical_memory_set_dirty_tracking(0);
+    ram_save_memory_set_dirty();
+
+    qemu_set_fd_handler(s->fd_read,     /* requests arrive on fd_read */
+                        &postcopy_outgoing_recv_handler, NULL, s);
+    return s;                   /* stored in MigrationState::postcopy */
+}
+
+static void postcopy_outgoing_ram_all_sent(QEMUFile *f,
+                                           PostcopyOutgoingState *s)
+{
+    assert(s->state == PO_STATE_ACTIVE);
+
+    s->state = PO_STATE_ALL_PAGES_SENT;
+    /* tell incoming side that all pages are sent */
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+    qemu_buffered_file_drain(f);
+    DPRINTF("sent RAM_SAVE_FLAG_EOS\n");
+    migrate_fd_cleanup(s->ms);
+
+    /* Later migrate_fd_completed() will be called, which calls
+     * migrate_fd_cleanup() again. So a dummy file is created
+     * for the qemu monitor to keep working.
+     */
+    s->ms->file = qemu_fopen_ops(NULL, NULL, NULL, NULL, NULL,
+                                 NULL, NULL);
+    s->mig_buffered_write = NULL;   /* later page requests are ignored */
+}
+
+static int postcopy_outgoing_check_all_ram_sent(PostcopyOutgoingState *s,
+                                                RAMBlock *block,
+                                                ram_addr_t offset)
+{
+    if (block == NULL) {        /* no resume point: start at first block */
+        block = QLIST_FIRST(&ram_list.blocks);
+        offset = 0;
+    }
+
+    for (; block != NULL; block = QLIST_NEXT(block, next), offset = 0) {
+        for (; offset < block->length; offset += TARGET_PAGE_SIZE) {
+            if (memory_region_get_dirty(block->mr, offset, TARGET_PAGE_SIZE,
+                                        DIRTY_MEMORY_MIGRATION)) {
+                s->block = block;       /* remember where to resume */
+                s->offset = offset;
+                return 0;               /* a page is still dirty */
+            }
+        }
+    }
+
+    return 1;   /* no dirty page from the starting point onward */
+}
+
+int postcopy_outgoing_ram_save_background(QEMUFile *f, void *postcopy)
+{
+    PostcopyOutgoingState *s = postcopy;
+
+    assert(s->state == PO_STATE_ACTIVE ||
+           s->state == PO_STATE_EOC_RECEIVED ||
+           s->state == PO_STATE_ERROR_RECEIVE);
+
+    switch (s->state) {
+    case PO_STATE_ACTIVE:
+        /* nothing. processed below */
+        break;
+    case PO_STATE_EOC_RECEIVED:
+        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+        s->state = PO_STATE_COMPLETED;
+        postcopy_outgoing_completed(s);     /* frees s */
+        DPRINTF("PO_STATE_COMPLETED\n");
+        return 1;                           /* > 0: migration completed */
+    case PO_STATE_ERROR_RECEIVE:
+        postcopy_outgoing_completed(s);     /* frees s */
+        DPRINTF("PO_STATE_ERROR_RECEIVE\n");
+        return -1;                          /* < 0: migration failed */
+    default:
+        abort();
+    }
+
+    if (s->ms->params.nobg) {   /* nobg: send nothing here, only poll */
+        /* See if all pages are sent, resuming from the last dirty page. */
+        if (postcopy_outgoing_check_all_ram_sent(s,
+                                                 s->block, s->offset) == 0) {
+            return 0;
+        }
+        /* ram_list can be reordered. (it doesn't seem so during migration,
+           though) So the whole list needs to be checked again */
+        if (postcopy_outgoing_check_all_ram_sent(s, NULL, 0) == 0) {
+            return 0;
+        }
+
+        postcopy_outgoing_ram_all_sent(f, s);
+        return 0;
+    }
+
+    DPRINTF("outgoing background state: %d\n", s->state);
+
+    while (qemu_file_rate_limit(f) == 0) {      /* send until rate-limited */
+        if (ram_save_block(f) == 0) { /* no more blocks */
+            assert(s->state == PO_STATE_ACTIVE);
+            postcopy_outgoing_ram_all_sent(f, s);
+            return 0;
+        }
+    }
+
+    return 0;   /* 0: keep calling; postcopy continues */
+}
+
 /***************************************************************************
  * incoming part
  */
diff --git a/migration-tcp.c b/migration-tcp.c
index 440804d..98be560 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -65,23 +65,32 @@ static void tcp_wait_for_connect(void *opaque)
     } while (ret == -1 && (socket_error()) == EINTR);
 
     if (ret < 0) {
-        migrate_fd_error(s);
-        return;
+        goto error_out;
     }
 
     qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
 
-    if (val == 0)
+    if (val == 0) {
+        ret = postcopy_outgoing_create_read_socket(s);
+        if (ret < 0) {
+            goto error_out;
+        }
         migrate_fd_connect(s);
-    else {
+    } else {
         DPRINTF("error connecting %d\n", val);
-        migrate_fd_error(s);
+        goto error_out;
     }
+    return;
+
+error_out:
+    migrate_fd_error(s);
 }
 
 int tcp_start_outgoing_migration(MigrationState *s, const char *host_port,
                                  Error **errp)
 {
+    int ret;
+
     s->get_error = socket_errno;
     s->write = socket_write;
     s->close = tcp_close;
@@ -105,6 +114,12 @@ int tcp_start_outgoing_migration(MigrationState *s, const 
char *host_port,
         return -1;
     }
 
+    ret = postcopy_outgoing_create_read_socket(s);
+    if (ret < 0) {
+        migrate_fd_error(s);
+        return ret;
+    }
+
     return 0;
 }
 
diff --git a/migration-unix.c b/migration-unix.c
index 169de88..f3ebaff 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -71,12 +71,20 @@ static void unix_wait_for_connect(void *opaque)
 
     qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
 
-    if (val == 0)
+    if (val == 0) {
+        ret = postcopy_outgoing_create_read_socket(s);
+        if (ret < 0) {
+            goto error_out;
+        }
         migrate_fd_connect(s);
-    else {
+    } else {
         DPRINTF("error connecting %d\n", val);
-        migrate_fd_error(s);
+        goto error_out;
     }
+    return;
+
+error_out:
+    migrate_fd_error(s);
 }
 
 int unix_start_outgoing_migration(MigrationState *s, const char *path)
@@ -111,11 +119,19 @@ int unix_start_outgoing_migration(MigrationState *s, 
const char *path)
 
     if (ret < 0) {
         DPRINTF("connect failed\n");
-        migrate_fd_error(s);
-        return ret;
+        goto error_out;
+    }
+
+    ret = postcopy_outgoing_create_read_socket(s);
+    if (ret < 0) {
+        goto error_out;
     }
     migrate_fd_connect(s);
     return 0;
+
+error_out:
+    migrate_fd_error(s);
+    return ret;
 }
 
 static void unix_accept_incoming_migration(void *opaque)
diff --git a/migration.c b/migration.c
index 462620f..e8be0d1 100644
--- a/migration.c
+++ b/migration.c
@@ -41,6 +41,11 @@ enum {
     MIG_STATE_COMPLETED,
 };
 
+enum {
+    MIG_SUBSTATE_PRECOPY,
+    MIG_SUBSTATE_POSTCOPY,
+};
+
 #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
 
 static NotifierList migration_state_notifiers =
@@ -248,6 +253,17 @@ static void migrate_fd_put_ready(void *opaque)
         return;
     }
 
+    if (s->substate == MIG_SUBSTATE_POSTCOPY) {
+        /* PRINTF("postcopy background\n"); */
+        ret = postcopy_outgoing_ram_save_background(s->file, s->postcopy);
+        if (ret > 0) {
+            migrate_fd_completed(s);
+        } else if (ret < 0) {
+            migrate_fd_error(s);
+        }
+        return;
+    }
+
     DPRINTF("iterate\n");
     ret = qemu_savevm_state_iterate(s->file);
     if (ret < 0) {
@@ -259,7 +275,20 @@ static void migrate_fd_put_ready(void *opaque)
         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
         vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
 
-        if (qemu_savevm_state_complete(s->file) < 0) {
+        if (s->params.postcopy) {
+            if (qemu_savevm_state_complete(s->file, s->params.postcopy) < 0) {
+                migrate_fd_error(s);
+                if (old_vm_running) {
+                    vm_start();
+                }
+                return;
+            }
+            s->substate = MIG_SUBSTATE_POSTCOPY;
+            s->postcopy = postcopy_outgoing_begin(s);
+            return;
+        }
+
+        if (qemu_savevm_state_complete(s->file, s->params.postcopy) < 0) {
             migrate_fd_error(s);
         } else {
             migrate_fd_completed(s);
@@ -348,6 +377,7 @@ void migrate_fd_connect(MigrationState *s)
     int ret;
 
     s->state = MIG_STATE_ACTIVE;
+    s->substate = MIG_SUBSTATE_PRECOPY;
     s->file = qemu_fopen_ops_buffered(s,
                                       s->bandwidth_limit,
                                       migrate_fd_put_buffer,
diff --git a/migration.h b/migration.h
index e6f8006..90f3bdf 100644
--- a/migration.h
+++ b/migration.h
@@ -39,6 +39,12 @@ struct MigrationState
     int (*write)(MigrationState *s, const void *buff, size_t size);
     void *opaque;
     MigrationParams params;
+
+    /* for postcopy */
+    int substate;              /* precopy or postcopy */
+    int fd_read;
+    QEMUFile *file_read;        /* connection from the destination */
+    void *postcopy;             /* set by postcopy_outgoing_begin() */
 };
 
 void process_incoming_migration(QEMUFile *f);
@@ -106,6 +112,12 @@ void migrate_add_blocker(Error *reason);
  */
 void migrate_del_blocker(Error *reason);
 
+/* For outgoing postcopy */
+int postcopy_outgoing_create_read_socket(MigrationState *s);
+int postcopy_outgoing_ram_save_live(QEMUFile *f, int stage, void *opaque);
+void *postcopy_outgoing_begin(MigrationState *s);
+int postcopy_outgoing_ram_save_background(QEMUFile *f, void *postcopy);
+
 /* For incoming postcopy */
 extern bool incoming_postcopy;
 extern unsigned long incoming_postcopy_flags;
diff --git a/savevm.c b/savevm.c
index 74b15e7..48b636d 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1698,8 +1698,10 @@ int qemu_savevm_state_iterate(QEMUFile *f)
     return ret;
 }
 
-int qemu_savevm_state_complete(QEMUFile *f)
+int qemu_savevm_state_complete(QEMUFile *f, bool postcopy)
 {
+    QEMUFile *orig_f = NULL;
+    QEMUFileBuf *buf_file = NULL;
     SaveStateEntry *se;
     int ret;
 
@@ -1719,6 +1721,12 @@ int qemu_savevm_state_complete(QEMUFile *f)
         }
     }
 
+    if (postcopy) {
+        orig_f = f;
+        buf_file = qemu_fopen_buf_write();
+        f = buf_file->file;
+    }
+
     QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         int len;
 
@@ -1742,6 +1750,16 @@ int qemu_savevm_state_complete(QEMUFile *f)
 
     qemu_put_byte(f, QEMU_VM_EOF);
 
+    if (postcopy) {
+        qemu_fflush(f);
+        qemu_put_byte(orig_f, QEMU_VM_POSTCOPY);
+        qemu_put_be32(orig_f, buf_file->buf.buffer_size);
+        qemu_put_buffer(orig_f,
+                        buf_file->buf.buffer, buf_file->buf.buffer_size);
+        qemu_fclose(f);
+        f = orig_f;
+    }
+
     return qemu_file_get_error(f);
 }
 
@@ -1781,7 +1799,7 @@ static int qemu_savevm_state(QEMUFile *f)
             goto out;
     } while (ret == 0);
 
-    ret = qemu_savevm_state_complete(f);
+    ret = qemu_savevm_state_complete(f, params.postcopy);
 
 out:
     if (ret == 0) {
diff --git a/sysemu.h b/sysemu.h
index 3857cf0..6ee4cd8 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -79,7 +79,7 @@ void qemu_announce_self(void);
 bool qemu_savevm_state_blocked(Error **errp);
 int qemu_savevm_state_begin(QEMUFile *f, const MigrationParams *params);
 int qemu_savevm_state_iterate(QEMUFile *f);
-int qemu_savevm_state_complete(QEMUFile *f);
+int qemu_savevm_state_complete(QEMUFile *f, bool postcopy);
 void qemu_savevm_state_cancel(QEMUFile *f);
 int qemu_loadvm_state(QEMUFile *f);
 
-- 
1.7.1.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to