[Qemu-devel] [PULL 50/57] Postcopy; Handle userfault requests

2015-11-09 Thread Juan Quintela
From: "Dr. David Alan Gilbert" 

userfaultfd is a Linux syscall that gives an fd that receives a stream
of notifications of accesses to pages registered with it and allows
the program to acknowledge those stalls and tell the accessing
thread to carry on.

We convert the requests from the kernel into messages back to the
source asking for the pages.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |   3 +
 migration/postcopy-ram.c  | 155 +++---
 trace-events  |   9 +++
 3 files changed, 158 insertions(+), 9 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index a48471e..329d535 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -89,11 +89,14 @@ struct MigrationIncomingState {
  */
 QemuEvent main_thread_load_event;

+bool   have_fault_thread;
 QemuThread fault_thread;
 QemuSemaphore  fault_thread_sem;

 /* For the kernel to send us notifications */
 int   userfault_fd;
+/* To tell the fault_thread to quit */
+int   userfault_quit_fd;
 QEMUFile *to_src_file;
 QemuMutex rp_mutex;/* We send replies from multiple threads */
 void *postcopy_tmp_page;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 58492c0..4f1e329 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -51,6 +51,8 @@ struct PostcopyDiscardState {
  */
 #if defined(__linux__)

+#include <poll.h>
+#include <sys/eventfd.h>
 #include 
 #include 
 #include 
@@ -267,15 +269,41 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
  */
 int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 {
-/* TODO: Join the fault thread once we're sure it will exit */
-if (qemu_ram_foreach_block(cleanup_range, mis)) {
-return -1;
+trace_postcopy_ram_incoming_cleanup_entry();
+
+if (mis->have_fault_thread) {
+uint64_t tmp64;
+
+if (qemu_ram_foreach_block(cleanup_range, mis)) {
+return -1;
+}
+/*
+ * Tell the fault_thread to exit, it's an eventfd that should
+ * currently be at 0, we're going to increment it to 1
+ */
+tmp64 = 1;
+if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
+trace_postcopy_ram_incoming_cleanup_join();
+qemu_thread_join(&mis->fault_thread);
+} else {
+/* Not much we can do here, but may as well report it */
+error_report("%s: incrementing userfault_quit_fd: %s", __func__,
+ strerror(errno));
+}
+trace_postcopy_ram_incoming_cleanup_closeuf();
+close(mis->userfault_fd);
+close(mis->userfault_quit_fd);
+mis->have_fault_thread = false;
 }

+postcopy_state_set(POSTCOPY_INCOMING_END);
+migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
+
 if (mis->postcopy_tmp_page) {
 munmap(mis->postcopy_tmp_page, getpagesize());
 mis->postcopy_tmp_page = NULL;
 }
+trace_postcopy_ram_incoming_cleanup_exit();
 return 0;
 }

@@ -314,31 +342,140 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
 static void *postcopy_ram_fault_thread(void *opaque)
 {
 MigrationIncomingState *mis = opaque;
+struct uffd_msg msg;
+int ret;
+size_t hostpagesize = getpagesize();
+RAMBlock *rb = NULL;
+RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */

-fprintf(stderr, "postcopy_ram_fault_thread\n");
-/* TODO: In later patch */
+trace_postcopy_ram_fault_thread_entry();
 qemu_sem_post(&mis->fault_thread_sem);
-while (1) {
-/* TODO: In later patch */
+
+while (true) {
+ram_addr_t rb_offset;
+ram_addr_t in_raspace;
+struct pollfd pfd[2];
+
+/*
+ * We're mainly waiting for the kernel to give us a faulting HVA,
+ * however we can be told to quit via userfault_quit_fd which is
+ * an eventfd
+ */
+pfd[0].fd = mis->userfault_fd;
+pfd[0].events = POLLIN;
+pfd[0].revents = 0;
+pfd[1].fd = mis->userfault_quit_fd;
+pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
+pfd[1].revents = 0;
+
+if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
+error_report("%s: userfault poll: %s", __func__, strerror(errno));
+break;
+}
+
+if (pfd[1].revents) {
+trace_postcopy_ram_fault_thread_quit();
+break;
+}
+
+ret = read(mis->userfault_fd, &msg, sizeof(msg));
+if (ret != sizeof(msg)) {
+if (errno == EAGAIN) {
+/*
+ * if a wake up happens on the other thread just after
+ * the poll, there is nothing 

[Qemu-devel] [PULL 50/57] Postcopy; Handle userfault requests

2015-11-10 Thread Juan Quintela
From: "Dr. David Alan Gilbert" 

userfaultfd is a Linux syscall that gives an fd that receives a stream
of notifications of accesses to pages registered with it and allows
the program to acknowledge those stalls and tell the accessing
thread to carry on.

We convert the requests from the kernel into messages back to the
source asking for the pages.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Reviewed-by: Amit Shah 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |   3 +
 migration/postcopy-ram.c  | 155 +++---
 trace-events  |   9 +++
 3 files changed, 158 insertions(+), 9 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index a48471e..329d535 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -89,11 +89,14 @@ struct MigrationIncomingState {
  */
 QemuEvent main_thread_load_event;

+bool   have_fault_thread;
 QemuThread fault_thread;
 QemuSemaphore  fault_thread_sem;

 /* For the kernel to send us notifications */
 int   userfault_fd;
+/* To tell the fault_thread to quit */
+int   userfault_quit_fd;
 QEMUFile *to_src_file;
 QemuMutex rp_mutex;/* We send replies from multiple threads */
 void *postcopy_tmp_page;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 58492c0..4f1e329 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -51,6 +51,8 @@ struct PostcopyDiscardState {
  */
 #if defined(__linux__)

+#include <poll.h>
+#include <sys/eventfd.h>
 #include 
 #include 
 #include 
@@ -267,15 +269,41 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
  */
 int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 {
-/* TODO: Join the fault thread once we're sure it will exit */
-if (qemu_ram_foreach_block(cleanup_range, mis)) {
-return -1;
+trace_postcopy_ram_incoming_cleanup_entry();
+
+if (mis->have_fault_thread) {
+uint64_t tmp64;
+
+if (qemu_ram_foreach_block(cleanup_range, mis)) {
+return -1;
+}
+/*
+ * Tell the fault_thread to exit, it's an eventfd that should
+ * currently be at 0, we're going to increment it to 1
+ */
+tmp64 = 1;
+if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
+trace_postcopy_ram_incoming_cleanup_join();
+qemu_thread_join(&mis->fault_thread);
+} else {
+/* Not much we can do here, but may as well report it */
+error_report("%s: incrementing userfault_quit_fd: %s", __func__,
+ strerror(errno));
+}
+trace_postcopy_ram_incoming_cleanup_closeuf();
+close(mis->userfault_fd);
+close(mis->userfault_quit_fd);
+mis->have_fault_thread = false;
 }

+postcopy_state_set(POSTCOPY_INCOMING_END);
+migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
+
 if (mis->postcopy_tmp_page) {
 munmap(mis->postcopy_tmp_page, getpagesize());
 mis->postcopy_tmp_page = NULL;
 }
+trace_postcopy_ram_incoming_cleanup_exit();
 return 0;
 }

@@ -314,31 +342,140 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
 static void *postcopy_ram_fault_thread(void *opaque)
 {
 MigrationIncomingState *mis = opaque;
+struct uffd_msg msg;
+int ret;
+size_t hostpagesize = getpagesize();
+RAMBlock *rb = NULL;
+RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */

-fprintf(stderr, "postcopy_ram_fault_thread\n");
-/* TODO: In later patch */
+trace_postcopy_ram_fault_thread_entry();
 qemu_sem_post(&mis->fault_thread_sem);
-while (1) {
-/* TODO: In later patch */
+
+while (true) {
+ram_addr_t rb_offset;
+ram_addr_t in_raspace;
+struct pollfd pfd[2];
+
+/*
+ * We're mainly waiting for the kernel to give us a faulting HVA,
+ * however we can be told to quit via userfault_quit_fd which is
+ * an eventfd
+ */
+pfd[0].fd = mis->userfault_fd;
+pfd[0].events = POLLIN;
+pfd[0].revents = 0;
+pfd[1].fd = mis->userfault_quit_fd;
+pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
+pfd[1].revents = 0;
+
+if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
+error_report("%s: userfault poll: %s", __func__, strerror(errno));
+break;
+}
+
+if (pfd[1].revents) {
+trace_postcopy_ram_fault_thread_quit();
+break;
+}
+
+ret = read(mis->userfault_fd, &msg, sizeof(msg));
+if (ret != sizeof(msg)) {
+if (errno == EAGAIN) {
+/*
+ * if a wake up happens on the other thread just after
+ * the poll, there is nothing