This patch reworks the IO thread to use signalfd() instead of sigtimedwait().
This will eliminate the need to use SIGIO everywhere.

Since v2, I've fixed a nasty bug in qemu_kvm_aio_wait().  We can't use
main_loop_wait() to sleep if it's at all possible we're being called from
a handler in main_loop_wait() (which is the case with qemu_kvm_aio_wait()).

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>

diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 7134e56..492c3c4 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -17,6 +17,7 @@ int kvm_pit = 1;
 #include "sysemu.h"
 #include "qemu-common.h"
 #include "console.h"
+#include "block.h"
 
 #include "qemu-kvm.h"
 #include <libkvm.h>
@@ -36,18 +37,11 @@ pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
 pthread_cond_t qemu_aio_cond = PTHREAD_COND_INITIALIZER;
 pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
 pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
+pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
 __thread struct vcpu_info *vcpu;
 
 static int qemu_system_ready;
 
-struct qemu_kvm_signal_table {
-    sigset_t sigset;
-    sigset_t negsigset;
-};
-
-static struct qemu_kvm_signal_table io_signal_table;
-static struct qemu_kvm_signal_table vcpu_signal_table;
-
 #define SIG_IPI (SIGRTMIN+4)
 
 struct vcpu_info {
@@ -64,6 +58,7 @@ struct vcpu_info {
 
 pthread_t io_thread;
 static int io_thread_fd = -1;
+static int io_thread_sigfd = -1;
 
 static inline unsigned long kvm_get_thread_id(void)
 {
@@ -172,37 +167,23 @@ static int has_work(CPUState *env)
     return kvm_arch_has_work(env);
 }
 
-static int kvm_process_signal(int si_signo)
-{
-    struct sigaction sa;
-
-    switch (si_signo) {
-    case SIGUSR2:
-        pthread_cond_signal(&qemu_aio_cond);
-        break;
-    case SIGALRM:
-    case SIGIO:
-        sigaction(si_signo, NULL, &sa);
-        sa.sa_handler(si_signo);
-        break;
-    }
-
-    return 1;
-}
-
-static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
-                          int timeout)
+static int kvm_eat_signal(CPUState *env, int timeout)
 {
     struct timespec ts;
     int r, e, ret = 0;
     siginfo_t siginfo;
+    sigset_t waitset;
 
     ts.tv_sec = timeout / 1000;
     ts.tv_nsec = (timeout % 1000) * 1000000;
-    r = sigtimedwait(&waitset->sigset, &siginfo, &ts);
+    sigemptyset(&waitset);
+    sigaddset(&waitset, SIG_IPI);
+
+    r = sigtimedwait(&waitset, &siginfo, &ts);
     if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
        return 0;
     e = errno;
+
     pthread_mutex_lock(&qemu_mutex);
     if (env && vcpu)
         cpu_single_env = vcpu->env;
@@ -211,12 +192,12 @@ static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
        exit(1);
     }
     if (r != -1)
-        ret = kvm_process_signal(siginfo.si_signo);
+       ret = 1;
 
     if (env && vcpu_info[env->cpu_index].stop) {
        vcpu_info[env->cpu_index].stop = 0;
        vcpu_info[env->cpu_index].stopped = 1;
-       qemu_kvm_notify_work();
+       pthread_cond_signal(&qemu_pause_cond);
     }
     pthread_mutex_unlock(&qemu_mutex);
 
@@ -227,14 +208,13 @@ static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
 static void kvm_eat_signals(CPUState *env, int timeout)
 {
     int r = 0;
-    struct qemu_kvm_signal_table *waitset = &vcpu_signal_table;
 
-    while (kvm_eat_signal(waitset, env, 0))
+    while (kvm_eat_signal(env, 0))
        r = 1;
     if (!r && timeout) {
-       r = kvm_eat_signal(waitset, env, timeout);
+       r = kvm_eat_signal(env, timeout);
        if (r)
-           while (kvm_eat_signal(waitset, env, 0))
+           while (kvm_eat_signal(env, 0))
                ;
     }
 }
@@ -266,12 +246,8 @@ static void pause_all_threads(void)
        vcpu_info[i].stop = 1;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
     }
-    while (!all_threads_paused()) {
-       pthread_mutex_unlock(&qemu_mutex);
-       kvm_eat_signal(&io_signal_table, NULL, 1000);
-       pthread_mutex_lock(&qemu_mutex);
-       cpu_single_env = NULL;
-    }
+    while (!all_threads_paused())
+       pthread_cond_wait(&qemu_pause_cond, &qemu_mutex);
 }
 
 static void resume_all_threads(void)
@@ -310,6 +286,12 @@ static void setup_kernel_sigmask(CPUState *env)
 {
     sigset_t set;
 
+    sigemptyset(&set);
+    sigaddset(&set, SIGUSR2);
+    sigaddset(&set, SIGIO);
+    sigaddset(&set, SIGALRM);
+    sigprocmask(SIG_BLOCK, &set, NULL);
+
     sigprocmask(SIG_BLOCK, NULL, &set);
     sigdelset(&set, SIG_IPI);
     
@@ -346,7 +328,7 @@ static int kvm_main_loop_cpu(CPUState *env)
     cpu_single_env = env;
     while (1) {
        while (!has_work(env))
-           kvm_main_loop_wait(env, 10);
+           kvm_main_loop_wait(env, 1000);
        if (env->interrupt_request & CPU_INTERRUPT_HARD)
            env->hflags &= ~HF_HALTED_MASK;
        if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
@@ -394,18 +376,6 @@ static void *ap_main_loop(void *_env)
     return NULL;
 }
 
-static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
-{
-    sigemptyset(&sigtab->sigset);
-    sigfillset(&sigtab->negsigset);
-}
-
-static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
-{
-    sigaddset(&sigtab->sigset, signum);
-    sigdelset(&sigtab->negsigset, signum);
-}
-
 void kvm_init_new_ap(int cpu, CPUState *env)
 {
     pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
@@ -414,27 +384,12 @@ void kvm_init_new_ap(int cpu, CPUState *env)
        pthread_cond_wait(&qemu_vcpu_cond, &qemu_mutex);
 }
 
-static void qemu_kvm_init_signal_tables(void)
-{
-    qemu_kvm_init_signal_table(&io_signal_table);
-    qemu_kvm_init_signal_table(&vcpu_signal_table);
-
-    kvm_add_signal(&io_signal_table, SIGIO);
-    kvm_add_signal(&io_signal_table, SIGALRM);
-    kvm_add_signal(&io_signal_table, SIGUSR2);
-
-    kvm_add_signal(&vcpu_signal_table, SIG_IPI);
-
-    sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
-}
-
 int kvm_init_ap(void)
 {
 #ifdef TARGET_I386
     kvm_tpr_opt_setup();
 #endif
     qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
-    qemu_kvm_init_signal_tables();
 
     signal(SIG_IPI, sig_ipi_handler);
     return 0;
@@ -468,6 +423,61 @@ void qemu_kvm_notify_work(void)
        fprintf(stderr, "failed to notify io thread\n");
 }
 
+static int received_signal;
+
+/* QEMU relies on select() being interrupted with EINTR so that it gets a
+   chance to poll for IO and timer signals.  With signals delivered through
+   a file descriptor, select() is no longer interrupted by them, so the fd
+   handlers set received_signal and kvm_check_received_signal() lets the
+   select() loop notice that and break out. */
+
+int kvm_check_received_signal(void)
+{
+    if (!received_signal)
+       return 0;
+
+    /* consume the flag so that it is reported only once */
+    received_signal = 0;
+    return 1;
+}
+
+/* If we have signalfd, we mask out the signals we want to handle and then
+ * use signalfd to listen for them.  Every siginfo record read from the fd
+ * (passed in opaque) is dispatched to the handler currently installed for
+ * that signal; SIG_DFL and SIG_IGN are not real functions and are skipped. */
+
+static void sigfd_handler(void *opaque)
+{
+    int fd = (unsigned long)opaque;
+    struct signalfd_siginfo info;
+    struct sigaction action;
+    ssize_t len;
+
+    while (1) {
+       do {
+           len = read(fd, &info, sizeof(info));
+       } while (len == -1 && errno == EINTR);
+
+       if (len == -1 && errno == EAGAIN)
+           break;      /* nothing left to read on the non-blocking fd */
+
+       if (len != sizeof(info)) {
+           fprintf(stderr, "read from sigfd returned %zd: %m\n", len);
+           return;
+       }
+
+       if (sigaction(info.ssi_signo, NULL, &action) == 0 &&
+           action.sa_handler != SIG_DFL && action.sa_handler != SIG_IGN)
+           action.sa_handler(info.ssi_signo);
+
+       if (info.ssi_signo == SIGUSR2)
+           pthread_cond_signal(&qemu_aio_cond);
+    }
+
+    /* lets kvm_check_received_signal() report that signals were handled */
+    received_signal = 1;
+}
+
 /* Used to break IO thread out of select */
 static void io_thread_wakeup(void *opaque)
 {
@@ -487,17 +497,15 @@ static void io_thread_wakeup(void *opaque)
 
        offset += len;
     }
-}
 
-/*
- * The IO thread has all signals that inform machine events
- * blocked (io_signal_table), so it won't get interrupted
- * while processing in main_loop_wait().
- */
+    received_signal = 1;
+}
 
 int kvm_main_loop(void)
 {
     int fds[2];
+    sigset_t mask;
+    int sigfd;
 
     io_thread = pthread_self();
     qemu_system_ready = 1;
@@ -511,15 +519,31 @@ int kvm_main_loop(void)
                         (void *)(unsigned long)fds[0]);
 
     io_thread_fd = fds[1];
-    pthread_mutex_unlock(&qemu_mutex);
+
+    sigemptyset(&mask);
+    sigaddset(&mask, SIGIO);
+    sigaddset(&mask, SIGALRM);
+    sigaddset(&mask, SIGUSR2);
+    sigprocmask(SIG_BLOCK, &mask, NULL);
+
+    sigfd = kvm_signalfd(&mask);
+    if (sigfd == -1) {
+       fprintf(stderr, "failed to create signalfd\n");
+       return -errno;
+    }
+
+    fcntl(sigfd, F_SETFL, O_NONBLOCK);
+
+    qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
+                        (void *)(unsigned long)sigfd);
 
     pthread_cond_broadcast(&qemu_system_cond);
 
+    io_thread_sigfd = sigfd;
+    cpu_single_env = NULL;
+
     while (1) {
-        kvm_eat_signal(&io_signal_table, NULL, 1000);
-        pthread_mutex_lock(&qemu_mutex);
-        cpu_single_env = NULL;
-        main_loop_wait(0);
+        main_loop_wait(1000);
         if (qemu_shutdown_requested())
             break;
         else if (qemu_powerdown_requested())
@@ -528,7 +552,6 @@ int kvm_main_loop(void)
             pthread_kill(vcpu_info[0].thread, SIG_IPI);
             qemu_kvm_reset_requested = 1;
         }
-        pthread_mutex_unlock(&qemu_mutex);
     }
 
     pause_all_threads();
@@ -891,10 +914,21 @@ void qemu_kvm_aio_wait(void)
     CPUState *cpu_single = cpu_single_env;
 
     if (!cpu_single_env) {
-        pthread_mutex_unlock(&qemu_mutex);
-        kvm_eat_signal(&io_signal_table, NULL, 1000);
-        pthread_mutex_lock(&qemu_mutex);
-        cpu_single_env = NULL;
+       if (io_thread_sigfd != -1) {
+           fd_set rfds;
+           int ret;
+
+           FD_ZERO(&rfds);
+           FD_SET(io_thread_sigfd, &rfds);
+
+           /* this is a rare case where we do want to hold qemu_mutex
+            * while sleeping.  We cannot allow anything else to run
+            * right now. */
+           ret = select(io_thread_sigfd + 1, &rfds, NULL, NULL, NULL);
+           if (ret > 0 && FD_ISSET(io_thread_sigfd, &rfds))
+               sigfd_handler((void *)(unsigned long)io_thread_sigfd);
+       }
+       qemu_aio_poll();
     } else {
         pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
         cpu_single_env = cpu_single;
@@ -921,3 +955,14 @@ void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr,
 {
     kvm_destroy_phys_mem(kvm_context, start_addr, size);
 }
+
+void kvm_mutex_unlock(void)
+{
+    pthread_mutex_unlock(&qemu_mutex);     /* drop the global qemu_mutex */
+}
+
+void kvm_mutex_lock(void)
+{
+    pthread_mutex_lock(&qemu_mutex);       /* take the global qemu_mutex */
+    cpu_single_env = NULL;  /* presumably: new holder is not a vcpu - confirm */
+}
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index a0dd4a8..df573ec 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -12,6 +12,8 @@
 
 #include <signal.h>
 
+#include <signal.h>
+
 int kvm_main_loop(void);
 int kvm_qemu_init(void);
 int kvm_qemu_create_context(void);
@@ -111,4 +113,28 @@ extern kvm_context_t kvm_context;
 #define qemu_kvm_pit_in_kernel() (0)
 #endif
 
+void kvm_mutex_unlock(void);
+void kvm_mutex_lock(void);
+
+static inline void kvm_sleep_begin(void)
+{
+    if (kvm_enabled())         /* nothing to do when KVM is not in use */
+       kvm_mutex_unlock();     /* release the KVM mutex before sleeping */
+}
+
+static inline void kvm_sleep_end(void)
+{
+    if (kvm_enabled())         /* nothing to do when KVM is not in use */
+       kvm_mutex_lock();       /* re-take the KVM mutex after sleeping */
+}
+
+int kvm_check_received_signal(void);
+
+/* Without KVM this is always 0; with KVM it reports (and consumes) the
+ * wakeup flag via kvm_check_received_signal(). */
+static inline int kvm_received_signal(void)
+{
+    return kvm_enabled() ? kvm_check_received_signal() : 0;
+}
+
 #endif
diff --git a/qemu/vl.c b/qemu/vl.c
index 3fcf6b6..541aacc 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -7271,6 +7271,23 @@ void qemu_register_boot_set(QEMUBootSetHandler *func)
        qemu_boot_set_handler = func;
 }
 
+static int qemu_select(int max_fd, fd_set *rfds, fd_set *wfds, fd_set *xfds,
+                      struct timeval *tv)
+{
+    int nready;
+
+    /* While QEMU code is running KVM holds a mutex, so every place where
+       QEMU code sleeps needs hooks that release it.  The mutex is dropped
+       just before the blocking select() call and taken again right after
+       it returns; select()'s result is handed back unchanged. */
+
+    kvm_sleep_begin();
+    nready = select(max_fd, rfds, wfds, xfds, tv);
+    kvm_sleep_end();
+
+    return nready;
+}
+
 void main_loop_wait(int timeout)
 {
     IOHandlerRecord *ioh;
@@ -7342,11 +7359,12 @@ void main_loop_wait(int timeout)
         }
     }
 
-    tv.tv_sec = 0;
 #ifdef _WIN32
+    tv.tv_sec = 0;
     tv.tv_usec = 0;
 #else
-    tv.tv_usec = timeout * 1000;
+    tv.tv_sec = timeout / 1000;
+    tv.tv_usec = (timeout % 1000) * 1000;
 #endif
 #if defined(CONFIG_SLIRP)
     if (slirp_inited) {
@@ -7354,7 +7372,7 @@ void main_loop_wait(int timeout)
     }
 #endif
  moreio:
-    ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv);
+    ret = qemu_select(nfds + 1, &rfds, &wfds, &xfds, &tv);
     if (ret > 0) {
         IOHandlerRecord **pioh;
         int more = 0;
@@ -7383,7 +7401,7 @@ void main_loop_wait(int timeout)
             } else
                 pioh = &ioh->next;
         }
-        if (more)
+        if (more && !kvm_received_signal())
             goto moreio;
     }
 #if defined(CONFIG_SLIRP)

-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to