This patch eliminates the use of sigtimedwait() in the IO thread.  To avoid the
signal/select race condition, we use a pipe that we write to in the signal
handlers.  This was suggested by Rusty and seems to work well.

There are a lot of cleanup/simplification opportunities with this but I've
limited it just to the signal masking/eating routines.  We've got at least one
live lock left in the code that I haven't yet identified.  My goal is to get
this all a lot simplier though so that it's easier to fix the remaining
lock-ups.

I'm looking for some feedback that this is a sane direction.  I haven't tested
this enough yet so please don't apply it.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>

diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 9a9bf59..46d7425 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -7,6 +7,9 @@
  */
 #include "config.h"
 #include "config-host.h"
+#include "qemu-common.h"
+#include "block.h"
+#include "console.h"
 
 int kvm_allowed = 1;
 int kvm_irqchip = 1;
@@ -38,14 +41,6 @@ __thread struct vcpu_info *vcpu;
 
 static int qemu_system_ready;
 
-struct qemu_kvm_signal_table {
-    sigset_t sigset;
-    sigset_t negsigset;
-};
-
-static struct qemu_kvm_signal_table io_signal_table;
-static struct qemu_kvm_signal_table vcpu_signal_table;
-
 #define SIG_IPI (SIGRTMIN+4)
 
 struct vcpu_info {
@@ -169,53 +164,37 @@ static int has_work(CPUState *env)
     return kvm_arch_has_work(env);
 }
 
-static int kvm_process_signal(int si_signo)
-{
-    struct sigaction sa;
-
-    switch (si_signo) {
-    case SIGUSR2:
-        pthread_cond_signal(&qemu_aio_cond);
-        break;
-    case SIGALRM:
-    case SIGIO:
-        sigaction(si_signo, NULL, &sa);
-        sa.sa_handler(si_signo);
-        break;
-    }
-
-    return 1;
-}
-
-static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
-                          int timeout)
+static int kvm_eat_signal(CPUState *env, int timeout)
 {
     struct timespec ts;
     int r, e, ret = 0;
     siginfo_t siginfo;
+    sigset_t waitset;
 
+    sigemptyset(&waitset);
+    sigaddset(&waitset, SIG_IPI);
     ts.tv_sec = timeout / 1000;
     ts.tv_nsec = (timeout % 1000) * 1000000;
-    r = sigtimedwait(&waitset->sigset, &siginfo, &ts);
+    qemu_kvm_unlock();
+    r = sigtimedwait(&waitset, &siginfo, &ts);
+    qemu_kvm_lock(env);
+    cpu_single_env = env;
     if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
        return 0;
     e = errno;
-    pthread_mutex_lock(&qemu_mutex);
     if (env && vcpu)
         cpu_single_env = vcpu->env;
     if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
        printf("sigtimedwait: %s\n", strerror(e));
        exit(1);
     }
-    if (r != -1)
-        ret = kvm_process_signal(siginfo.si_signo);
+    ret = 1;
 
     if (env && vcpu_info[env->cpu_index].stop) {
        vcpu_info[env->cpu_index].stop = 0;
        vcpu_info[env->cpu_index].stopped = 1;
        pthread_kill(io_thread, SIGUSR1);
     }
-    pthread_mutex_unlock(&qemu_mutex);
 
     return ret;
 }
@@ -224,24 +203,20 @@ static int kvm_eat_signal(struct qemu_kvm_signal_table 
*waitset, CPUState *env,
 static void kvm_eat_signals(CPUState *env, int timeout)
 {
     int r = 0;
-    struct qemu_kvm_signal_table *waitset = &vcpu_signal_table;
 
-    while (kvm_eat_signal(waitset, env, 0))
+    while (kvm_eat_signal(env, 0))
        r = 1;
     if (!r && timeout) {
-       r = kvm_eat_signal(waitset, env, timeout);
+       r = kvm_eat_signal(env, timeout);
        if (r)
-           while (kvm_eat_signal(waitset, env, 0))
+           while (kvm_eat_signal(env, 0))
                ;
     }
 }
 
 static void kvm_main_loop_wait(CPUState *env, int timeout)
 {
-    pthread_mutex_unlock(&qemu_mutex);
     kvm_eat_signals(env, timeout);
-    pthread_mutex_lock(&qemu_mutex);
-    cpu_single_env = env;
     vcpu_info[env->cpu_index].signalled = 0;
 }
 
@@ -263,12 +238,8 @@ static void pause_all_threads(void)
        vcpu_info[i].stop = 1;
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
     }
-    while (!all_threads_paused()) {
-       pthread_mutex_unlock(&qemu_mutex);
-       kvm_eat_signal(&io_signal_table, NULL, 1000);
-       pthread_mutex_lock(&qemu_mutex);
-       cpu_single_env = NULL;
-    }
+    while (!all_threads_paused())
+       main_loop_wait(10);
 }
 
 static void resume_all_threads(void)
@@ -391,18 +362,6 @@ static void *ap_main_loop(void *_env)
     return NULL;
 }
 
-static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
-{
-    sigemptyset(&sigtab->sigset);
-    sigfillset(&sigtab->negsigset);
-}
-
-static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
-{
-    sigaddset(&sigtab->sigset, signum);
-    sigdelset(&sigtab->negsigset, signum);
-}
-
 void kvm_init_new_ap(int cpu, CPUState *env)
 {
     pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
@@ -411,28 +370,12 @@ void kvm_init_new_ap(int cpu, CPUState *env)
        pthread_cond_wait(&qemu_vcpu_cond, &qemu_mutex);
 }
 
-static void qemu_kvm_init_signal_tables(void)
-{
-    qemu_kvm_init_signal_table(&io_signal_table);
-    qemu_kvm_init_signal_table(&vcpu_signal_table);
-
-    kvm_add_signal(&io_signal_table, SIGIO);
-    kvm_add_signal(&io_signal_table, SIGALRM);
-    kvm_add_signal(&io_signal_table, SIGUSR1);
-    kvm_add_signal(&io_signal_table, SIGUSR2);
-
-    kvm_add_signal(&vcpu_signal_table, SIG_IPI);
-
-    sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
-}
-
 int kvm_init_ap(void)
 {
 #ifdef TARGET_I386
     kvm_tpr_opt_setup();
 #endif
     qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
-    qemu_kvm_init_signal_tables();
 
     signal(SIG_IPI, sig_ipi_handler);
     return 0;
@@ -450,8 +393,67 @@ void qemu_kvm_notify_work(void)
  * while processing in main_loop_wait().
  */
 
+static int kvm_sigfd[2] = {-1, -1};
+static void (*sigalrm_handler)(int signo);
+static void (*sigio_handler)(int signo);
+
+static void sig_aio_handler(int signum)
+{
+    if (kvm_sigfd[1] != -1) {
+       ssize_t len;
+
+       len = write(kvm_sigfd[1], &signum, sizeof(signum));
+    }
+}
+
+static void sig_aio_fd_read(void *opaque)
+{
+    int signum;
+    ssize_t len;
+
+    do { 
+       len = read(kvm_sigfd[0], &signum, sizeof(signum));
+    } while (len == -1 && errno == EINTR);
+
+    if (len != 4)
+       abort();
+
+    switch (signum) {
+    case SIGUSR2:
+        pthread_cond_signal(&qemu_aio_cond);
+       break;
+    case SIGALRM:
+       sigalrm_handler(signum);
+       break;
+    case SIGIO:
+       sigio_handler(signum);
+       break;
+    }
+}
+
 int kvm_main_loop(void)
 {
+    struct sigaction sa;
+
+    sigaction(SIGALRM, NULL, &sa);
+    sigalrm_handler = sa.sa_handler;
+
+    sigaction(SIGIO, NULL, &sa);
+    sigio_handler = sa.sa_handler;
+
+    signal(SIGUSR1, sig_aio_handler);
+    signal(SIGUSR2, sig_aio_handler);
+    signal(SIGALRM, sig_aio_handler);
+    signal(SIGIO, sig_aio_handler);
+
+    if (pipe(kvm_sigfd) == -1)
+       abort();
+
+    fcntl(kvm_sigfd[0], F_SETFL, O_NONBLOCK);
+    fcntl(kvm_sigfd[1], F_SETFL, O_NONBLOCK);
+
+    qemu_set_fd_handler2(kvm_sigfd[0], NULL, sig_aio_fd_read, NULL, NULL);
+
     io_thread = pthread_self();
     qemu_system_ready = 1;
     pthread_mutex_unlock(&qemu_mutex);
@@ -459,10 +461,8 @@ int kvm_main_loop(void)
     pthread_cond_broadcast(&qemu_system_cond);
 
     while (1) {
-        kvm_eat_signal(&io_signal_table, NULL, 1000);
         pthread_mutex_lock(&qemu_mutex);
-        cpu_single_env = NULL;
-        main_loop_wait(0);
+       main_loop_wait(10);
         if (qemu_shutdown_requested())
             break;
         else if (qemu_powerdown_requested())
@@ -834,10 +834,7 @@ void qemu_kvm_aio_wait(void)
     CPUState *cpu_single = cpu_single_env;
 
     if (!cpu_single_env) {
-        pthread_mutex_unlock(&qemu_mutex);
-        kvm_eat_signal(&io_signal_table, NULL, 1000);
-        pthread_mutex_lock(&qemu_mutex);
-        cpu_single_env = NULL;
+       main_loop_wait(10);
     } else {
         pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
         cpu_single_env = cpu_single;
@@ -864,3 +861,17 @@ void kvm_cpu_destroy_phys_mem(target_phys_addr_t 
start_addr,
 {
     kvm_destroy_phys_mem(kvm_context, start_addr, size);
 }
+
+void qemu_kvm_lock(CPUState *env)
+{
+    pthread_mutex_lock(&qemu_mutex);
+    cpu_single_env = env;
+}
+
+CPUState *qemu_kvm_unlock(void)
+{
+    CPUState *env = cpu_single_env;
+    pthread_mutex_unlock(&qemu_mutex);
+    return env;
+}
+
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index 024a653..2c2b0be 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -74,6 +74,9 @@ int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void 
*buf);
 
 void qemu_kvm_system_reset_request(void);
 
+void qemu_kvm_lock(CPUState *env);
+CPUState *qemu_kvm_unlock(void);
+
 #ifdef TARGET_PPC
 int handle_powerpc_dcr_read(int vcpu, uint32_t dcrn, uint32_t *data);
 int handle_powerpc_dcr_write(int vcpu,uint32_t dcrn, uint32_t data);
@@ -97,4 +100,16 @@ extern kvm_context_t kvm_context;
 #define qemu_kvm_pit_in_kernel() (0)
 #endif
 
+static inline void kvm_pre_select(void)
+{
+    if (kvm_enabled())
+       qemu_kvm_unlock();
+}
+
+static inline void kvm_post_select(void)
+{
+    if (kvm_enabled())
+       qemu_kvm_lock(NULL);
+}
+
 #endif
diff --git a/qemu/vl.c b/qemu/vl.c
index 74be059..cf7677d 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -7919,7 +7919,9 @@ void main_loop_wait(int timeout)
     }
 #endif
  moreio:
+    kvm_pre_select();
     ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv);
+    kvm_post_select();
     if (ret > 0) {
         IOHandlerRecord **pioh;
         int more = 0;

-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to