This patch eliminates the use of sigtimedwait() in the IO thread. To avoid the signal/select race condition, we use a pipe that we write to in the signal handlers. This was suggested by Rusty and seems to work well.
There are a lot of cleanup/simplification opportunities with this but I've limited it just to the signal masking/eating routines. We've got at least one live lock left in the code that I haven't yet identified. My goal is to get this all a lot simplier though so that it's easier to fix the remaining lock-ups. I'm looking for some feedback that this is a sane direction. I haven't tested this enough yet so please don't apply it. Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c index 9a9bf59..46d7425 100644 --- a/qemu/qemu-kvm.c +++ b/qemu/qemu-kvm.c @@ -7,6 +7,9 @@ */ #include "config.h" #include "config-host.h" +#include "qemu-common.h" +#include "block.h" +#include "console.h" int kvm_allowed = 1; int kvm_irqchip = 1; @@ -38,14 +41,6 @@ __thread struct vcpu_info *vcpu; static int qemu_system_ready; -struct qemu_kvm_signal_table { - sigset_t sigset; - sigset_t negsigset; -}; - -static struct qemu_kvm_signal_table io_signal_table; -static struct qemu_kvm_signal_table vcpu_signal_table; - #define SIG_IPI (SIGRTMIN+4) struct vcpu_info { @@ -169,53 +164,37 @@ static int has_work(CPUState *env) return kvm_arch_has_work(env); } -static int kvm_process_signal(int si_signo) -{ - struct sigaction sa; - - switch (si_signo) { - case SIGUSR2: - pthread_cond_signal(&qemu_aio_cond); - break; - case SIGALRM: - case SIGIO: - sigaction(si_signo, NULL, &sa); - sa.sa_handler(si_signo); - break; - } - - return 1; -} - -static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env, - int timeout) +static int kvm_eat_signal(CPUState *env, int timeout) { struct timespec ts; int r, e, ret = 0; siginfo_t siginfo; + sigset_t waitset; + sigemptyset(&waitset); + sigaddset(&waitset, SIG_IPI); ts.tv_sec = timeout / 1000; ts.tv_nsec = (timeout % 1000) * 1000000; - r = sigtimedwait(&waitset->sigset, &siginfo, &ts); + qemu_kvm_unlock(); + r = sigtimedwait(&waitset, &siginfo, &ts); + qemu_kvm_lock(env); + cpu_single_env = env; if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout) return 0; e = errno; - pthread_mutex_lock(&qemu_mutex); if (env && vcpu) cpu_single_env = vcpu->env; if (r == -1 && !(errno == EAGAIN || errno == EINTR)) { printf("sigtimedwait: %s\n", strerror(e)); exit(1); } - if (r != -1) - ret = kvm_process_signal(siginfo.si_signo); + ret = 1; if (env && vcpu_info[env->cpu_index].stop) { vcpu_info[env->cpu_index].stop = 0; vcpu_info[env->cpu_index].stopped = 1; pthread_kill(io_thread, SIGUSR1); } - pthread_mutex_unlock(&qemu_mutex); return ret; } @@ -224,24 +203,20 @@ static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env, static void kvm_eat_signals(CPUState *env, int timeout) { int r = 0; - struct qemu_kvm_signal_table *waitset = &vcpu_signal_table; - while (kvm_eat_signal(waitset, env, 0)) + while (kvm_eat_signal(env, 0)) r = 1; if (!r && timeout) { - r = kvm_eat_signal(waitset, env, timeout); + r = kvm_eat_signal(env, timeout); if (r) - while (kvm_eat_signal(waitset, env, 0)) + while (kvm_eat_signal(env, 0)) ; } } static void kvm_main_loop_wait(CPUState *env, int timeout) { - pthread_mutex_unlock(&qemu_mutex); kvm_eat_signals(env, timeout); - pthread_mutex_lock(&qemu_mutex); - cpu_single_env = env; vcpu_info[env->cpu_index].signalled = 0; } @@ -263,12 +238,8 @@ static void pause_all_threads(void) vcpu_info[i].stop = 1; pthread_kill(vcpu_info[i].thread, SIG_IPI); } - while (!all_threads_paused()) { - pthread_mutex_unlock(&qemu_mutex); - kvm_eat_signal(&io_signal_table, NULL, 1000); - pthread_mutex_lock(&qemu_mutex); - cpu_single_env = NULL; - } + while (!all_threads_paused()) + main_loop_wait(10); } static void resume_all_threads(void) @@ -391,18 +362,6 @@ static void *ap_main_loop(void *_env) return NULL; } -static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab) -{ - sigemptyset(&sigtab->sigset); - sigfillset(&sigtab->negsigset); -} - -static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum) -{ - sigaddset(&sigtab->sigset, signum); - sigdelset(&sigtab->negsigset, signum); -} - void kvm_init_new_ap(int cpu, CPUState *env) { pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env); @@ -411,28 +370,12 @@ void kvm_init_new_ap(int cpu, CPUState *env) pthread_cond_wait(&qemu_vcpu_cond, &qemu_mutex); } -static void qemu_kvm_init_signal_tables(void) -{ - qemu_kvm_init_signal_table(&io_signal_table); - qemu_kvm_init_signal_table(&vcpu_signal_table); - - kvm_add_signal(&io_signal_table, SIGIO); - kvm_add_signal(&io_signal_table, SIGALRM); - kvm_add_signal(&io_signal_table, SIGUSR1); - kvm_add_signal(&io_signal_table, SIGUSR2); - - kvm_add_signal(&vcpu_signal_table, SIG_IPI); - - sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL); -} - int kvm_init_ap(void) { #ifdef TARGET_I386 kvm_tpr_opt_setup(); #endif qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL); - qemu_kvm_init_signal_tables(); signal(SIG_IPI, sig_ipi_handler); return 0; @@ -450,8 +393,67 @@ void qemu_kvm_notify_work(void) * while processing in main_loop_wait(). */ +static int kvm_sigfd[2] = {-1, -1}; +static void (*sigalrm_handler)(int signo); +static void (*sigio_handler)(int signo); + +static void sig_aio_handler(int signum) +{ + if (kvm_sigfd[1] != -1) { + ssize_t len; + + len = write(kvm_sigfd[1], &signum, sizeof(signum)); + } +} + +static void sig_aio_fd_read(void *opaque) +{ + int signum; + ssize_t len; + + do { + len = read(kvm_sigfd[0], &signum, sizeof(signum)); + } while (len == -1 && errno == EINTR); + + if (len != 4) + abort(); + + switch (signum) { + case SIGUSR2: + pthread_cond_signal(&qemu_aio_cond); + break; + case SIGALRM: + sigalrm_handler(signum); + break; + case SIGIO: + sigio_handler(signum); + break; + } +} + int kvm_main_loop(void) { + struct sigaction sa; + + sigaction(SIGALRM, NULL, &sa); + sigalrm_handler = sa.sa_handler; + + sigaction(SIGIO, NULL, &sa); + sigio_handler = sa.sa_handler; + + signal(SIGUSR1, sig_aio_handler); + signal(SIGUSR2, sig_aio_handler); + signal(SIGALRM, sig_aio_handler); + signal(SIGIO, sig_aio_handler); + + if (pipe(kvm_sigfd) == -1) + abort(); + + fcntl(kvm_sigfd[0], F_SETFL, O_NONBLOCK); + fcntl(kvm_sigfd[1], F_SETFL, O_NONBLOCK); + + qemu_set_fd_handler2(kvm_sigfd[0], NULL, sig_aio_fd_read, NULL, NULL); + io_thread = pthread_self(); qemu_system_ready = 1; pthread_mutex_unlock(&qemu_mutex); @@ -459,10 +461,8 @@ int kvm_main_loop(void) pthread_cond_broadcast(&qemu_system_cond); while (1) { - kvm_eat_signal(&io_signal_table, NULL, 1000); pthread_mutex_lock(&qemu_mutex); - cpu_single_env = NULL; - main_loop_wait(0); + main_loop_wait(10); if (qemu_shutdown_requested()) break; else if (qemu_powerdown_requested()) @@ -834,10 +834,7 @@ void qemu_kvm_aio_wait(void) CPUState *cpu_single = cpu_single_env; if (!cpu_single_env) { - pthread_mutex_unlock(&qemu_mutex); - kvm_eat_signal(&io_signal_table, NULL, 1000); - pthread_mutex_lock(&qemu_mutex); - cpu_single_env = NULL; + main_loop_wait(10); } else { pthread_cond_wait(&qemu_aio_cond, &qemu_mutex); cpu_single_env = cpu_single; @@ -864,3 +861,17 @@ void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr, { kvm_destroy_phys_mem(kvm_context, start_addr, size); } + +void qemu_kvm_lock(CPUState *env) +{ + pthread_mutex_lock(&qemu_mutex); + cpu_single_env = env; +} + +CPUState *qemu_kvm_unlock(void) +{ + CPUState *env = cpu_single_env; + pthread_mutex_unlock(&qemu_mutex); + return env; +} + diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h index 024a653..2c2b0be 100644 --- a/qemu/qemu-kvm.h +++ b/qemu/qemu-kvm.h @@ -74,6 +74,9 @@ int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void *buf); void qemu_kvm_system_reset_request(void); +void qemu_kvm_lock(CPUState *env); +CPUState *qemu_kvm_unlock(void); + #ifdef TARGET_PPC int handle_powerpc_dcr_read(int vcpu, uint32_t dcrn, uint32_t *data); int handle_powerpc_dcr_write(int vcpu,uint32_t dcrn, uint32_t data); @@ -97,4 +100,16 @@ extern kvm_context_t kvm_context; #define qemu_kvm_pit_in_kernel() (0) #endif +static inline void kvm_pre_select(void) +{ + if (kvm_enabled()) + qemu_kvm_unlock(); +} + +static inline void kvm_post_select(void) +{ + if (kvm_enabled()) + qemu_kvm_lock(NULL); +} + #endif diff --git a/qemu/vl.c b/qemu/vl.c index 74be059..cf7677d 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -7919,7 +7919,9 @@ void main_loop_wait(int timeout) } #endif moreio: + kvm_pre_select(); ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv); + kvm_post_select(); if (ret > 0) { IOHandlerRecord **pioh; int more = 0; ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel