This patch eliminates the use of sigtimedwait() in the IO thread. To avoid the
signal/select race condition, we use a pipe that we write to in the signal
handlers. This was suggested by Rusty and seems to work well.
There are a lot of cleanup/simplification opportunities with this but I've
limited it just to the signal masking/eating routines. We've got at least one
live lock left in the code that I haven't yet identified. My goal is to get
this all a lot simplier though so that it's easier to fix the remaining
lock-ups.
I'm looking for some feedback that this is a sane direction. I haven't tested
this enough yet so please don't apply it.
Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 9a9bf59..46d7425 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -7,6 +7,9 @@
*/
#include "config.h"
#include "config-host.h"
+#include "qemu-common.h"
+#include "block.h"
+#include "console.h"
int kvm_allowed = 1;
int kvm_irqchip = 1;
@@ -38,14 +41,6 @@ __thread struct vcpu_info *vcpu;
static int qemu_system_ready;
-struct qemu_kvm_signal_table {
- sigset_t sigset;
- sigset_t negsigset;
-};
-
-static struct qemu_kvm_signal_table io_signal_table;
-static struct qemu_kvm_signal_table vcpu_signal_table;
-
#define SIG_IPI (SIGRTMIN+4)
struct vcpu_info {
@@ -169,53 +164,37 @@ static int has_work(CPUState *env)
return kvm_arch_has_work(env);
}
-static int kvm_process_signal(int si_signo)
-{
- struct sigaction sa;
-
- switch (si_signo) {
- case SIGUSR2:
- pthread_cond_signal(&qemu_aio_cond);
- break;
- case SIGALRM:
- case SIGIO:
- sigaction(si_signo, NULL, &sa);
- sa.sa_handler(si_signo);
- break;
- }
-
- return 1;
-}
-
-static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
- int timeout)
+static int kvm_eat_signal(CPUState *env, int timeout)
{
struct timespec ts;
int r, e, ret = 0;
siginfo_t siginfo;
+ sigset_t waitset;
+ sigemptyset(&waitset);
+ sigaddset(&waitset, SIG_IPI);
ts.tv_sec = timeout / 1000;
ts.tv_nsec = (timeout % 1000) * 1000000;
- r = sigtimedwait(&waitset->sigset, &siginfo, &ts);
+ qemu_kvm_unlock();
+ r = sigtimedwait(&waitset, &siginfo, &ts);
+ qemu_kvm_lock(env);
+ cpu_single_env = env;
if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
return 0;
e = errno;
- pthread_mutex_lock(&qemu_mutex);
if (env && vcpu)
cpu_single_env = vcpu->env;
if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
printf("sigtimedwait: %s\n", strerror(e));
exit(1);
}
- if (r != -1)
- ret = kvm_process_signal(siginfo.si_signo);
+ ret = 1;
if (env && vcpu_info[env->cpu_index].stop) {
vcpu_info[env->cpu_index].stop = 0;
vcpu_info[env->cpu_index].stopped = 1;
pthread_kill(io_thread, SIGUSR1);
}
- pthread_mutex_unlock(&qemu_mutex);
return ret;
}
@@ -224,24 +203,20 @@ static int kvm_eat_signal(struct qemu_kvm_signal_table
*waitset, CPUState *env,
static void kvm_eat_signals(CPUState *env, int timeout)
{
int r = 0;
- struct qemu_kvm_signal_table *waitset = &vcpu_signal_table;
- while (kvm_eat_signal(waitset, env, 0))
+ while (kvm_eat_signal(env, 0))
r = 1;
if (!r && timeout) {
- r = kvm_eat_signal(waitset, env, timeout);
+ r = kvm_eat_signal(env, timeout);
if (r)
- while (kvm_eat_signal(waitset, env, 0))
+ while (kvm_eat_signal(env, 0))
;
}
}
static void kvm_main_loop_wait(CPUState *env, int timeout)
{
- pthread_mutex_unlock(&qemu_mutex);
kvm_eat_signals(env, timeout);
- pthread_mutex_lock(&qemu_mutex);
- cpu_single_env = env;
vcpu_info[env->cpu_index].signalled = 0;
}
@@ -263,12 +238,8 @@ static void pause_all_threads(void)
vcpu_info[i].stop = 1;
pthread_kill(vcpu_info[i].thread, SIG_IPI);
}
- while (!all_threads_paused()) {
- pthread_mutex_unlock(&qemu_mutex);
- kvm_eat_signal(&io_signal_table, NULL, 1000);
- pthread_mutex_lock(&qemu_mutex);
- cpu_single_env = NULL;
- }
+ while (!all_threads_paused())
+ main_loop_wait(10);
}
static void resume_all_threads(void)
@@ -391,18 +362,6 @@ static void *ap_main_loop(void *_env)
return NULL;
}
-static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
-{
- sigemptyset(&sigtab->sigset);
- sigfillset(&sigtab->negsigset);
-}
-
-static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
-{
- sigaddset(&sigtab->sigset, signum);
- sigdelset(&sigtab->negsigset, signum);
-}
-
void kvm_init_new_ap(int cpu, CPUState *env)
{
pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
@@ -411,28 +370,12 @@ void kvm_init_new_ap(int cpu, CPUState *env)
pthread_cond_wait(&qemu_vcpu_cond, &qemu_mutex);
}
-static void qemu_kvm_init_signal_tables(void)
-{
- qemu_kvm_init_signal_table(&io_signal_table);
- qemu_kvm_init_signal_table(&vcpu_signal_table);
-
- kvm_add_signal(&io_signal_table, SIGIO);
- kvm_add_signal(&io_signal_table, SIGALRM);
- kvm_add_signal(&io_signal_table, SIGUSR1);
- kvm_add_signal(&io_signal_table, SIGUSR2);
-
- kvm_add_signal(&vcpu_signal_table, SIG_IPI);
-
- sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
-}
-
int kvm_init_ap(void)
{
#ifdef TARGET_I386
kvm_tpr_opt_setup();
#endif
qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
- qemu_kvm_init_signal_tables();
signal(SIG_IPI, sig_ipi_handler);
return 0;
@@ -450,8 +393,67 @@ void qemu_kvm_notify_work(void)
* while processing in main_loop_wait().
*/
+static int kvm_sigfd[2] = {-1, -1};
+static void (*sigalrm_handler)(int signo);
+static void (*sigio_handler)(int signo);
+
+static void sig_aio_handler(int signum)
+{
+ if (kvm_sigfd[1] != -1) {
+ ssize_t len;
+
+ len = write(kvm_sigfd[1], &signum, sizeof(signum));
+ }
+}
+
+static void sig_aio_fd_read(void *opaque)
+{
+ int signum;
+ ssize_t len;
+
+ do {
+ len = read(kvm_sigfd[0], &signum, sizeof(signum));
+ } while (len == -1 && errno == EINTR);
+
+ if (len != 4)
+ abort();
+
+ switch (signum) {
+ case SIGUSR2:
+ pthread_cond_signal(&qemu_aio_cond);
+ break;
+ case SIGALRM:
+ sigalrm_handler(signum);
+ break;
+ case SIGIO:
+ sigio_handler(signum);
+ break;
+ }
+}
+
int kvm_main_loop(void)
{
+ struct sigaction sa;
+
+ sigaction(SIGALRM, NULL, &sa);
+ sigalrm_handler = sa.sa_handler;
+
+ sigaction(SIGIO, NULL, &sa);
+ sigio_handler = sa.sa_handler;
+
+ signal(SIGUSR1, sig_aio_handler);
+ signal(SIGUSR2, sig_aio_handler);
+ signal(SIGALRM, sig_aio_handler);
+ signal(SIGIO, sig_aio_handler);
+
+ if (pipe(kvm_sigfd) == -1)
+ abort();
+
+ fcntl(kvm_sigfd[0], F_SETFL, O_NONBLOCK);
+ fcntl(kvm_sigfd[1], F_SETFL, O_NONBLOCK);
+
+ qemu_set_fd_handler2(kvm_sigfd[0], NULL, sig_aio_fd_read, NULL, NULL);
+
io_thread = pthread_self();
qemu_system_ready = 1;
pthread_mutex_unlock(&qemu_mutex);
@@ -459,10 +461,8 @@ int kvm_main_loop(void)
pthread_cond_broadcast(&qemu_system_cond);
while (1) {
- kvm_eat_signal(&io_signal_table, NULL, 1000);
pthread_mutex_lock(&qemu_mutex);
- cpu_single_env = NULL;
- main_loop_wait(0);
+ main_loop_wait(10);
if (qemu_shutdown_requested())
break;
else if (qemu_powerdown_requested())
@@ -834,10 +834,7 @@ void qemu_kvm_aio_wait(void)
CPUState *cpu_single = cpu_single_env;
if (!cpu_single_env) {
- pthread_mutex_unlock(&qemu_mutex);
- kvm_eat_signal(&io_signal_table, NULL, 1000);
- pthread_mutex_lock(&qemu_mutex);
- cpu_single_env = NULL;
+ main_loop_wait(10);
} else {
pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
cpu_single_env = cpu_single;
@@ -864,3 +861,17 @@ void kvm_cpu_destroy_phys_mem(target_phys_addr_t
start_addr,
{
kvm_destroy_phys_mem(kvm_context, start_addr, size);
}
+
+void qemu_kvm_lock(CPUState *env)
+{
+ pthread_mutex_lock(&qemu_mutex);
+ cpu_single_env = env;
+}
+
+CPUState *qemu_kvm_unlock(void)
+{
+ CPUState *env = cpu_single_env;
+ pthread_mutex_unlock(&qemu_mutex);
+ return env;
+}
+
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index 024a653..2c2b0be 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -74,6 +74,9 @@ int qemu_kvm_get_dirty_pages(unsigned long phys_addr, void
*buf);
void qemu_kvm_system_reset_request(void);
+void qemu_kvm_lock(CPUState *env);
+CPUState *qemu_kvm_unlock(void);
+
#ifdef TARGET_PPC
int handle_powerpc_dcr_read(int vcpu, uint32_t dcrn, uint32_t *data);
int handle_powerpc_dcr_write(int vcpu,uint32_t dcrn, uint32_t data);
@@ -97,4 +100,16 @@ extern kvm_context_t kvm_context;
#define qemu_kvm_pit_in_kernel() (0)
#endif
+static inline void kvm_pre_select(void)
+{
+ if (kvm_enabled())
+ qemu_kvm_unlock();
+}
+
+static inline void kvm_post_select(void)
+{
+ if (kvm_enabled())
+ qemu_kvm_lock(NULL);
+}
+
#endif
diff --git a/qemu/vl.c b/qemu/vl.c
index 74be059..cf7677d 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -7919,7 +7919,9 @@ void main_loop_wait(int timeout)
}
#endif
moreio:
+ kvm_pre_select();
ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv);
+ kvm_post_select();
if (ret > 0) {
IOHandlerRecord **pioh;
int more = 0;
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel