Currently, timers run on the iothread under the big QEMU lock (BQL), which limits the use of timers in some cases, e.g. virtio-blk-dataplane. In order to run timers on a private thread, we equip AioContext with three timer lists, one for each of the three QEMUClocks (and later, run the timers in aio_poll).
Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com> ------ issue to fix --- Note: before this patch, there should be another one to fix the race issue by qemu_mod_timer() and _run_timers(). I plan to adopt the BH method for timers to fix it. --- async.c | 9 +++++++ include/block/aio.h | 13 ++++++++++ include/qemu/timer.h | 20 ++++++++++++++++ qemu-timer.c | 67 +++++++++++++++++++++++++++++----------------------- 4 files changed, 80 insertions(+), 29 deletions(-) diff --git a/async.c b/async.c index 8209cea..36df208 100644 --- a/async.c +++ b/async.c @@ -202,12 +202,16 @@ static void aio_ctx_finalize(GSource *source) { AioContext *ctx = (AioContext *) source; + int i; thread_pool_free(ctx->thread_pool); aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL); event_notifier_cleanup(&ctx->notifier); g_array_free(ctx->pollfds, TRUE); alarm_timer_destroy(ctx->alarm_timer); + for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) { + timer_list_finalize(&ctx->timer_list[i]); + } } static GSourceFuncs aio_source_funcs = { @@ -239,6 +243,8 @@ void aio_notify(AioContext *ctx) AioContext *aio_context_new(void) { AioContext *ctx; + int i; + ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); ctx->thread_pool = NULL; @@ -248,6 +254,9 @@ AioContext *aio_context_new(void) (EventNotifierHandler *) event_notifier_test_and_clear, NULL); ctx->alarm_timer = alarm_timer_create(ctx); + for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) { + timer_list_init(&ctx->timer_list[i]); + } return ctx; } diff --git a/include/block/aio.h b/include/block/aio.h index 84537a2..3f1a7b4 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -43,6 +43,18 @@ typedef struct AioHandler AioHandler; typedef void QEMUBHFunc(void *opaque); typedef void IOHandler(void *opaque); +/* Related timer with AioContext */ +typedef struct QEMUTimer QEMUTimer; +#define QEMU_CLOCK_MAXCNT 3 + +typedef struct TimerList { + QEMUTimer *active_timers; + 
QemuMutex active_timers_lock; +} TimerList; + +void timer_list_init(TimerList *tlist); +void timer_list_finalize(TimerList *tlist); + typedef struct AioContext { GSource source; @@ -74,6 +86,7 @@ typedef struct AioContext { /* Thread pool for performing work and receiving completion callbacks */ struct ThreadPool *thread_pool; struct qemu_alarm_timer *alarm_timer; + TimerList timer_list[QEMU_CLOCK_MAXCNT]; } AioContext; /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 4a72c99..a5acfe0 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -33,9 +33,14 @@ extern QEMUClock *vm_clock; extern QEMUClock *host_clock; int64_t qemu_get_clock_ns(QEMUClock *clock); +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context. + * So we only count the timers on qemu_aio_context. + */ int64_t qemu_clock_has_timers(QEMUClock *clock); int64_t qemu_clock_expired(QEMUClock *clock); int64_t qemu_clock_deadline(QEMUClock *clock); + void qemu_clock_enable(QEMUClock *clock, bool enabled); void qemu_clock_warp(QEMUClock *clock); @@ -45,6 +50,9 @@ void qemu_unregister_clock_reset_notifier(QEMUClock *clock, QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, QEMUTimerCB *cb, void *opaque); +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque, AioContext *ctx); + void qemu_free_timer(QEMUTimer *ts); void qemu_del_timer(QEMUTimer *ts); void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time); @@ -77,6 +85,18 @@ static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, return qemu_new_timer(clock, SCALE_MS, cb, opaque); } +static inline QEMUTimer *aioctx_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb, + void *opaque, AioContext *ctx) +{ + return aioctx_new_timer(clock, SCALE_NS, cb, opaque, ctx); +} + +static inline QEMUTimer 
*aioctx_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, + void *opaque, AioContext *ctx) +{ + return aioctx_new_timer(clock, SCALE_MS, cb, opaque, ctx); +} + static inline int64_t qemu_get_clock_ms(QEMUClock *clock) { return qemu_get_clock_ns(clock) / SCALE_MS; diff --git a/qemu-timer.c b/qemu-timer.c index 0ee68dc..1a0cbae 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -45,14 +45,6 @@ #define QEMU_CLOCK_REALTIME 0 #define QEMU_CLOCK_VIRTUAL 1 #define QEMU_CLOCK_HOST 2 -#define QEMU_CLOCK_MAXCNT 3 - -typedef struct TimerList { - QEMUTimer *active_timers; - QemuMutex active_timers_lock; -} TimerList; - -static TimerList timer_list[QEMU_CLOCK_MAXCNT]; struct QEMUClock { NotifierList reset_notifiers; @@ -64,7 +56,9 @@ struct QEMUClock { struct QEMUTimer { int64_t expire_time; /* in nanoseconds */ + /* quick link to AioContext timer list */ TimerList *list; + AioContext *ctx; QEMUTimerCB *cb; void *opaque; QEMUTimer *next; @@ -128,11 +122,12 @@ void alarm_timer_destroy(struct qemu_alarm_timer *t) g_free(t); } -static TimerList *clock_to_timerlist(QEMUClock *clock) +static TimerList *clock_to_timerlist(QEMUClock *clock, AioContext *ctx) { int type = clock->type; - return &timer_list[type]; + assert(ctx); + return &ctx->timer_list[type]; } static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) @@ -140,7 +135,8 @@ static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) return timer_head && (timer_head->expire_time <= current_time); } -static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) +static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta, + AioContext *ctx) { int64_t expire_time, next; bool has_timer = false; @@ -150,7 +146,7 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) return delta; } - tlist = clock_to_timerlist(clock); + tlist = clock_to_timerlist(clock, ctx); qemu_mutex_lock(&tlist->active_timers_lock); if (tlist->active_timers) { has_timer = true; 
@@ -165,18 +161,15 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) return MIN(next, delta); } -/* Soon this will be fixed: till now, timer list is not associated with - * AioContext, so @ctx has no effect on deadline currently. - */ static int64_t qemu_next_alarm_deadline(AioContext *ctx) { int64_t delta = INT64_MAX; if (!use_icount) { - delta = qemu_next_clock_deadline(vm_clock, delta); + delta = qemu_next_clock_deadline(vm_clock, delta, ctx); } - delta = qemu_next_clock_deadline(host_clock, delta); - return qemu_next_clock_deadline(rt_clock, delta); + delta = qemu_next_clock_deadline(host_clock, delta, ctx); + return qemu_next_clock_deadline(rt_clock, delta, ctx); } static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) @@ -309,24 +302,27 @@ QEMUClock *rt_clock; QEMUClock *vm_clock; QEMUClock *host_clock; -static void timer_list_init(TimerList *tlist) +void timer_list_init(TimerList *tlist) { qemu_mutex_init(&tlist->active_timers_lock); tlist->active_timers = NULL; } +void timer_list_finalize(TimerList *tlist) +{ + qemu_mutex_destroy(&tlist->active_timers_lock); + assert(!tlist->active_timers); +} + static QEMUClock *qemu_new_clock(int type) { QEMUClock *clock; - TimerList *tlist; clock = g_malloc0(sizeof(QEMUClock)); clock->type = type; clock->enabled = true; clock->last = INT64_MIN; notifier_list_init(&clock->reset_notifiers); - tlist = clock_to_timerlist(clock); - timer_list_init(tlist); return clock; } @@ -340,10 +336,14 @@ void qemu_clock_enable(QEMUClock *clock, bool enabled) } } +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context. + * So we only count the timers on qemu_aio_context. 
+*/ int64_t qemu_clock_has_timers(QEMUClock *clock) { bool has_timers; - TimerList *tlist = clock_to_timerlist(clock); + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); qemu_mutex_lock(&tlist->active_timers_lock); has_timers = !!tlist->active_timers; @@ -355,7 +355,7 @@ int64_t qemu_clock_expired(QEMUClock *clock) { bool has_timers; int64_t expire_time; - TimerList *tlist = clock_to_timerlist(clock); + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); qemu_mutex_lock(&tlist->active_timers_lock); has_timers = tlist->active_timers; @@ -371,7 +371,7 @@ int64_t qemu_clock_deadline(QEMUClock *clock) int64_t delta = INT32_MAX; bool has_timers; int64_t expire_time; - TimerList *tlist = clock_to_timerlist(clock); + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); qemu_mutex_lock(&tlist->active_timers_lock); has_timers = tlist->active_timers; @@ -387,19 +387,26 @@ int64_t qemu_clock_deadline(QEMUClock *clock) return delta; } -QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, - QEMUTimerCB *cb, void *opaque) +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque, AioContext *ctx) { QEMUTimer *ts; ts = g_malloc0(sizeof(QEMUTimer)); - ts->list = clock_to_timerlist(clock); + ts->list = clock_to_timerlist(clock, ctx); ts->cb = cb; ts->opaque = opaque; ts->scale = scale; + ts->ctx = ctx; return ts; } +QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque) +{ + return aioctx_new_timer(clock, scale, cb, opaque, qemu_get_aio_context()); +} + void qemu_free_timer(QEMUTimer *ts) { g_free(ts); @@ -491,12 +498,14 @@ void qemu_run_timers(QEMUClock *clock) QEMUTimer *ts; int64_t current_time; TimerList *tlist; + AioContext *ctx; if (!clock->enabled) return; current_time = qemu_get_clock_ns(clock); - tlist = clock_to_timerlist(clock); + ctx = *tls_get_thread_aio_context(); + tlist = clock_to_timerlist(clock, ctx); for(;;) { 
qemu_mutex_lock(&tlist->active_timers_lock); -- 1.8.1.4