From: Avi Kivity <a...@redhat.com>

ucontext-based coroutines use a free pool to reduce allocations and
deallocations of coroutine objects.  The pool is per-thread, presumably
to improve locality.  However, as coroutines are usually allocated in
a vcpu thread and freed in the I/O thread, the pool accounting gets
screwed up and we end allocating and freeing a coroutine for every I/O
request.  This is expensive since large objects are allocated via the
kernel, and are not cached by the C runtime.

Fix by switching to a global pool.  This is safe since we're protected
by the global mutex.

Signed-off-by: Avi Kivity <a...@redhat.com>
Signed-off-by: Kevin Wolf <kw...@redhat.com>
---
 coroutine-ucontext.c |   30 ++++++++++++++++--------------
 1 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c
index 2b8d3e9..3d01075 100644
--- a/coroutine-ucontext.c
+++ b/coroutine-ucontext.c
@@ -35,6 +35,10 @@ enum {
     POOL_MAX_SIZE = 64,
 };
 
+/** Free list to speed up creation */
+static QLIST_HEAD(, Coroutine) pool = QLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_size;
+
 typedef struct {
     Coroutine base;
     void *stack;
@@ -48,10 +52,6 @@ typedef struct {
     /** Currently executing coroutine */
     Coroutine *current;
 
-    /** Free list to speed up creation */
-    QLIST_HEAD(, Coroutine) pool;
-    unsigned int pool_size;
-
     /** The default coroutine */
     CoroutineUContext leader;
 } CoroutineThreadState;
@@ -75,7 +75,6 @@ static CoroutineThreadState *coroutine_get_thread_state(void)
     if (!s) {
         s = g_malloc0(sizeof(*s));
         s->current = &s->leader.base;
-        QLIST_INIT(&s->pool);
         pthread_setspecific(thread_state_key, s);
     }
     return s;
@@ -84,14 +83,19 @@ static CoroutineThreadState 
*coroutine_get_thread_state(void)
 static void qemu_coroutine_thread_cleanup(void *opaque)
 {
     CoroutineThreadState *s = opaque;
+
+    g_free(s);
+}
+
+static void __attribute__((destructor)) coroutine_cleanup(void)
+{
     Coroutine *co;
     Coroutine *tmp;
 
-    QLIST_FOREACH_SAFE(co, &s->pool, pool_next, tmp) {
+    QLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) {
         g_free(DO_UPCAST(CoroutineUContext, base, co)->stack);
         g_free(co);
     }
-    g_free(s);
 }
 
 static void __attribute__((constructor)) coroutine_init(void)
@@ -169,13 +173,12 @@ static Coroutine *coroutine_new(void)
 
 Coroutine *qemu_coroutine_new(void)
 {
-    CoroutineThreadState *s = coroutine_get_thread_state();
     Coroutine *co;
 
-    co = QLIST_FIRST(&s->pool);
+    co = QLIST_FIRST(&pool);
     if (co) {
         QLIST_REMOVE(co, pool_next);
-        s->pool_size--;
+        pool_size--;
     } else {
         co = coroutine_new();
     }
@@ -184,13 +187,12 @@ Coroutine *qemu_coroutine_new(void)
 
 void qemu_coroutine_delete(Coroutine *co_)
 {
-    CoroutineThreadState *s = coroutine_get_thread_state();
     CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
 
-    if (s->pool_size < POOL_MAX_SIZE) {
-        QLIST_INSERT_HEAD(&s->pool, &co->base, pool_next);
+    if (pool_size < POOL_MAX_SIZE) {
+        QLIST_INSERT_HEAD(&pool, &co->base, pool_next);
         co->base.caller = NULL;
-        s->pool_size++;
+        pool_size++;
         return;
     }
 
-- 
1.7.6.4


Reply via email to