This set of locks will allow us to correctly emulate cmpxchg16 in a parallel TCG. The key observation is that no architecture supports regular (non-locked) 16-byte atomic loads/stores; only "locked" accesses (e.g. via cmpxchg16b on x86) are allowed, and therefore we can emulate them by using locks.
We use a small array of locks so that we can have some scalability. Further improvements are possible (e.g. using a radix tree); but we should have a workload to benchmark in order to justify the additional complexity. Signed-off-by: Emilio G. Cota <c...@braap.org> --- cpu-exec.c | 1 + linux-user/main.c | 1 + tcg/tcg.h | 5 +++++ translate-all.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/cpu-exec.c b/cpu-exec.c index b840e1d..26f3bd6 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -643,6 +643,7 @@ int cpu_exec(CPUState *cpu) #endif /* buggy compiler */ cpu->can_do_io = 1; tb_lock_reset(); + tcg_cmpxchg_lock_reset(); } } /* for(;;) */ diff --git a/linux-user/main.c b/linux-user/main.c index 78d8d04..af9e8e3 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -140,6 +140,7 @@ void fork_end(int child) pthread_cond_init(&exclusive_cond, NULL); pthread_cond_init(&exclusive_resume, NULL); qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); + tcg_cmpxchg_lock_init(); gdbserver_fork(thread_cpu); } else { pthread_mutex_unlock(&exclusive_lock); diff --git a/tcg/tcg.h b/tcg/tcg.h index 1fd7ec3..1c9c8bc 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -650,6 +650,11 @@ void tb_lock(void); void tb_unlock(void); void tb_lock_reset(void); +void tcg_cmpxchg_lock(uintptr_t addr); +void tcg_cmpxchg_unlock(void); +void tcg_cmpxchg_lock_reset(void); +void tcg_cmpxchg_lock_init(void); + static inline void *tcg_malloc(int size) { TCGContext *s = &tcg_ctx; diff --git a/translate-all.c b/translate-all.c index eaa95e4..19432e5 100644 --- a/translate-all.c +++ b/translate-all.c @@ -153,6 +153,44 @@ void tb_lock_reset(void) #endif } +#define TCG_CMPXCHG_NR_LOCKS 16 +static QemuMutex tcg_cmpxchg_locks[TCG_CMPXCHG_NR_LOCKS]; +static __thread QemuMutex *tcg_cmpxchg_curr_lock; + +void tcg_cmpxchg_lock(uintptr_t addr) +{ + assert(tcg_cmpxchg_curr_lock == NULL); + /* choose lock based on cache line address. 
We assume lines are 64 bytes long */ + addr >>= 6; + addr &= TCG_CMPXCHG_NR_LOCKS - 1; + tcg_cmpxchg_curr_lock = &tcg_cmpxchg_locks[addr]; + qemu_mutex_lock(tcg_cmpxchg_curr_lock); +} + +void tcg_cmpxchg_unlock(void) +{ + qemu_mutex_unlock(tcg_cmpxchg_curr_lock); + tcg_cmpxchg_curr_lock = NULL; +} + +void tcg_cmpxchg_lock_reset(void) +{ + if (unlikely(tcg_cmpxchg_curr_lock)) { + tcg_cmpxchg_unlock(); + } +} + +void tcg_cmpxchg_lock_init(void) +{ + int i; + + /* set current to NULL; useful after a child forks in user-mode */ + tcg_cmpxchg_curr_lock = NULL; + for (i = 0; i < TCG_CMPXCHG_NR_LOCKS; i++) { + qemu_mutex_init(&tcg_cmpxchg_locks[i]); + } +} + static TranslationBlock *tb_find_pc(uintptr_t tc_ptr); void cpu_gen_init(void) @@ -731,6 +769,7 @@ static inline void code_gen_alloc(size_t tb_size) tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks); qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); + tcg_cmpxchg_lock_init(); } static void tb_htable_init(void) -- 2.5.0