Use __sync_bool_compare_and_swap to yield correctly atomic results.
As yet, this assumes running on an strict-memory-ordering host (i.e. x86),
since we're still "implementing" the memory-barrier instructions as nops.

Rename the "lock" cpu field to "lock_addr" and add a "lock_value" field
to be used as the "old" value for the compare-and-swap.  Use -1 in the
lock_addr field to indicate no lock held.  Break the lock when processing
any sort of exception.

Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 linux-user/main.c        |   11 ++++
 target-alpha/cpu.h       |    3 +-
 target-alpha/helper.c    |    6 +-
 target-alpha/helper.h    |    3 +
 target-alpha/op_helper.c |   70 ++++++++++++++++++++++
 target-alpha/translate.c |  146 ++++++++++++++++++++++++---------------------
 6 files changed, 168 insertions(+), 71 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index d4a29cb..cbff027 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2361,6 +2361,14 @@ void cpu_loop (CPUState *env)
           that the intr_flag should be cleared.  */
        env->intr_flag = 0;
 
+        /* Similarly with the lock_flag, where "clear" is -1 here.
+           ??? Note that it's not possible to single-step through
+           a ldl_l/stl_c sequence on real hardware, but let's see
+           if we can do better than that in the emulator.  */
+        if (trapnr != EXCP_DEBUG) {
+            env->lock_addr = -1;
+        }
+
         switch (trapnr) {
         case EXCP_RESET:
             fprintf(stderr, "Reset requested. Exit\n");
@@ -2512,6 +2520,9 @@ void cpu_loop (CPUState *env)
         case EXCP_DEBUG:
             info.si_signo = gdb_handlesig (env, TARGET_SIGTRAP);
             if (info.si_signo) {
+                /* ??? See above re single-stepping and ldl_l.  If we're
+                   actually going to deliver a signal, break the lock.  */
+                env->lock_addr = -1;
                 info.si_errno = 0;
                 info.si_code = TARGET_TRAP_BRKPT;
                 queue_signal(env, info.si_signo, &info);
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 8afe16d..cf2b587 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -355,11 +355,12 @@ struct CPUAlphaState {
     uint64_t ir[31];
     float64 fir[31];
     uint64_t pc;
-    uint64_t lock;
     uint32_t pcc[2];
     uint64_t ipr[IPR_LAST];
     uint64_t ps;
     uint64_t unique;
+    uint64_t lock_addr;
+    uint64_t lock_value;
     float_status fp_status;
     /* The following fields make up the FPCR, but in FP_STATUS format.  */
     uint8_t fpcr_exc_status;
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index 46335cd..e4dd124 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -556,12 +556,14 @@ void cpu_dump_state (CPUState *env, FILE *f,
         if ((i % 3) == 2)
             cpu_fprintf(f, "\n");
     }
-    cpu_fprintf(f, "\n");
+
+    cpu_fprintf(f, "lock_a   " TARGET_FMT_lx " lock_v   " TARGET_FMT_lx "\n",
+                env->lock_addr, env->lock_value);
+
     for (i = 0; i < 31; i++) {
         cpu_fprintf(f, "FIR%02d    " TARGET_FMT_lx " ", i,
                     *((uint64_t *)(&env->fir[i])));
         if ((i % 3) == 2)
             cpu_fprintf(f, "\n");
     }
-    cpu_fprintf(f, "\nlock     " TARGET_FMT_lx "\n", env->lock);
 }
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index ccf6a2a..a540eeb 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -99,6 +99,9 @@ DEF_HELPER_1(ieee_input, i64, i64)
 DEF_HELPER_1(ieee_input_cmp, i64, i64)
 DEF_HELPER_1(ieee_input_s, i64, i64)
 
+DEF_HELPER_2(stl_c, i64, i64, i64)
+DEF_HELPER_2(stq_c, i64, i64, i64)
+
 #if !defined (CONFIG_USER_ONLY)
 DEF_HELPER_0(hw_rei, void)
 DEF_HELPER_1(hw_ret, void, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index a209130..ea68222 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -1152,6 +1152,74 @@ uint64_t helper_cvtqg (uint64_t a)
     return float64_to_g(fr);
 }
 
+uint64_t helper_stl_c (uint64_t addr, uint64_t val)
+{
+    uint64_t ret = 0;
+
+    /* The resultant virtual and physical address must specify a location
+       within the same natually aligned 16-byte block as the preceeding
+       load instruction.  Note that (1) we're only checking the physical
+       address here, since ADDR has already been through virt_to_phys,
+       and (2) LOCK_ADDR has already been masked with -16.  We do this
+       ahead of time so that we can assign LOCK_ADDR = -1 to force a 
+       failure here.  */
+    /* ??? The "16" in the spec is no doubt the minimum architectural
+       cacheline size.  If we are attempting to model the real hardware
+       implementation, we should probably expand this to the real cache
+       line size.  But this is certainly good enough for now.  */
+    if ((addr & -16) == env->lock_addr) {
+        int32_t *host_addr;
+
+#if defined(CONFIG_USER_ONLY)
+        host_addr = (int32_t *)addr;
+#else
+        /* FIXME */
+        cpu_abort();
+#endif
+
+        /* Emulate the ldl_l/stl_c pair with a compare-and-swap.  */
+        ret = __sync_bool_compare_and_swap(host_addr,
+                                           (int32_t)env->lock_value,
+                                           (int32_t)val);
+    }
+
+    env->lock_addr = -1;
+    return ret;
+}
+
+uint64_t helper_stq_c (uint64_t addr, uint64_t val)
+{
+    uint64_t ret = 0;
+
+    /* The resultant virtual and physical address must specify a location
+       within the same natually aligned 16-byte block as the preceeding
+       load instruction.  Note that (1) we're only checking the physical
+       address here, since ADDR has already been through virt_to_phys,
+       and (2) LOCK_ADDR has already been masked with -16.  We do this
+       ahead of time so that we can assign LOCK_ADDR = -1 to force a 
+       failure here.  */
+    /* ??? The "16" in the spec is no doubt the minimum architectural
+       cacheline size.  If we are attempting to model the real hardware
+       implementation, we should probably expand this to the real cache
+       line size.  But this is certainly good enough for now.  */
+    if ((addr & -16) == env->lock_addr) {
+        uint64_t *host_addr;
+
+#if defined(CONFIG_USER_ONLY)
+        host_addr = (uint64_t *)addr;
+#else
+        /* FIXME */
+        cpu_abort();
+#endif
+
+        /* Emulate the ldq_l/stq_c pair with a compare-and-swap.  */
+        ret = __sync_bool_compare_and_swap(host_addr, env->lock_value, val);
+    }
+
+    env->lock_addr = -1;
+    return ret;
+}
+
 /* PALcode support special instructions */
 #if !defined (CONFIG_USER_ONLY)
 void helper_hw_rei (void)
@@ -1159,6 +1227,7 @@ void helper_hw_rei (void)
     env->pc = env->ipr[IPR_EXC_ADDR] & ~3;
     env->ipr[IPR_EXC_ADDR] = env->ipr[IPR_EXC_ADDR] & 1;
     env->intr_flag = 0;
+    env->lock_addr = -1;
     /* XXX: re-enable interrupts and memory mapping */
 }
 
@@ -1167,6 +1236,7 @@ void helper_hw_ret (uint64_t a)
     env->pc = a & ~3;
     env->ipr[IPR_EXC_ADDR] = a & 1;
     env->intr_flag = 0;
+    env->lock_addr = -1;
     /* XXX: re-enable interrupts and memory mapping */
 }
 
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index fe693b3..7a67ff8 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -82,7 +82,8 @@ static TCGv_ptr cpu_env;
 static TCGv cpu_ir[31];
 static TCGv cpu_fir[31];
 static TCGv cpu_pc;
-static TCGv cpu_lock;
+static TCGv cpu_lock_addr;
+static TCGv cpu_lock_value;
 #ifdef CONFIG_USER_ONLY
 static TCGv cpu_uniq;
 #endif
@@ -119,8 +120,12 @@ static void alpha_translate_init(void)
     cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
                                     offsetof(CPUState, pc), "pc");
 
-    cpu_lock = tcg_global_mem_new_i64(TCG_AREG0,
-                                      offsetof(CPUState, lock), "lock");
+    cpu_lock_addr = tcg_global_mem_new_i64(TCG_AREG0,
+                                          offsetof(CPUState, lock_addr),
+                                          "lock_addr");
+    cpu_lock_value = tcg_global_mem_new_i64(TCG_AREG0,
+                                           offsetof(CPUState, lock_value),
+                                           "lock_value");
 
 #ifdef CONFIG_USER_ONLY
     cpu_uniq = tcg_global_mem_new_i64(TCG_AREG0,
@@ -181,16 +186,33 @@ static inline void gen_qemu_lds(TCGv t0, TCGv t1, int 
flags)
     tcg_temp_free(tmp);
 }
 
+static void gen_virt_to_phys(TCGv out, TCGv in, int store)
+{
+#if defined(CONFIG_USER_ONLY)
+    tcg_gen_addi_i64(out, in, GUEST_BASE);
+#else
+    if (store) {
+        gen_helper_st_virt_to_phys(out, in);
+    } else {
+        gen_helper_ld_virt_to_phys(out, in);
+    }
+#endif
+}
+
 static inline void gen_qemu_ldl_l(TCGv t0, TCGv t1, int flags)
 {
-    tcg_gen_mov_i64(cpu_lock, t1);
     tcg_gen_qemu_ld32s(t0, t1, flags);
+    gen_virt_to_phys(cpu_lock_addr, t1, 0);
+    tcg_gen_andi_i64(cpu_lock_addr, cpu_lock_addr, -16);
+    tcg_gen_mov_i64(cpu_lock_value, t0);
 }
 
 static inline void gen_qemu_ldq_l(TCGv t0, TCGv t1, int flags)
 {
-    tcg_gen_mov_i64(cpu_lock, t1);
     tcg_gen_qemu_ld64(t0, t1, flags);
+    gen_virt_to_phys(cpu_lock_addr, t1, 0);
+    tcg_gen_andi_i64(cpu_lock_addr, cpu_lock_addr, -16);
+    tcg_gen_mov_i64(cpu_lock_value, t0);
 }
 
 static inline void gen_load_mem(DisasContext *ctx,
@@ -199,25 +221,31 @@ static inline void gen_load_mem(DisasContext *ctx,
                                 int ra, int rb, int32_t disp16, int fp,
                                 int clear)
 {
-    TCGv addr;
+    TCGv addr, va;
 
-    if (unlikely(ra == 31))
+    /* LDQ_U with ra $31 is UNOP.  Other various loads are forms of
+       prefetches, which we can treat as nops.  No worries about
+       missed exceptions here.  */
+    if (unlikely(ra == 31)) {
         return;
+    }
 
     addr = tcg_temp_new();
     if (rb != 31) {
         tcg_gen_addi_i64(addr, cpu_ir[rb], disp16);
-        if (clear)
+        if (clear) {
             tcg_gen_andi_i64(addr, addr, ~0x7);
+        }
     } else {
-        if (clear)
+        if (clear) {
             disp16 &= ~0x7;
+        }
         tcg_gen_movi_i64(addr, disp16);
     }
-    if (fp)
-        tcg_gen_qemu_load(cpu_fir[ra], addr, ctx->mem_idx);
-    else
-        tcg_gen_qemu_load(cpu_ir[ra], addr, ctx->mem_idx);
+
+    va = (fp ? cpu_fir[ra] : cpu_ir[ra]);
+    tcg_gen_qemu_load(va, addr, ctx->mem_idx);
+
     tcg_temp_free(addr);
 }
 
@@ -253,71 +281,52 @@ static inline void gen_qemu_sts(TCGv t0, TCGv t1, int 
flags)
 
 static inline void gen_qemu_stl_c(TCGv t0, TCGv t1, int flags)
 {
-    int l1, l2;
-
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-    tcg_gen_brcond_i64(TCG_COND_NE, cpu_lock, t1, l1);
-    tcg_gen_qemu_st32(t0, t1, flags);
-    tcg_gen_movi_i64(t0, 1);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(t0, 0);
-    gen_set_label(l2);
-    tcg_gen_movi_i64(cpu_lock, -1);
+    TCGv va = tcg_temp_new();
+    gen_virt_to_phys(va, t1, 1);
+    gen_helper_stl_c(t0, va, t0);
+    tcg_temp_free(va);
 }
 
 static inline void gen_qemu_stq_c(TCGv t0, TCGv t1, int flags)
 {
-    int l1, l2;
-
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-    tcg_gen_brcond_i64(TCG_COND_NE, cpu_lock, t1, l1);
-    tcg_gen_qemu_st64(t0, t1, flags);
-    tcg_gen_movi_i64(t0, 1);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(t0, 0);
-    gen_set_label(l2);
-    tcg_gen_movi_i64(cpu_lock, -1);
+    TCGv va = tcg_temp_new();
+    gen_virt_to_phys(va, t1, 1);
+    gen_helper_stq_c(t0, va, t0);
+    tcg_temp_free(va);
 }
 
 static inline void gen_store_mem(DisasContext *ctx,
                                  void (*tcg_gen_qemu_store)(TCGv t0, TCGv t1,
                                                             int flags),
                                  int ra, int rb, int32_t disp16, int fp,
-                                 int clear, int local)
+                                 int clear)
 {
-    TCGv addr;
-    if (local)
-        addr = tcg_temp_local_new();
-    else
-        addr = tcg_temp_new();
+    TCGv addr, va;
+
+    addr = tcg_temp_new();
     if (rb != 31) {
         tcg_gen_addi_i64(addr, cpu_ir[rb], disp16);
-        if (clear)
+        if (clear) {
             tcg_gen_andi_i64(addr, addr, ~0x7);
+        }
     } else {
-        if (clear)
+        if (clear) {
             disp16 &= ~0x7;
+        }
         tcg_gen_movi_i64(addr, disp16);
     }
-    if (ra != 31) {
-        if (fp)
-            tcg_gen_qemu_store(cpu_fir[ra], addr, ctx->mem_idx);
-        else
-            tcg_gen_qemu_store(cpu_ir[ra], addr, ctx->mem_idx);
+
+    if (ra == 31) {
+        va = tcg_const_i64(0);
     } else {
-        TCGv zero;
-        if (local)
-            zero = tcg_const_local_i64(0);
-        else
-            zero = tcg_const_i64(0);
-        tcg_gen_qemu_store(zero, addr, ctx->mem_idx);
-        tcg_temp_free(zero);
+        va = (fp ? cpu_fir[ra] : cpu_ir[ra]);
     }
+    tcg_gen_qemu_store(va, addr, ctx->mem_idx);
+
     tcg_temp_free(addr);
+    if (ra == 31) {
+        tcg_temp_free(va);
+    }
 }
 
 static int use_goto_tb(DisasContext *ctx, uint64_t dest)
@@ -1530,15 +1539,15 @@ static ExitStatus translate_one(DisasContext *ctx, 
uint32_t insn)
         break;
     case 0x0D:
         /* STW */
-        gen_store_mem(ctx, &tcg_gen_qemu_st16, ra, rb, disp16, 0, 0, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st16, ra, rb, disp16, 0, 0);
         break;
     case 0x0E:
         /* STB */
-        gen_store_mem(ctx, &tcg_gen_qemu_st8, ra, rb, disp16, 0, 0, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st8, ra, rb, disp16, 0, 0);
         break;
     case 0x0F:
         /* STQ_U */
-        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 1, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 1);
         break;
     case 0x10:
         switch (fn7) {
@@ -2968,19 +2977,19 @@ static ExitStatus translate_one(DisasContext *ctx, 
uint32_t insn)
         break;
     case 0x24:
         /* STF */
-        gen_store_mem(ctx, &gen_qemu_stf, ra, rb, disp16, 1, 0, 0);
+        gen_store_mem(ctx, &gen_qemu_stf, ra, rb, disp16, 1, 0);
         break;
     case 0x25:
         /* STG */
-        gen_store_mem(ctx, &gen_qemu_stg, ra, rb, disp16, 1, 0, 0);
+        gen_store_mem(ctx, &gen_qemu_stg, ra, rb, disp16, 1, 0);
         break;
     case 0x26:
         /* STS */
-        gen_store_mem(ctx, &gen_qemu_sts, ra, rb, disp16, 1, 0, 0);
+        gen_store_mem(ctx, &gen_qemu_sts, ra, rb, disp16, 1, 0);
         break;
     case 0x27:
         /* STT */
-        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 1, 0, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 1, 0);
         break;
     case 0x28:
         /* LDL */
@@ -3000,19 +3009,19 @@ static ExitStatus translate_one(DisasContext *ctx, 
uint32_t insn)
         break;
     case 0x2C:
         /* STL */
-        gen_store_mem(ctx, &tcg_gen_qemu_st32, ra, rb, disp16, 0, 0, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st32, ra, rb, disp16, 0, 0);
         break;
     case 0x2D:
         /* STQ */
-        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 0, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 0);
         break;
     case 0x2E:
         /* STL_C */
-        gen_store_mem(ctx, &gen_qemu_stl_c, ra, rb, disp16, 0, 0, 1);
+        gen_store_mem(ctx, &gen_qemu_stl_c, ra, rb, disp16, 0, 0);
         break;
     case 0x2F:
         /* STQ_C */
-        gen_store_mem(ctx, &gen_qemu_stq_c, ra, rb, disp16, 0, 0, 1);
+        gen_store_mem(ctx, &gen_qemu_stq_c, ra, rb, disp16, 0, 0);
         break;
     case 0x30:
         /* BR */
@@ -3279,6 +3288,7 @@ CPUAlphaState * cpu_alpha_init (const char *cpu_model)
 #else
     pal_init(env);
 #endif
+    env->lock_addr = -1;
 
     /* Initialize IPR */
 #if defined (CONFIG_USER_ONLY)
-- 
1.6.6.1



Reply via email to