Use __sync_bool_compare_and_swap to yield correctly atomic results. For now,
this assumes running on a strict-memory-ordering host (i.e. x86), since we're
still "implementing" the memory-barrier instructions as nops.
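The scheme reduces to something like the following standalone sketch
(illustration only, not QEMU code: the lock_state struct and the emu_*
function names are invented here).  The load-locked side records the
16-byte block address and the value it read; the store-conditional side
succeeds only if a host compare-and-swap from that recorded value still
succeeds, and the lock is cleared either way:

/* Standalone illustration (not QEMU code): emulating Alpha ldl_l/stl_c
   with a host compare-and-swap.  The lock_state struct and emu_* names
   are invented for this example.  Build with: gcc -O2 example.c */
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t lock_addr;   /* locked 16-byte block; -1 means no lock held */
    uint32_t lock_value;  /* value observed by the last ldl_l */
} lock_state;

/* Load-locked: remember which block we loaded from and what we saw.  */
static uint32_t emu_ldl_l(lock_state *ls, uint32_t *addr)
{
    uint32_t val = *addr;
    ls->lock_addr = (uint64_t)(uintptr_t)addr & -16;
    ls->lock_value = val;
    return val;
}

/* Store-conditional: succeed only if the lock is still held for this
   block and the word still contains the value seen by the ldl_l.  */
static int emu_stl_c(lock_state *ls, uint32_t *addr, uint32_t val)
{
    int ok = 0;
    if (((uint64_t)(uintptr_t)addr & -16) == ls->lock_addr) {
        ok = __sync_bool_compare_and_swap(addr, ls->lock_value, val);
    }
    ls->lock_addr = (uint64_t)-1;   /* the lock is always cleared */
    return ok;
}

int main(void)
{
    uint32_t word = 41;
    lock_state ls = { .lock_addr = (uint64_t)-1, .lock_value = 0 };

    uint32_t old = emu_ldl_l(&ls, &word);
    int ok = emu_stl_c(&ls, &word, old + 1);   /* atomic increment */
    printf("stl_c succeeded: %d, word = %u\n", ok, word);
    return 0;
}

The helpers added in op_helper.c below follow the same pattern, with
lock_addr/lock_value living in CPUAlphaState and the address having
already been run through virt_to_phys.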
Rename the "lock" cpu field to "lock_addr" and add a "lock_value" field
to be used as the "old" value for the compare-and-swap.  Use -1 in the
lock_addr field to indicate no lock held.  Break the lock when processing
any sort of exception.

Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 linux-user/main.c        |   11 ++++
 target-alpha/cpu.h       |    3 +-
 target-alpha/helper.c    |    6 +-
 target-alpha/helper.h    |    3 +
 target-alpha/op_helper.c |   70 ++++++++++++++++++++++
 target-alpha/translate.c |  146 ++++++++++++++++++++++++---------------------
 6 files changed, 168 insertions(+), 71 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index d4a29cb..cbff027 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2361,6 +2361,14 @@ void cpu_loop (CPUState *env)
            that the intr_flag should be cleared.  */
         env->intr_flag = 0;

+        /* Similarly with the lock_flag, where "clear" is -1 here.
+           ??? Note that it's not possible to single-step through
+           a ldl_l/stl_c sequence on real hardware, but let's see
+           if we can do better than that in the emulator.  */
+        if (trapnr != EXCP_DEBUG) {
+            env->lock_addr = -1;
+        }
+
         switch (trapnr) {
         case EXCP_RESET:
             fprintf(stderr, "Reset requested. Exit\n");
@@ -2512,6 +2520,9 @@ void cpu_loop (CPUState *env)
         case EXCP_DEBUG:
             info.si_signo = gdb_handlesig (env, TARGET_SIGTRAP);
             if (info.si_signo) {
+                /* ??? See above re single-stepping and ldl_l.  If we're
+                   actually going to deliver a signal, break the lock.  */
+                env->lock_addr = -1;
                 info.si_errno = 0;
                 info.si_code = TARGET_TRAP_BRKPT;
                 queue_signal(env, info.si_signo, &info);
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 8afe16d..cf2b587 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -355,11 +355,12 @@ struct CPUAlphaState {
     uint64_t ir[31];
     float64 fir[31];
     uint64_t pc;
-    uint64_t lock;
     uint32_t pcc[2];
     uint64_t ipr[IPR_LAST];
     uint64_t ps;
     uint64_t unique;
+    uint64_t lock_addr;
+    uint64_t lock_value;
     float_status fp_status;
     /* The following fields make up the FPCR, but in FP_STATUS format.  */
     uint8_t fpcr_exc_status;
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index 46335cd..e4dd124 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -556,12 +556,14 @@ void cpu_dump_state (CPUState *env, FILE *f,
         if ((i % 3) == 2)
             cpu_fprintf(f, "\n");
     }
-    cpu_fprintf(f, "\n");
+
+    cpu_fprintf(f, "lock_a " TARGET_FMT_lx " lock_v " TARGET_FMT_lx "\n",
+                env->lock_addr, env->lock_value);
+
     for (i = 0; i < 31; i++) {
         cpu_fprintf(f, "FIR%02d " TARGET_FMT_lx " ", i,
                     *((uint64_t *)(&env->fir[i])));
         if ((i % 3) == 2)
             cpu_fprintf(f, "\n");
     }
-    cpu_fprintf(f, "\nlock " TARGET_FMT_lx "\n", env->lock);
 }
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index ccf6a2a..a540eeb 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -99,6 +99,9 @@ DEF_HELPER_1(ieee_input, i64, i64)
 DEF_HELPER_1(ieee_input_cmp, i64, i64)
 DEF_HELPER_1(ieee_input_s, i64, i64)

+DEF_HELPER_2(stl_c, i64, i64, i64)
+DEF_HELPER_2(stq_c, i64, i64, i64)
+
 #if !defined (CONFIG_USER_ONLY)
 DEF_HELPER_0(hw_rei, void)
 DEF_HELPER_1(hw_ret, void, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index a209130..ea68222 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -1152,6 +1152,74 @@ uint64_t helper_cvtqg (uint64_t a)
     return float64_to_g(fr);
 }

+uint64_t helper_stl_c (uint64_t addr, uint64_t val)
+{
+    uint64_t ret = 0;
+
+    /* The resultant virtual and physical address must specify a location
+       within the same naturally aligned 16-byte block as the preceding
+       load instruction.  Note that (1) we're only checking the physical
+       address here, since ADDR has already been through virt_to_phys,
+       and (2) LOCK_ADDR has already been masked with -16.  We do this
+       ahead of time so that we can assign LOCK_ADDR = -1 to force a
+       failure here.  */
+    /* ??? The "16" in the spec is no doubt the minimum architectural
+       cacheline size.  If we are attempting to model the real hardware
+       implementation, we should probably expand this to the real cache
+       line size.  But this is certainly good enough for now.  */
+    if ((addr & -16) == env->lock_addr) {
+        int32_t *host_addr;
+
+#if defined(CONFIG_USER_ONLY)
+        host_addr = (int32_t *)addr;
+#else
+        /* FIXME */
+        cpu_abort();
+#endif
+
+        /* Emulate the ldl_l/stl_c pair with a compare-and-swap.  */
+        ret = __sync_bool_compare_and_swap(host_addr,
+                                           (int32_t)env->lock_value,
+                                           (int32_t)val);
+    }
+
+    env->lock_addr = -1;
+    return ret;
+}
+
+uint64_t helper_stq_c (uint64_t addr, uint64_t val)
+{
+    uint64_t ret = 0;
+
+    /* The resultant virtual and physical address must specify a location
+       within the same naturally aligned 16-byte block as the preceding
+       load instruction.  Note that (1) we're only checking the physical
+       address here, since ADDR has already been through virt_to_phys,
+       and (2) LOCK_ADDR has already been masked with -16.  We do this
+       ahead of time so that we can assign LOCK_ADDR = -1 to force a
+       failure here.  */
+    /* ??? The "16" in the spec is no doubt the minimum architectural
+       cacheline size.  If we are attempting to model the real hardware
+       implementation, we should probably expand this to the real cache
+       line size.  But this is certainly good enough for now.  */
+    if ((addr & -16) == env->lock_addr) {
+        uint64_t *host_addr;
+
+#if defined(CONFIG_USER_ONLY)
+        host_addr = (uint64_t *)addr;
+#else
+        /* FIXME */
+        cpu_abort();
+#endif
+
+        /* Emulate the ldq_l/stq_c pair with a compare-and-swap.  */
+        ret = __sync_bool_compare_and_swap(host_addr, env->lock_value, val);
+    }
+
+    env->lock_addr = -1;
+    return ret;
+}
+
 /* PALcode support special instructions */
 #if !defined (CONFIG_USER_ONLY)
 void helper_hw_rei (void)
@@ -1159,6 +1227,7 @@ void helper_hw_rei (void)
     env->pc = env->ipr[IPR_EXC_ADDR] & ~3;
     env->ipr[IPR_EXC_ADDR] = env->ipr[IPR_EXC_ADDR] & 1;
     env->intr_flag = 0;
+    env->lock_addr = -1;
     /* XXX: re-enable interrupts and memory mapping */
 }

@@ -1167,6 +1236,7 @@ void helper_hw_ret (uint64_t a)
     env->pc = a & ~3;
     env->ipr[IPR_EXC_ADDR] = a & 1;
     env->intr_flag = 0;
+    env->lock_addr = -1;
     /* XXX: re-enable interrupts and memory mapping */
 }

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index fe693b3..7a67ff8 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -82,7 +82,8 @@ static TCGv_ptr cpu_env;
 static TCGv cpu_ir[31];
 static TCGv cpu_fir[31];
 static TCGv cpu_pc;
-static TCGv cpu_lock;
+static TCGv cpu_lock_addr;
+static TCGv cpu_lock_value;
 #ifdef CONFIG_USER_ONLY
 static TCGv cpu_uniq;
 #endif
@@ -119,8 +120,12 @@ static void alpha_translate_init(void)
     cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
                                     offsetof(CPUState, pc), "pc");

-    cpu_lock = tcg_global_mem_new_i64(TCG_AREG0,
-                                      offsetof(CPUState, lock), "lock");
+    cpu_lock_addr = tcg_global_mem_new_i64(TCG_AREG0,
+                                           offsetof(CPUState, lock_addr),
+                                           "lock_addr");
+    cpu_lock_value = tcg_global_mem_new_i64(TCG_AREG0,
+                                            offsetof(CPUState, lock_value),
+                                            "lock_value");

 #ifdef CONFIG_USER_ONLY
     cpu_uniq = tcg_global_mem_new_i64(TCG_AREG0,
@@ -181,16 +186,33 @@ static inline void gen_qemu_lds(TCGv t0, TCGv t1, int flags)
     tcg_temp_free(tmp);
 }

+static void gen_virt_to_phys(TCGv out, TCGv in, int store)
+{
+#if defined(CONFIG_USER_ONLY)
+    tcg_gen_addi_i64(out, in, GUEST_BASE);
+#else
+    if (store) {
+        gen_helper_st_virt_to_phys(out, in);
+    } else {
+        gen_helper_ld_virt_to_phys(out, in);
+    }
+#endif
+}
+
 static inline void gen_qemu_ldl_l(TCGv t0, TCGv t1, int flags)
 {
-    tcg_gen_mov_i64(cpu_lock, t1);
     tcg_gen_qemu_ld32s(t0, t1, flags);
+    gen_virt_to_phys(cpu_lock_addr, t1, 0);
+    tcg_gen_andi_i64(cpu_lock_addr, cpu_lock_addr, -16);
+    tcg_gen_mov_i64(cpu_lock_value, t0);
 }

 static inline void gen_qemu_ldq_l(TCGv t0, TCGv t1, int flags)
 {
-    tcg_gen_mov_i64(cpu_lock, t1);
     tcg_gen_qemu_ld64(t0, t1, flags);
+    gen_virt_to_phys(cpu_lock_addr, t1, 0);
+    tcg_gen_andi_i64(cpu_lock_addr, cpu_lock_addr, -16);
+    tcg_gen_mov_i64(cpu_lock_value, t0);
 }

 static inline void gen_load_mem(DisasContext *ctx,
@@ -199,25 +221,31 @@ static inline void gen_load_mem(DisasContext *ctx,
                                 int ra, int rb, int32_t disp16, int fp, int clear)
 {
-    TCGv addr;
+    TCGv addr, va;

-    if (unlikely(ra == 31))
+    /* LDQ_U with ra $31 is UNOP.  Other various loads are forms of
+       prefetches, which we can treat as nops.  No worries about
+       missed exceptions here.  */
+    if (unlikely(ra == 31)) {
         return;
+    }

     addr = tcg_temp_new();
     if (rb != 31) {
         tcg_gen_addi_i64(addr, cpu_ir[rb], disp16);
-        if (clear)
+        if (clear) {
             tcg_gen_andi_i64(addr, addr, ~0x7);
+        }
     } else {
-        if (clear)
+        if (clear) {
             disp16 &= ~0x7;
+        }
         tcg_gen_movi_i64(addr, disp16);
     }
-    if (fp)
-        tcg_gen_qemu_load(cpu_fir[ra], addr, ctx->mem_idx);
-    else
-        tcg_gen_qemu_load(cpu_ir[ra], addr, ctx->mem_idx);
+
+    va = (fp ? cpu_fir[ra] : cpu_ir[ra]);
+    tcg_gen_qemu_load(va, addr, ctx->mem_idx);
+
     tcg_temp_free(addr);
 }
@@ -253,71 +281,52 @@ static inline void gen_qemu_sts(TCGv t0, TCGv t1, int flags)
     tcg_temp_free(tmp);
 }

 static inline void gen_qemu_stl_c(TCGv t0, TCGv t1, int flags)
 {
-    int l1, l2;
-
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-    tcg_gen_brcond_i64(TCG_COND_NE, cpu_lock, t1, l1);
-    tcg_gen_qemu_st32(t0, t1, flags);
-    tcg_gen_movi_i64(t0, 1);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(t0, 0);
-    gen_set_label(l2);
-    tcg_gen_movi_i64(cpu_lock, -1);
+    TCGv va = tcg_temp_new();
+    gen_virt_to_phys(va, t1, 1);
+    gen_helper_stl_c(t0, va, t0);
+    tcg_temp_free(va);
 }

 static inline void gen_qemu_stq_c(TCGv t0, TCGv t1, int flags)
 {
-    int l1, l2;
-
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-    tcg_gen_brcond_i64(TCG_COND_NE, cpu_lock, t1, l1);
-    tcg_gen_qemu_st64(t0, t1, flags);
-    tcg_gen_movi_i64(t0, 1);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(t0, 0);
-    gen_set_label(l2);
-    tcg_gen_movi_i64(cpu_lock, -1);
+    TCGv va = tcg_temp_new();
+    gen_virt_to_phys(va, t1, 1);
+    gen_helper_stq_c(t0, va, t0);
+    tcg_temp_free(va);
 }

 static inline void gen_store_mem(DisasContext *ctx,
                                  void (*tcg_gen_qemu_store)(TCGv t0, TCGv t1, int flags),
                                  int ra, int rb, int32_t disp16, int fp,
-                                 int clear, int local)
+                                 int clear)
 {
-    TCGv addr;
-    if (local)
-        addr = tcg_temp_local_new();
-    else
-        addr = tcg_temp_new();
+    TCGv addr, va;
+
+    addr = tcg_temp_new();
     if (rb != 31) {
         tcg_gen_addi_i64(addr, cpu_ir[rb], disp16);
-        if (clear)
+        if (clear) {
             tcg_gen_andi_i64(addr, addr, ~0x7);
+        }
     } else {
-        if (clear)
+        if (clear) {
             disp16 &= ~0x7;
+        }
         tcg_gen_movi_i64(addr, disp16);
     }
-    if (ra != 31) {
-        if (fp)
-            tcg_gen_qemu_store(cpu_fir[ra], addr, ctx->mem_idx);
-        else
-            tcg_gen_qemu_store(cpu_ir[ra], addr, ctx->mem_idx);
+
+    if (ra == 31) {
+        va = tcg_const_i64(0);
     } else {
-        TCGv zero;
-        if (local)
-            zero = tcg_const_local_i64(0);
-        else
-            zero = tcg_const_i64(0);
-        tcg_gen_qemu_store(zero, addr, ctx->mem_idx);
-        tcg_temp_free(zero);
+        va = (fp ? cpu_fir[ra] : cpu_ir[ra]);
     }
+    tcg_gen_qemu_store(va, addr, ctx->mem_idx);
+
     tcg_temp_free(addr);
+    if (ra == 31) {
+        tcg_temp_free(va);
+    }
 }

 static int use_goto_tb(DisasContext *ctx, uint64_t dest)
@@ -1530,15 +1539,15 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
         break;
     case 0x0D:
         /* STW */
-        gen_store_mem(ctx, &tcg_gen_qemu_st16, ra, rb, disp16, 0, 0, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st16, ra, rb, disp16, 0, 0);
         break;
     case 0x0E:
         /* STB */
-        gen_store_mem(ctx, &tcg_gen_qemu_st8, ra, rb, disp16, 0, 0, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st8, ra, rb, disp16, 0, 0);
         break;
     case 0x0F:
         /* STQ_U */
-        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 1, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 1);
         break;
     case 0x10:
         switch (fn7) {
@@ -2968,19 +2977,19 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
         break;
     case 0x24:
         /* STF */
-        gen_store_mem(ctx, &gen_qemu_stf, ra, rb, disp16, 1, 0, 0);
+        gen_store_mem(ctx, &gen_qemu_stf, ra, rb, disp16, 1, 0);
         break;
     case 0x25:
         /* STG */
-        gen_store_mem(ctx, &gen_qemu_stg, ra, rb, disp16, 1, 0, 0);
+        gen_store_mem(ctx, &gen_qemu_stg, ra, rb, disp16, 1, 0);
         break;
     case 0x26:
         /* STS */
-        gen_store_mem(ctx, &gen_qemu_sts, ra, rb, disp16, 1, 0, 0);
+        gen_store_mem(ctx, &gen_qemu_sts, ra, rb, disp16, 1, 0);
         break;
     case 0x27:
         /* STT */
-        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 1, 0, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 1, 0);
         break;
     case 0x28:
         /* LDL */
@@ -3000,19 +3009,19 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
         break;
     case 0x2C:
         /* STL */
-        gen_store_mem(ctx, &tcg_gen_qemu_st32, ra, rb, disp16, 0, 0, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st32, ra, rb, disp16, 0, 0);
         break;
     case 0x2D:
         /* STQ */
-        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 0, 0);
+        gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 0);
         break;
     case 0x2E:
         /* STL_C */
-        gen_store_mem(ctx, &gen_qemu_stl_c, ra, rb, disp16, 0, 0, 1);
+        gen_store_mem(ctx, &gen_qemu_stl_c, ra, rb, disp16, 0, 0);
         break;
     case 0x2F:
         /* STQ_C */
-        gen_store_mem(ctx, &gen_qemu_stq_c, ra, rb, disp16, 0, 0, 1);
+        gen_store_mem(ctx, &gen_qemu_stq_c, ra, rb, disp16, 0, 0);
         break;
     case 0x30:
         /* BR */
@@ -3279,6 +3288,7 @@ CPUAlphaState * cpu_alpha_init (const char *cpu_model)
 #else
     pal_init(env);
 #endif
+    env->lock_addr = -1;

     /* Initialize IPR */
 #if defined (CONFIG_USER_ONLY)
-- 
1.6.6.1