Richard Henderson <r...@twiddle.net> writes: > Emulating LL/SC with cmpxchg is not correct, since it can > suffer from the ABA problem. However, portable parallel > code is writting assuming only cmpxchg which means that in > practice this is a viable alternative. > > Signed-off-by: Richard Henderson <r...@twiddle.net> > --- > linux-user/main.c | 49 ---------------------- > target-alpha/cpu.h | 4 -- > target-alpha/helper.c | 6 --- > target-alpha/machine.c | 2 - > target-alpha/translate.c | 104 > ++++++++++++++++++++--------------------------- > 5 files changed, 45 insertions(+), 120 deletions(-) > > diff --git a/linux-user/main.c b/linux-user/main.c > index 64838bf..6cdd0da 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -2994,51 +2994,6 @@ void cpu_loop(CPUM68KState *env) > #endif /* TARGET_M68K */ > > #ifdef TARGET_ALPHA > -static void do_store_exclusive(CPUAlphaState *env, int reg, int quad) > -{ > - target_ulong addr, val, tmp; > - target_siginfo_t info; > - int ret = 0; > - > - addr = env->lock_addr; > - tmp = env->lock_st_addr; > - env->lock_addr = -1; > - env->lock_st_addr = 0; > - > - start_exclusive(); > - mmap_lock(); > - > - if (addr == tmp) { > - if (quad ? get_user_s64(val, addr) : get_user_s32(val, addr)) { > - goto do_sigsegv; > - } > - > - if (val == env->lock_value) { > - tmp = env->ir[reg]; > - if (quad ? put_user_u64(tmp, addr) : put_user_u32(tmp, addr)) { > - goto do_sigsegv; > - } > - ret = 1; > - } > - } > - env->ir[reg] = ret; > - env->pc += 4; > - > - mmap_unlock(); > - end_exclusive(); > - return; > - > - do_sigsegv: > - mmap_unlock(); > - end_exclusive(); > - > - info.si_signo = TARGET_SIGSEGV; > - info.si_errno = 0; > - info.si_code = TARGET_SEGV_MAPERR; > - info._sifields._sigfault._addr = addr; > - queue_signal(env, TARGET_SIGSEGV, &info); > -} > - > void cpu_loop(CPUAlphaState *env) > { > CPUState *cs = CPU(alpha_env_get_cpu(env)); > @@ -3212,10 +3167,6 @@ void cpu_loop(CPUAlphaState *env) > queue_signal(env, info.si_signo, &info); > } > break; > - case EXCP_STL_C: > - case EXCP_STQ_C: > - do_store_exclusive(env, env->error_code, trapnr - EXCP_STL_C); > - break; > case EXCP_INTERRUPT: > /* Just indicate that signals should be handled asap. */ > break; > diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h > index 9d9489c..a4068c8 100644 > --- a/target-alpha/cpu.h > +++ b/target-alpha/cpu.h > @@ -230,7 +230,6 @@ struct CPUAlphaState { > uint64_t pc; > uint64_t unique; > uint64_t lock_addr; > - uint64_t lock_st_addr; > uint64_t lock_value; > > /* The FPCR, and disassembled portions thereof. */ > @@ -346,9 +345,6 @@ enum { > EXCP_ARITH, > EXCP_FEN, > EXCP_CALL_PAL, > - /* For Usermode emulation. */ > - EXCP_STL_C, > - EXCP_STQ_C, > }; > > /* Alpha-specific interrupt pending bits. */ > diff --git a/target-alpha/helper.c b/target-alpha/helper.c > index 1ed0725..95453f5 100644 > --- a/target-alpha/helper.c > +++ b/target-alpha/helper.c > @@ -306,12 +306,6 @@ void alpha_cpu_do_interrupt(CPUState *cs) > case EXCP_CALL_PAL: > name = "call_pal"; > break; > - case EXCP_STL_C: > - name = "stl_c"; > - break; > - case EXCP_STQ_C: > - name = "stq_c"; > - break; > } > qemu_log("INT %6d: %s(%#x) pc=%016" PRIx64 " sp=%016" PRIx64 "\n", > ++count, name, env->error_code, env->pc, env->ir[IR_SP]); > diff --git a/target-alpha/machine.c b/target-alpha/machine.c > index 710b783..b99a123 100644 > --- a/target-alpha/machine.c > +++ b/target-alpha/machine.c > @@ -45,8 +45,6 @@ static VMStateField vmstate_env_fields[] = { > VMSTATE_UINTTL(unique, CPUAlphaState), > VMSTATE_UINTTL(lock_addr, CPUAlphaState), > VMSTATE_UINTTL(lock_value, CPUAlphaState), > - /* Note that lock_st_addr is not saved; it is a temporary > - used during the execution of the st[lq]_c insns. */ > > VMSTATE_UINT8(ps, CPUAlphaState), > VMSTATE_UINT8(intr_flag, CPUAlphaState), > diff --git a/target-alpha/translate.c b/target-alpha/translate.c > index 2941159..ed353de 100644 > --- a/target-alpha/translate.c > +++ b/target-alpha/translate.c > @@ -99,7 +99,6 @@ static TCGv cpu_std_ir[31]; > static TCGv cpu_fir[31]; > static TCGv cpu_pc; > static TCGv cpu_lock_addr; > -static TCGv cpu_lock_st_addr; > static TCGv cpu_lock_value; > > #ifndef CONFIG_USER_ONLY > @@ -116,7 +115,6 @@ void alpha_translate_init(void) > static const GlobalVar vars[] = { > DEF_VAR(pc), > DEF_VAR(lock_addr), > - DEF_VAR(lock_st_addr), > DEF_VAR(lock_value), > }; > > @@ -198,6 +196,23 @@ static TCGv dest_sink(DisasContext *ctx) > return ctx->sink; > } > > +static void free_context_temps(DisasContext *ctx) > +{ > + if (!TCGV_IS_UNUSED_I64(ctx->sink)) { > + tcg_gen_discard_i64(ctx->sink); > + tcg_temp_free(ctx->sink); > + TCGV_UNUSED_I64(ctx->sink); > + } > + if (!TCGV_IS_UNUSED_I64(ctx->zero)) { > + tcg_temp_free(ctx->zero); > + TCGV_UNUSED_I64(ctx->zero); > + } > + if (!TCGV_IS_UNUSED_I64(ctx->lit)) { > + tcg_temp_free(ctx->lit); > + TCGV_UNUSED_I64(ctx->lit); > + } > +} > + > static TCGv load_gpr(DisasContext *ctx, unsigned reg) > { > if (likely(reg < 31)) { > @@ -395,56 +410,37 @@ static ExitStatus gen_store_conditional(DisasContext > *ctx, int ra, int rb, > int32_t disp16, int mem_idx, > TCGMemOp op) > { > - TCGv addr; > - > - if (ra == 31) { > - /* ??? Don't bother storing anything. The user can't tell > - the difference, since the zero register always reads zero. */ > - return NO_EXIT; > - } > - > -#if defined(CONFIG_USER_ONLY) > - addr = cpu_lock_st_addr; > -#else > - addr = tcg_temp_local_new(); > -#endif > + TCGLabel *lab_fail, *lab_done; > + TCGv addr, val; > > + addr = tcg_temp_new_i64(); > tcg_gen_addi_i64(addr, load_gpr(ctx, rb), disp16); > + free_context_temps(ctx); > > -#if defined(CONFIG_USER_ONLY) > - /* ??? This is handled via a complicated version of compare-and-swap > - in the cpu_loop. Hopefully one day we'll have a real CAS opcode > - in TCG so that this isn't necessary. */ > - return gen_excp(ctx, (op & MO_SIZE) == MO_64 ? EXCP_STQ_C : EXCP_STL_C, > ra); > -#else > - /* ??? In system mode we are never multi-threaded, so CAS can be > - implemented via a non-atomic load-compare-store sequence. */ > - { > - TCGLabel *lab_fail, *lab_done; > - TCGv val; > + lab_fail = gen_new_label(); > + lab_done = gen_new_label(); > + tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); > + tcg_temp_free_i64(addr); > > - lab_fail = gen_new_label(); > - lab_done = gen_new_label(); > - tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); > + val = tcg_temp_new_i64(); > + tcg_gen_atomic_cmpxchg_i64(val, cpu_lock_addr, cpu_lock_value, > + load_gpr(ctx, ra), mem_idx, op); > + free_context_temps(ctx);
I don't quite follow what free_context_temps() is doing and why it needs to be done twice during the building of this instructions TCGOps? > > - val = tcg_temp_new(); > - tcg_gen_qemu_ld_i64(val, addr, mem_idx, op); > - tcg_gen_brcond_i64(TCG_COND_NE, val, cpu_lock_value, lab_fail); > - > - tcg_gen_qemu_st_i64(ctx->ir[ra], addr, mem_idx, op); > - tcg_gen_movi_i64(ctx->ir[ra], 1); > - tcg_gen_br(lab_done); > + if (ra != 31) { > + tcg_gen_setcond_i64(TCG_COND_EQ, ctx->ir[ra], val, cpu_lock_value); > + } > + tcg_temp_free_i64(val); > + tcg_gen_br(lab_done); > > - gen_set_label(lab_fail); > + gen_set_label(lab_fail); > + if (ra != 31) { > tcg_gen_movi_i64(ctx->ir[ra], 0); > - > - gen_set_label(lab_done); > - tcg_gen_movi_i64(cpu_lock_addr, -1); > - > - tcg_temp_free(addr); > - return NO_EXIT; > } > -#endif > + > + gen_set_label(lab_done); > + tcg_gen_movi_i64(cpu_lock_addr, -1); > + return NO_EXIT; > } > > static bool in_superpage(DisasContext *ctx, int64_t addr) > @@ -2914,6 +2910,10 @@ void gen_intermediate_code(CPUAlphaState *env, struct > TranslationBlock *tb) > /* Similarly for flush-to-zero. */ > ctx.tb_ftz = -1; > > + TCGV_UNUSED_I64(ctx.zero); > + TCGV_UNUSED_I64(ctx.sink); > + TCGV_UNUSED_I64(ctx.lit); > + > num_insns = 0; > max_insns = tb->cflags & CF_COUNT_MASK; > if (max_insns == 0) { > @@ -2948,23 +2948,9 @@ void gen_intermediate_code(CPUAlphaState *env, struct > TranslationBlock *tb) > } > insn = cpu_ldl_code(env, ctx.pc); > > - TCGV_UNUSED_I64(ctx.zero); > - TCGV_UNUSED_I64(ctx.sink); > - TCGV_UNUSED_I64(ctx.lit); > - > ctx.pc += 4; > ret = translate_one(ctxp, insn); > - > - if (!TCGV_IS_UNUSED_I64(ctx.sink)) { > - tcg_gen_discard_i64(ctx.sink); > - tcg_temp_free(ctx.sink); > - } > - if (!TCGV_IS_UNUSED_I64(ctx.zero)) { > - tcg_temp_free(ctx.zero); > - } > - if (!TCGV_IS_UNUSED_I64(ctx.lit)) { > - tcg_temp_free(ctx.lit); > - } > + free_context_temps(ctxp); > > /* If we reach a page boundary, are single stepping, > or exhaust instruction count, stop generation. */ -- Alex Bennée