From: "Emilio G. Cota" <c...@braap.org> The diff here is uglier than necessary. All this does is to turn
FOO into: if (s->prefix & PREFIX_LOCK) { BAR } else { FOO } where FOO is the original implementation of an unlocked cmpxchg. [rth: Adjust unlocked cmpxchg to use movcond instead of branches. Adjust helpers to use atomic helpers.] Signed-off-by: Emilio G. Cota <c...@braap.org> Message-Id: <1467054136-10430-6-git-send-email-c...@braap.org> Signed-off-by: Richard Henderson <r...@twiddle.net> --- target-i386/mem_helper.c | 96 ++++++++++++++++++++++++++++++++++++++---------- target-i386/translate.c | 87 +++++++++++++++++++++---------------------- 2 files changed, 120 insertions(+), 63 deletions(-) diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c index c2f4769..5c0558f 100644 --- a/target-i386/mem_helper.c +++ b/target-i386/mem_helper.c @@ -22,6 +22,8 @@ #include "exec/helper-proto.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "tcg.h" /* broken thread support */ @@ -58,20 +60,39 @@ void helper_lock_init(void) void helper_cmpxchg8b(CPUX86State *env, target_ulong a0) { - uint64_t d; + uintptr_t ra = GETPC(); + uint64_t oldv, cmpv, newv; int eflags; eflags = cpu_cc_compute_all(env, CC_OP); - d = cpu_ldq_data_ra(env, a0, GETPC()); - if (d == (((uint64_t)env->regs[R_EDX] << 32) | (uint32_t)env->regs[R_EAX])) { - cpu_stq_data_ra(env, a0, ((uint64_t)env->regs[R_ECX] << 32) - | (uint32_t)env->regs[R_EBX], GETPC()); - eflags |= CC_Z; + + cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); + newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); + + if (parallel_cpus) { +#ifdef CONFIG_USER_ONLY + uint64_t *haddr = g2h(a0); + cmpv = cpu_to_le64(cmpv); + newv = cpu_to_le64(newv); + oldv = atomic_cmpxchg(haddr, cmpv, newv); + oldv = le64_to_cpu(oldv); +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx); + oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra); +#endif } else { + oldv = cpu_ldq_data_ra(env, a0, ra); + newv = (cmpv == oldv ? newv : oldv); /* always do the store */ - cpu_stq_data_ra(env, a0, d, GETPC()); - env->regs[R_EDX] = (uint32_t)(d >> 32); - env->regs[R_EAX] = (uint32_t)d; + cpu_stq_data_ra(env, a0, newv, ra); + } + + if (oldv == cmpv) { + eflags |= CC_Z; + } else { + env->regs[R_EAX] = (uint32_t)oldv; + env->regs[R_EDX] = (uint32_t)(oldv >> 32); eflags &= ~CC_Z; } CC_SRC = eflags; @@ -80,25 +101,60 @@ void helper_cmpxchg8b(CPUX86State *env, target_ulong a0) #ifdef TARGET_X86_64 void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) { - uint64_t d0, d1; + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; int eflags; + bool success; if ((a0 & 0xf) != 0) { raise_exception_ra(env, EXCP0D_GPF, GETPC()); } eflags = cpu_cc_compute_all(env, CC_OP); - d0 = cpu_ldq_data_ra(env, a0, GETPC()); - d1 = cpu_ldq_data_ra(env, a0 + 8, GETPC()); - if (d0 == env->regs[R_EAX] && d1 == env->regs[R_EDX]) { - cpu_stq_data_ra(env, a0, env->regs[R_EBX], GETPC()); - cpu_stq_data_ra(env, a0 + 8, env->regs[R_ECX], GETPC()); + + cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); + newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); + + if (parallel_cpus) { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#elif defined(CONFIG_USER_ONLY) + Int128 *haddr = g2h(a0); + oldv = cmpv; +#ifdef HOST_WORDS_BIGENDIAN + oldv = bswap128(oldv); + newv = bswap128(newv); +#endif + success = __atomic_compare_exchange_16(haddr, &oldv, newv, false, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST); +#ifdef HOST_WORDS_BIGENDIAN + oldv = bswap128(oldv); +#endif +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_le_mmu(env, a0, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); +#endif + } else { + uint64_t o0 = cpu_ldq_data_ra(env, a0 + 0, ra); + uint64_t o1 = cpu_ldq_data_ra(env, a0 + 8, ra); + + oldv = int128_make128(o0, o1); + success = int128_eq(oldv, cmpv); + if (!success) { + newv = oldv; + } + + cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra); + cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra); + } + + if (success) { eflags |= CC_Z; } else { - /* always do the store */ - cpu_stq_data_ra(env, a0, d0, GETPC()); - cpu_stq_data_ra(env, a0 + 8, d1, GETPC()); - env->regs[R_EDX] = d1; - env->regs[R_EAX] = d0; + env->regs[R_EAX] = int128_getlo(oldv); + env->regs[R_EDX] = int128_gethi(oldv); eflags &= ~CC_Z; } CC_SRC = eflags; diff --git a/target-i386/translate.c b/target-i386/translate.c index 7dea18b..2244f38 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -5070,57 +5070,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x1b0: case 0x1b1: /* cmpxchg Ev, Gv */ { - TCGLabel *label1, *label2; - TCGv t0, t1, t2, a0; + TCGv oldv, newv, cmpv; ot = mo_b_d(b, dflag); modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); - a0 = tcg_temp_local_new(); - gen_op_mov_v_reg(ot, t1, reg); - if (mod == 3) { - rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, t0, rm); - } else { + oldv = tcg_temp_new(); + newv = tcg_temp_new(); + cmpv = tcg_temp_new(); + gen_op_mov_v_reg(ot, newv, reg); + tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]); + + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) { + goto illegal_op; + } gen_lea_modrm(env, s, modrm); - tcg_gen_mov_tl(a0, cpu_A0); - gen_op_ld_v(s, ot, t0, a0); - rm = 0; /* avoid warning */ - } - label1 = gen_new_label(); - tcg_gen_mov_tl(t2, cpu_regs[R_EAX]); - gen_extu(ot, t0); - gen_extu(ot, t2); - tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1); - label2 = gen_new_label(); - if (mod == 3) { - gen_op_mov_reg_v(ot, R_EAX, t0); - tcg_gen_br(label2); - gen_set_label(label1); - gen_op_mov_reg_v(ot, rm, t1); + tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv, + s->mem_index, ot | MO_LE); + gen_op_mov_reg_v(ot, R_EAX, oldv); } else { - /* perform no-op store cycle like physical cpu; must be - before changing accumulator to ensure idempotency if - the store faults and the instruction is restarted */ - gen_op_st_v(s, ot, t0, a0); - gen_op_mov_reg_v(ot, R_EAX, t0); - tcg_gen_br(label2); - gen_set_label(label1); - gen_op_st_v(s, ot, t1, a0); - } - gen_set_label(label2); - tcg_gen_mov_tl(cpu_cc_src, t0); - tcg_gen_mov_tl(cpu_cc_srcT, t2); - tcg_gen_sub_tl(cpu_cc_dst, t2, t0); + if (mod == 3) { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_v_reg(ot, oldv, rm); + } else { + gen_lea_modrm(env, s, modrm); + gen_op_ld_v(s, ot, oldv, cpu_A0); + rm = 0; /* avoid warning */ + } + gen_extu(ot, oldv); + gen_extu(ot, cmpv); + /* store value = (old == cmp ? new : old); */ + tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv); + if (mod == 3) { + gen_op_mov_reg_v(ot, R_EAX, oldv); + gen_op_mov_reg_v(ot, rm, newv); + } else { + /* Perform an unconditional store cycle like physical cpu; + must be before changing accumulator to ensure + idempotency if the store faults and the instruction + is restarted */ + gen_op_st_v(s, ot, newv, cpu_A0); + gen_op_mov_reg_v(ot, R_EAX, oldv); + } + } + tcg_gen_mov_tl(cpu_cc_src, oldv); + tcg_gen_mov_tl(cpu_cc_srcT, cmpv); + tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv); set_cc_op(s, CC_OP_SUBB + ot); - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); - tcg_temp_free(a0); + tcg_temp_free(oldv); + tcg_temp_free(newv); + tcg_temp_free(cmpv); } break; case 0x1c7: /* cmpxchg8b */ -- 2.5.5