From: "Emilio G. Cota" <c...@braap.org> The diff here is uglier than necessary. All this does is to turn
FOO into: if (s->prefix & PREFIX_LOCK) { BAR } else { FOO } where FOO is the original implementation of an unlocked cmpxchg. [rth: Adjust unlocked cmpxchg to use movcond instead of branches. Adjust helpers to use atomic helpers.] Signed-off-by: Emilio G. Cota <c...@braap.org> Message-Id: <1467054136-10430-6-git-send-email-c...@braap.org> Signed-off-by: Richard Henderson <r...@twiddle.net> --- target-i386/helper.h | 2 + target-i386/mem_helper.c | 134 +++++++++++++++++++++++++++++++++++++++-------- target-i386/translate.c | 99 ++++++++++++++++++---------------- 3 files changed, 169 insertions(+), 66 deletions(-) diff --git a/target-i386/helper.h b/target-i386/helper.h index 1320edc..729d4b6 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -74,8 +74,10 @@ DEF_HELPER_3(boundw, void, env, tl, int) DEF_HELPER_3(boundl, void, env, tl, int) DEF_HELPER_1(rsm, void, env) DEF_HELPER_2(into, void, env, int) +DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl) DEF_HELPER_2(cmpxchg8b, void, env, tl) #ifdef TARGET_X86_64 +DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl) DEF_HELPER_2(cmpxchg16b, void, env, tl) #endif DEF_HELPER_1(single_step, void, env) diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c index 5bc0594..c4b5c5b 100644 --- a/target-i386/mem_helper.c +++ b/target-i386/mem_helper.c @@ -22,6 +22,8 @@ #include "exec/helper-proto.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "tcg.h" /* broken thread support */ @@ -56,53 +58,143 @@ void helper_lock_init(void) } #endif +void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) +{ + uintptr_t ra = GETPC(); + uint64_t oldv, cmpv, newv; + int eflags; + + eflags = cpu_cc_compute_all(env, CC_OP); + + cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); + newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); + + oldv = cpu_ldq_data_ra(env, a0, ra); + newv = (cmpv == oldv ? newv : oldv); + /* always do the store */ + cpu_stq_data_ra(env, a0, newv, ra); + + if (oldv == cmpv) { + eflags |= CC_Z; + } else { + env->regs[R_EAX] = (uint32_t)oldv; + env->regs[R_EDX] = (uint32_t)(oldv >> 32); + eflags &= ~CC_Z; + } + CC_SRC = eflags; +} + void helper_cmpxchg8b(CPUX86State *env, target_ulong a0) { - uint64_t d; +#ifdef CONFIG_ATOMIC64 + uint64_t oldv, cmpv, newv; int eflags; eflags = cpu_cc_compute_all(env, CC_OP); - d = cpu_ldq_data_ra(env, a0, GETPC()); - if (d == (((uint64_t)env->regs[R_EDX] << 32) | (uint32_t)env->regs[R_EAX])) { - cpu_stq_data_ra(env, a0, ((uint64_t)env->regs[R_ECX] << 32) - | (uint32_t)env->regs[R_EBX], GETPC()); + + cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); + newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); + +#ifdef CONFIG_USER_ONLY + { + uint64_t *haddr = g2h(a0); + cmpv = cpu_to_le64(cmpv); + newv = cpu_to_le64(newv); + oldv = atomic_cmpxchg__nocheck(haddr, cmpv, newv); + oldv = le64_to_cpu(oldv); + } +#else + { + uintptr_t ra = GETPC(); + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx); + oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra); + } +#endif + + if (oldv == cmpv) { eflags |= CC_Z; } else { - /* always do the store */ - cpu_stq_data_ra(env, a0, d, GETPC()); - env->regs[R_EDX] = (uint32_t)(d >> 32); - env->regs[R_EAX] = (uint32_t)d; + env->regs[R_EAX] = (uint32_t)oldv; + env->regs[R_EDX] = (uint32_t)(oldv >> 32); eflags &= ~CC_Z; } CC_SRC = eflags; +#else + cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); +#endif /* CONFIG_ATOMIC64 */ } #ifdef TARGET_X86_64 -void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) +void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0) { - uint64_t d0, d1; + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; + uint64_t o0, o1; int eflags; + bool success; if ((a0 & 0xf) != 0) { raise_exception_ra(env, EXCP0D_GPF, GETPC()); } eflags = cpu_cc_compute_all(env, CC_OP); - d0 = cpu_ldq_data_ra(env, a0, GETPC()); - d1 = cpu_ldq_data_ra(env, a0 + 8, GETPC()); - if (d0 == env->regs[R_EAX] && d1 == env->regs[R_EDX]) { - cpu_stq_data_ra(env, a0, env->regs[R_EBX], GETPC()); - cpu_stq_data_ra(env, a0 + 8, env->regs[R_ECX], GETPC()); + + cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); + newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); + + o0 = cpu_ldq_data_ra(env, a0 + 0, ra); + o1 = cpu_ldq_data_ra(env, a0 + 8, ra); + + oldv = int128_make128(o0, o1); + success = int128_eq(oldv, cmpv); + if (!success) { + newv = oldv; + } + + cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra); + cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra); + + if (success) { eflags |= CC_Z; } else { - /* always do the store */ - cpu_stq_data_ra(env, a0, d0, GETPC()); - cpu_stq_data_ra(env, a0 + 8, d1, GETPC()); - env->regs[R_EDX] = d1; - env->regs[R_EAX] = d0; + env->regs[R_EAX] = int128_getlo(oldv); + env->regs[R_EDX] = int128_gethi(oldv); eflags &= ~CC_Z; } CC_SRC = eflags; } + +void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) +{ + uintptr_t ra = GETPC(); + + if ((a0 & 0xf) != 0) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } else { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int eflags = cpu_cc_compute_all(env, CC_OP); + + Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); + Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); + + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + Int128 oldv = helper_atomic_cmpxchgo_le_mmu(env, a0, cmpv, + newv, oi, ra); + + if (int128_eq(oldv, cmpv)) { + eflags |= CC_Z; + } else { + env->regs[R_EAX] = int128_getlo(oldv); + env->regs[R_EDX] = int128_gethi(oldv); + eflags &= ~CC_Z; + } + CC_SRC = eflags; +#endif + } +} #endif void helper_boundw(CPUX86State *env, target_ulong a0, int v) diff --git a/target-i386/translate.c b/target-i386/translate.c index 9447557..5d9790a 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -5070,57 +5070,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x1b0: case 0x1b1: /* cmpxchg Ev, Gv */ { - TCGLabel *label1, *label2; - TCGv t0, t1, t2, a0; + TCGv oldv, newv, cmpv; ot = mo_b_d(b, dflag); modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); - a0 = tcg_temp_local_new(); - gen_op_mov_v_reg(ot, t1, reg); - if (mod == 3) { - rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, t0, rm); - } else { + oldv = tcg_temp_new(); + newv = tcg_temp_new(); + cmpv = tcg_temp_new(); + gen_op_mov_v_reg(ot, newv, reg); + tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]); + + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) { + goto illegal_op; + } gen_lea_modrm(env, s, modrm); - tcg_gen_mov_tl(a0, cpu_A0); - gen_op_ld_v(s, ot, t0, a0); - rm = 0; /* avoid warning */ - } - label1 = gen_new_label(); - tcg_gen_mov_tl(t2, cpu_regs[R_EAX]); - gen_extu(ot, t0); - gen_extu(ot, t2); - tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1); - label2 = gen_new_label(); - if (mod == 3) { - gen_op_mov_reg_v(ot, R_EAX, t0); - tcg_gen_br(label2); - gen_set_label(label1); - gen_op_mov_reg_v(ot, rm, t1); + tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv, + s->mem_index, ot | MO_LE); + gen_op_mov_reg_v(ot, R_EAX, oldv); } else { - /* perform no-op store cycle like physical cpu; must be - before changing accumulator to ensure idempotency if - the store faults and the instruction is restarted */ - gen_op_st_v(s, ot, t0, a0); - gen_op_mov_reg_v(ot, R_EAX, t0); - tcg_gen_br(label2); - gen_set_label(label1); - gen_op_st_v(s, ot, t1, a0); - } - gen_set_label(label2); - tcg_gen_mov_tl(cpu_cc_src, t0); - tcg_gen_mov_tl(cpu_cc_srcT, t2); - tcg_gen_sub_tl(cpu_cc_dst, t2, t0); + if (mod == 3) { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_v_reg(ot, oldv, rm); + } else { + gen_lea_modrm(env, s, modrm); + gen_op_ld_v(s, ot, oldv, cpu_A0); + rm = 0; /* avoid warning */ + } + gen_extu(ot, oldv); + gen_extu(ot, cmpv); + /* store value = (old == cmp ? new : old); */ + tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv); + if (mod == 3) { + gen_op_mov_reg_v(ot, R_EAX, oldv); + gen_op_mov_reg_v(ot, rm, newv); + } else { + /* Perform an unconditional store cycle like physical cpu; + must be before changing accumulator to ensure + idempotency if the store faults and the instruction + is restarted */ + gen_op_st_v(s, ot, newv, cpu_A0); + gen_op_mov_reg_v(ot, R_EAX, oldv); + } + } + tcg_gen_mov_tl(cpu_cc_src, oldv); + tcg_gen_mov_tl(cpu_cc_srcT, cmpv); + tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv); set_cc_op(s, CC_OP_SUBB + ot); - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); - tcg_temp_free(a0); + tcg_temp_free(oldv); + tcg_temp_free(newv); + tcg_temp_free(cmpv); } break; case 0x1c7: /* cmpxchg8b */ @@ -5133,14 +5134,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_helper_cmpxchg16b(cpu_env, cpu_A0); + if ((s->prefix & PREFIX_LOCK) && parallel_cpus) { + gen_helper_cmpxchg16b(cpu_env, cpu_A0); + } else { + gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0); + } } else #endif { if (!(s->cpuid_features & CPUID_CX8)) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_helper_cmpxchg8b(cpu_env, cpu_A0); + if ((s->prefix & PREFIX_LOCK) && parallel_cpus) { + gen_helper_cmpxchg8b(cpu_env, cpu_A0); + } else { + gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0); + } } set_cc_op(s, CC_OP_EFLAGS); break; -- 2.7.4