On Fri, Jul 01, 2016 at 10:04:37 -0700, Richard Henderson wrote: > From: "Emilio G. Cota" <c...@braap.org> > > The diff here is uglier than necessary. All this does is to turn > > FOO > > into: > > if (s->prefix & PREFIX_LOCK) { > BAR > } else { > FOO > } > > where FOO is the original implementation of an unlocked cmpxchg. > > [rth: Adjust unlocked cmpxchg to use movcond instead of branches. > Adjust helpers to use atomic helpers.] > > Signed-off-by: Emilio G. Cota <c...@braap.org> > Message-Id: <1467054136-10430-6-git-send-email-c...@braap.org> > Signed-off-by: Richard Henderson <r...@twiddle.net> > --- > target-i386/mem_helper.c | 96 > ++++++++++++++++++++++++++++++++++++++---------- > target-i386/translate.c | 87 +++++++++++++++++++++---------------------- > 2 files changed, 120 insertions(+), 63 deletions(-) > > diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c > index c2f4769..5c0558f 100644 > --- a/target-i386/mem_helper.c > +++ b/target-i386/mem_helper.c > @@ -22,6 +22,8 @@ > #include "exec/helper-proto.h" > #include "exec/exec-all.h" > #include "exec/cpu_ldst.h" > +#include "qemu/int128.h" > +#include "tcg.h" > > /* broken thread support */ > > @@ -58,20 +60,39 @@ void helper_lock_init(void) > > void helper_cmpxchg8b(CPUX86State *env, target_ulong a0) > { > - uint64_t d; > + uintptr_t ra = GETPC(); > + uint64_t oldv, cmpv, newv; > int eflags; > > eflags = cpu_cc_compute_all(env, CC_OP); > - d = cpu_ldq_data_ra(env, a0, GETPC()); > - if (d == (((uint64_t)env->regs[R_EDX] << 32) | > (uint32_t)env->regs[R_EAX])) { > - cpu_stq_data_ra(env, a0, ((uint64_t)env->regs[R_ECX] << 32) > - | (uint32_t)env->regs[R_EBX], GETPC()); > - eflags |= CC_Z; > + > + cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); > + newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); > + > + if (parallel_cpus) {
I think here we mean 'if (prefix_locked)', although prefix_locked isn't here. This is why in my original patch I defined two helpers ('locked' and 'unlocked'), otherwise we'll emulate cmpxchg atomically even when the LOCK prefix wasn't there. Not a big deal, but could hide bugs. > +#ifdef CONFIG_USER_ONLY > + uint64_t *haddr = g2h(a0); > + cmpv = cpu_to_le64(cmpv); > + newv = cpu_to_le64(newv); > + oldv = atomic_cmpxchg(haddr, cmpv, newv); > + oldv = le64_to_cpu(oldv); > +#else > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx); > + oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra); > +#endif > } else { > + oldv = cpu_ldq_data_ra(env, a0, ra); > + newv = (cmpv == oldv ? newv : oldv); > /* always do the store */ > - cpu_stq_data_ra(env, a0, d, GETPC()); > - env->regs[R_EDX] = (uint32_t)(d >> 32); > - env->regs[R_EAX] = (uint32_t)d; > + cpu_stq_data_ra(env, a0, newv, ra); > + } > + > + if (oldv == cmpv) { > + eflags |= CC_Z; > + } else { > + env->regs[R_EAX] = (uint32_t)oldv; > + env->regs[R_EDX] = (uint32_t)(oldv >> 32); > eflags &= ~CC_Z; > } > CC_SRC = eflags; > @@ -80,25 +101,60 @@ void helper_cmpxchg8b(CPUX86State *env, target_ulong a0) > #ifdef TARGET_X86_64 > void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) > { > - uint64_t d0, d1; > + uintptr_t ra = GETPC(); > + Int128 oldv, cmpv, newv; > int eflags; > + bool success; > > if ((a0 & 0xf) != 0) { > raise_exception_ra(env, EXCP0D_GPF, GETPC()); > } > eflags = cpu_cc_compute_all(env, CC_OP); > - d0 = cpu_ldq_data_ra(env, a0, GETPC()); > - d1 = cpu_ldq_data_ra(env, a0 + 8, GETPC()); > - if (d0 == env->regs[R_EAX] && d1 == env->regs[R_EDX]) { > - cpu_stq_data_ra(env, a0, env->regs[R_EBX], GETPC()); > - cpu_stq_data_ra(env, a0 + 8, env->regs[R_ECX], GETPC()); > + > + cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); > + newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); > + > + if (parallel_cpus) { Ditto. FWIW I tested the x86_64 bits with the all the ck_pr regression tests in concurrencykit. Would be nice to get access to a big-endian machine to test the byte-ordering bits -- I sent a request to the GCC compile farm earlier today for this purpose. Emilio