On Mon, Feb 06, 2017 at 12:24:28PM +0800, Boqun Feng wrote:
> On Fri, Feb 03, 2017 at 02:26:02PM +0100, Peter Zijlstra wrote:
> > Add a new cmpxchg interface:
> > 
> >   bool try_cmpxchg(u{8,16,32,64} *ptr, u{8,16,32,64} *val, u{8,16,32,64} new);
> > 
> > Where the boolean returns the result of the compare; and thus if the
> > exchange happened; and in case of failure, the new value of *ptr is
> > returned in *val.
> > 
> > This allows simplification/improvement of loops like:
> > 
> > 	for (;;) {
> > 		new = val $op $imm;
> > 		old = cmpxchg(ptr, val, new);
> > 		if (old == val)
> > 			break;
> > 		val = old;
> > 	}
> > 
> > into:
> > 
> > 	for (;;) {
> > 		new = val $op $imm;
> > 		if (try_cmpxchg(ptr, &val, new))
> > 			break;
> > 	}
> > 
> > while also generating better code (GCC6 and onwards).
> > 
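FWIW, the same shape exists in userspace as GCC's
__atomic_compare_exchange_n() builtin, which also returns a bool and, on
failure, writes the value it observed in *ptr back through the
'expected' pointer. A minimal illustrative sketch of the before/after
loops (not part of the patch; the function names are made up):

	#include <stdbool.h>
	#include <stdio.h>

	static unsigned int counter;

	/* old style: compare the returned old value by hand */
	static void add_cmpxchg_style(unsigned int *ptr, unsigned int imm)
	{
		unsigned int val = *ptr, old, new;

		for (;;) {
			new = val + imm;
			old = __sync_val_compare_and_swap(ptr, val, new);
			if (old == val)
				break;
			val = old;	/* retry with what we observed */
		}
	}

	/* new style: the builtin updates 'val' for us on failure */
	static void add_try_cmpxchg_style(unsigned int *ptr, unsigned int imm)
	{
		unsigned int val = __atomic_load_n(ptr, __ATOMIC_RELAXED);
		unsigned int new;

		for (;;) {
			new = val + imm;
			if (__atomic_compare_exchange_n(ptr, &val, new, false,
							__ATOMIC_SEQ_CST,
							__ATOMIC_SEQ_CST))
				break;
			/* on failure, 'val' already holds the fresh *ptr */
		}
	}

	int main(void)
	{
		add_cmpxchg_style(&counter, 1);
		add_try_cmpxchg_style(&counter, 2);
		printf("%u\n", counter);	/* prints 3 */
		return 0;
	}
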
> 
> But switching to try_cmpxchg() will make @val a memory location, which
> then cannot be kept in a register. And this will generate unnecessary
> memory accesses on archs that have enough registers (e.g. PPC).
> 

Hmm.. it turns out that compilers can figure out that @val fits in a
register (presumably during escape analysis), and so don't actually
treat it as a memory location. This is at least true for GCC 5.4.0 on
PPC. So I think we can rely on this?

> > Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
> > ---
> > --- a/arch/x86/include/asm/atomic.h
> > +++ b/arch/x86/include/asm/atomic.h
> > @@ -186,6 +186,12 @@ static __always_inline int atomic_cmpxch
> >  	return cmpxchg(&v->counter, old, new);
> >  }
> >  
> > +#define atomic_try_cmpxchg atomic_try_cmpxchg
> > +static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
> > +{
> > +	return try_cmpxchg(&v->counter, old, new);
> > +}
> > +
> >  static inline int atomic_xchg(atomic_t *v, int new)
> >  {
> >  	return xchg(&v->counter, new);
> > --- a/arch/x86/include/asm/cmpxchg.h
> > +++ b/arch/x86/include/asm/cmpxchg.h
> > @@ -153,6 +153,75 @@ extern void __add_wrong_size(void)
> >  #define cmpxchg_local(ptr, old, new)					\
> >  	__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
> >  
> > +
> > +#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)	\
> > +({								\
> > +	bool success;						\
> > +	__typeof__(_ptr) _old = (_pold);			\
> > +	__typeof__(*(_ptr)) __old = *_old;			\
> > +	__typeof__(*(_ptr)) __new = (_new);			\
> > +	switch (size) {						\
> > +	case __X86_CASE_B:					\
> > +	{							\
> > +		volatile u8 *__ptr = (volatile u8 *)(_ptr);	\
> > +		asm volatile(lock "cmpxchgb %[new], %[ptr]"	\
> > +			     CC_SET(z)				\
> > +			     : CC_OUT(z) (success),		\
> > +			       [ptr] "+m" (*__ptr),		\
> > +			       [old] "+a" (__old)		\
> > +			     : [new] "q" (__new)		\
> > +			     : "memory");			\
> > +		break;						\
> > +	}							\
> > +	case __X86_CASE_W:					\
> > +	{							\
> > +		volatile u16 *__ptr = (volatile u16 *)(_ptr);	\
> > +		asm volatile(lock "cmpxchgw %[new], %[ptr]"	\
> > +			     CC_SET(z)				\
> > +			     : CC_OUT(z) (success),		\
> > +			       [ptr] "+m" (*__ptr),		\
> > +			       [old] "+a" (__old)		\
> > +			     : [new] "r" (__new)		\
> > +			     : "memory");			\
> > +		break;						\
> > +	}							\
> > +	case __X86_CASE_L:					\
> > +	{							\
> > +		volatile u32 *__ptr = (volatile u32 *)(_ptr);	\
> > +		asm volatile(lock "cmpxchgl %[new], %[ptr]"	\
> > +			     CC_SET(z)				\
> > +			     : CC_OUT(z) (success),		\
> > +			       [ptr] "+m" (*__ptr),		\
> > +			       [old] "+a" (__old)		\
> > +			     : [new] "r" (__new)		\
> > +			     : "memory");			\
> > +		break;						\
> > +	}							\
> > +	case __X86_CASE_Q:					\
> > +	{							\
> > +		volatile u64 *__ptr = (volatile u64 *)(_ptr);	\
> > +		asm volatile(lock "cmpxchgq %[new], %[ptr]"	\
> > +			     CC_SET(z)				\
> > +			     : CC_OUT(z) (success),		\
> > +			       [ptr] "+m" (*__ptr),		\
> > +			       [old] "+a" (__old)		\
> > +			     : [new] "r" (__new)		\
> > +			     : "memory");			\
> > +		break;						\
> > +	}							\
> > +	default:						\
> > +		__cmpxchg_wrong_size();				\
> > +	}							\
> > +	*_old = __old;						\
> > +	success;						\
> > +})
> > +
> > +#define __try_cmpxchg(ptr, pold, new, size)			\
> > +	__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
> > +
> > +#define try_cmpxchg(ptr, pold, new)				\
> > +	__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
> > +
> >  /*
> >   * xadd() adds "inc" to "*ptr" and atomically returns the previous
> >   * value of "*ptr".
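
FWIW, for anyone reading without the x86 headers at hand: IIRC, with a
compiler that supports asm flag outputs (GCC 6+), CC_SET(z) expands to
just a comment and CC_OUT(z) to an "=@ccz" constraint, so ZF from
cmpxchg is returned directly with no sete; older compilers instead get
a sete into a "=qm" operand. A rough userspace rendering of what the
32-bit case boils down to (illustrative sketch only, not the kernel
code):

	#include <stdbool.h>

	static inline bool try_cmpxchg_u32(volatile unsigned int *ptr,
					   unsigned int *old,
					   unsigned int new)
	{
		bool success;
		unsigned int __old = *old;	/* expected value, lives in %eax */

		asm volatile("lock cmpxchgl %[new], %[ptr]"
			     : "=@ccz" (success),	/* ZF: 1 iff exchange happened */
			       [ptr] "+m" (*ptr),
			       [old] "+a" (__old)	/* on failure, %eax holds *ptr */
			     : [new] "r" (new)
			     : "memory");
		*old = __old;		/* hand the observed value back */
		return success;
	}
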
> > --- a/include/linux/atomic.h
> > +++ b/include/linux/atomic.h
> > @@ -423,6 +423,28 @@
> >  #endif
> >  #endif /* atomic_cmpxchg_relaxed */
> >  
> > +#ifndef atomic_try_cmpxchg
> > +
> > +#define __atomic_try_cmpxchg(type, _p, _po, _n)			\
> > +({									\
> > +	typeof(_po) __po = (_po);					\
> > +	typeof(*(_po)) __o = *__po;					\
> > +	bool success = (atomic_cmpxchg##type((_p), __o, (_n)) == __o);	\
> > +	*__po = __o;							\
> 
> Besides, is this part correct? atomic_cmpxchg_*() wouldn't change the
> value of __o, so *__po wouldn't be changed.. IOW, in case of failure,
> *__po wouldn't be updated with the current value of *ptr.
> 
> Maybe this should be:
> 
> 	bool success;
> 	*__po = atomic_cmpxchg##type((_p), __o, (_n));
> 	sucess = (*__po == _o);

typo... should be

	success = (*__po == __o);
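
FWIW, the difference is easy to demonstrate in userspace. In the sketch
below (illustrative only; the names are invented for the demo, and
__sync_val_compare_and_swap() stands in for atomic_cmpxchg()), the
posted fallback hands back a stale expected value on failure, so a retry
loop built on it could never make progress, while the fixed version
returns what cmpxchg actually observed:

	#include <assert.h>
	#include <stdbool.h>

	/* stand-in for atomic_cmpxchg(): returns what *p held before */
	static int cmpxchg_int(int *p, int old, int new)
	{
		return __sync_val_compare_and_swap(p, old, new);
	}

	/* the fallback as posted: *po is never updated on failure */
	static bool try_cmpxchg_broken(int *p, int *po, int n)
	{
		int o = *po;
		bool success = (cmpxchg_int(p, o, n) == o);
		*po = o;	/* writes back the unchanged expected value */
		return success;
	}

	/* the suggested fix: store the observed value through *po */
	static bool try_cmpxchg_fixed(int *p, int *po, int n)
	{
		int o = *po;
		bool success;
		*po = cmpxchg_int(p, o, n);
		success = (*po == o);
		return success;
	}

	int main(void)
	{
		int v = 42, expected = 0;

		assert(!try_cmpxchg_broken(&v, &expected, 1));
		assert(expected == 0);	/* stale: a retry loop would spin forever */

		expected = 0;
		assert(!try_cmpxchg_fixed(&v, &expected, 1));
		assert(expected == 42);	/* fresh value: retry can make progress */
		return 0;
	}
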
Regards,
Boqun

> 
> , right?
> 
> Regards,
> Boqun
> 
> > +	success;							\
> > +})
> > +
> > +#define atomic_try_cmpxchg(_p, _po, _n)		__atomic_try_cmpxchg(, _p, _po, _n)
> > +#define atomic_try_cmpxchg_relaxed(_p, _po, _n)	__atomic_try_cmpxchg(_relaxed, _p, _po, _n)
> > +#define atomic_try_cmpxchg_acquire(_p, _po, _n)	__atomic_try_cmpxchg(_acquire, _p, _po, _n)
> > +#define atomic_try_cmpxchg_release(_p, _po, _n)	__atomic_try_cmpxchg(_release, _p, _po, _n)
> > +
> > +#else /* atomic_try_cmpxchg */
> > +#define atomic_try_cmpxchg_relaxed	atomic_try_cmpxchg
> > +#define atomic_try_cmpxchg_acquire	atomic_try_cmpxchg
> > +#define atomic_try_cmpxchg_release	atomic_try_cmpxchg
> > +#endif /* atomic_try_cmpxchg */
> > +
> >  /* cmpxchg_relaxed */
> >  #ifndef cmpxchg_relaxed
> >  #define cmpxchg_relaxed cmpxchg
> > 