On Fri, Feb 03, 2017 at 02:26:02PM +0100, Peter Zijlstra wrote:
> Add a new cmpxchg interface:
> 
>   bool try_cmpxchg(u{8,16,32,64} *ptr, u{8,16,32,64} *val, u{8,16,32,64} new);
> 
> Where the boolean returns the result of the compare; and thus if the
> exchange happened; and in case of failure, the new value of *ptr is
> returned in *val.
> 
> This allows simplification/improvement of loops like:
> 
>       for (;;) {
>               new = val $op $imm;
>               old = cmpxchg(ptr, val, new);
>               if (old == val)
>                       break;
>               val = old;
>       }
> 
> into:
> 
>       for (;;) {
>               new = val $op $imm;
>               if (try_cmpxchg(ptr, &val, new))
>                       break;
>       }
> 
> while also generating better code (GCC6 and onwards).
> 

But switching to try_cmpxchg() will make @val a memory location, which
cannot be kept in a register. And this will generate unnecessary
memory accesses on archs that have enough registers (e.g. PPC).

> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> --- a/arch/x86/include/asm/atomic.h
> +++ b/arch/x86/include/asm/atomic.h
> @@ -186,6 +186,12 @@ static __always_inline int atomic_cmpxch
>       return cmpxchg(&v->counter, old, new);
>  }
>  
> +#define atomic_try_cmpxchg atomic_try_cmpxchg
> +static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int 
> new)
> +{
> +     return try_cmpxchg(&v->counter, old, new);
> +}
> +
>  static inline int atomic_xchg(atomic_t *v, int new)
>  {
>       return xchg(&v->counter, new);
> --- a/arch/x86/include/asm/cmpxchg.h
> +++ b/arch/x86/include/asm/cmpxchg.h
> @@ -153,6 +153,75 @@ extern void __add_wrong_size(void)
>  #define cmpxchg_local(ptr, old, new)                                 \
>       __cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
>  
> +
> +#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)             \
> +({                                                                   \
> +     bool success;                                                   \
> +     __typeof__(_ptr) _old = (_pold);                                \
> +     __typeof__(*(_ptr)) __old = *_old;                              \
> +     __typeof__(*(_ptr)) __new = (_new);                             \
> +     switch (size) {                                                 \
> +     case __X86_CASE_B:                                              \
> +     {                                                               \
> +             volatile u8 *__ptr = (volatile u8 *)(_ptr);             \
> +             asm volatile(lock "cmpxchgb %[new], %[ptr]"             \
> +                          CC_SET(z)                                  \
> +                          : CC_OUT(z) (success),                     \
> +                            [ptr] "+m" (*__ptr),                     \
> +                            [old] "+a" (__old)                       \
> +                          : [new] "q" (__new)                        \
> +                          : "memory");                               \
> +             break;                                                  \
> +     }                                                               \
> +     case __X86_CASE_W:                                              \
> +     {                                                               \
> +             volatile u16 *__ptr = (volatile u16 *)(_ptr);           \
> +             asm volatile(lock "cmpxchgw %[new], %[ptr]"             \
> +                          CC_SET(z)                                  \
> +                          : CC_OUT(z) (success),                     \
> +                            [ptr] "+m" (*__ptr),                     \
> +                            [old] "+a" (__old)                       \
> +                          : [new] "r" (__new)                        \
> +                          : "memory");                               \
> +             break;                                                  \
> +     }                                                               \
> +     case __X86_CASE_L:                                              \
> +     {                                                               \
> +             volatile u32 *__ptr = (volatile u32 *)(_ptr);           \
> +             asm volatile(lock "cmpxchgl %[new], %[ptr]"             \
> +                          CC_SET(z)                                  \
> +                          : CC_OUT(z) (success),                     \
> +                            [ptr] "+m" (*__ptr),                     \
> +                            [old] "+a" (__old)                       \
> +                          : [new] "r" (__new)                        \
> +                          : "memory");                               \
> +             break;                                                  \
> +     }                                                               \
> +     case __X86_CASE_Q:                                              \
> +     {                                                               \
> +             volatile u64 *__ptr = (volatile u64 *)(_ptr);           \
> +             asm volatile(lock "cmpxchgq %[new], %[ptr]"             \
> +                          CC_SET(z)                                  \
> +                          : CC_OUT(z) (success),                     \
> +                            [ptr] "+m" (*__ptr),                     \
> +                            [old] "+a" (__old)                       \
> +                          : [new] "r" (__new)                        \
> +                          : "memory");                               \
> +             break;                                                  \
> +     }                                                               \
> +     default:                                                        \
> +             __cmpxchg_wrong_size();                                 \
> +     }                                                               \
> +     *_old = __old;                                                  \
> +     success;                                                        \
> +})
> +
> +#define __try_cmpxchg(ptr, pold, new, size)                          \
> +     __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
> +
> +#define try_cmpxchg(ptr, pold, new)                                  \
> +     __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
> +
>  /*
>   * xadd() adds "inc" to "*ptr" and atomically returns the previous
>   * value of "*ptr".
> --- a/include/linux/atomic.h
> +++ b/include/linux/atomic.h
> @@ -423,6 +423,28 @@
>  #endif
>  #endif /* atomic_cmpxchg_relaxed */
>  
> +#ifndef atomic_try_cmpxchg
> +
> +#define __atomic_try_cmpxchg(type, _p, _po, _n)                              
> \
> +({                                                                   \
> +     typeof(_po) __po = (_po);                                       \
> +     typeof(*(_po)) __o = *__po;                                     \
> +     bool success = (atomic_cmpxchg##type((_p), __o, (_n)) == __o);  \
> +     *__po = __o;                                                    \

Besides, is this part correct? atomic_cmpxchg_*() wouldn't change the
value of __o, so *__po wouldn't be changed. In other words, in case of
failure, *ptr wouldn't be updated to the new value.

Maybe this should be:

        bool success;
        *__po = atomic_cmpxchg##type((_p), __o, (_n));
        success = (*__po == __o);

, right?

Regards,
Boqun

> +     success;                                                        \
> +})
> +
> +#define atomic_try_cmpxchg(_p, _po, _n)              __atomic_try_cmpxchg(, 
> _p, _po, _n)
> +#define atomic_try_cmpxchg_relaxed(_p, _po, _n)      
> __atomic_try_cmpxchg(_relaxed, _p, _po, _n)
> +#define atomic_try_cmpxchg_acquire(_p, _po, _n)      
> __atomic_try_cmpxchg(_acquire, _p, _po, _n)
> +#define atomic_try_cmpxchg_release(_p, _po, _n)      
> __atomic_try_cmpxchg(_release, _p, _po, _n)
> +
> +#else /* atomic_try_cmpxchg */
> +#define atomic_try_cmpxchg_relaxed   atomic_try_cmpxchg
> +#define atomic_try_cmpxchg_acquire   atomic_try_cmpxchg
> +#define atomic_try_cmpxchg_release   atomic_try_cmpxchg
> +#endif /* atomic_try_cmpxchg */
> +
>  /* cmpxchg_relaxed */
>  #ifndef cmpxchg_relaxed
>  #define  cmpxchg_relaxed             cmpxchg
> 
> 

Attachment: signature.asc
Description: PGP signature

Reply via email to