On Wed, Apr 27, 2016 at 09:58:17PM +0800, Boqun Feng wrote:
> On Wed, Apr 27, 2016 at 05:16:45PM +0800, Pan Xinhui wrote:
> > From: Pan Xinhui <xinhui....@linux.vnet.ibm.com>
> > 
> > Implement xchg{u8,u16}{local,relaxed}, and
> > cmpxchg{u8,u16}{,local,acquire,relaxed}.
> > 
> > It works on all ppc.
> > 
> > remove volatile of first parameter in __cmpxchg_local and __cmpxchg
> > 
> > Suggested-by: Peter Zijlstra (Intel) <pet...@infradead.org>
> > Signed-off-by: Pan Xinhui <xinhui....@linux.vnet.ibm.com>
> > ---
> > change from v3:
> >     rewrite in asm for the LL/SC.
> >     remove volatile in __cmpxchg_local and __cmpxchg.
> > change from v2:
> >     in the do{}while(), we save one load and use corresponding cmpxchg 
> > suffix.
> >     Also add corresponding __cmpxchg_u32 function declaration in the 
> > __XCHG_GEN 
> > change from V1:
> >     rework totally.
> > ---
> >  arch/powerpc/include/asm/cmpxchg.h | 109 
> > ++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 106 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/cmpxchg.h 
> > b/arch/powerpc/include/asm/cmpxchg.h
> > index 44efe73..8a3735f 100644
> > --- a/arch/powerpc/include/asm/cmpxchg.h
> > +++ b/arch/powerpc/include/asm/cmpxchg.h
> > @@ -7,6 +7,71 @@
> >  #include <asm/asm-compat.h>
> >  #include <linux/bug.h>
> >  
> > +#ifdef __BIG_ENDIAN
> > +#define BITOFF_CAL(size, off)      ((sizeof(u32) - size - off) * 
> > BITS_PER_BYTE)
> > +#else
> > +#define BITOFF_CAL(size, off)      (off * BITS_PER_BYTE)
> > +#endif
> > +
> > +#define XCHG_GEN(type, sfx, cl)                            \
> > +static inline u32 __xchg_##type##sfx(void *p, u32 val)             \
> > +{                                                          \
> > +   unsigned int prev, prev_mask, tmp, bitoff, off;         \
> > +                                                           \
> > +   off = (unsigned long)p % sizeof(u32);                   \
> > +   bitoff = BITOFF_CAL(sizeof(type), off);                 \
> > +   p -= off;                                               \
> > +   val <<= bitoff;                                         \
> > +   prev_mask = (u32)(type)-1 << bitoff;                    \
> > +                                                           \
> > +   __asm__ __volatile__(                                   \
> > +"1:        lwarx   %0,0,%3\n"                                      \
> > +"  andc    %1,%0,%5\n"                                     \
> > +"  or      %1,%1,%4\n"                                     \
> > +   PPC405_ERR77(0,%3)                                      \
> > +"  stwcx.  %1,0,%3\n"                                      \
> > +"  bne-    1b\n"                                           \
> > +   : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p)            \
> 
> I think we can save the "tmp" here by:
> 
> 	__asm__ __volatile__(
> "1:   lwarx   %0,0,%2\n"
> "     andc    %0,%0,%4\n"
> "     or      %0,%0,%3\n"
>       PPC405_ERR77(0,%2)
> "     stwcx.  %0,0,%2\n"
> "     bne-    1b\n"
>       : "=&r" (prev), "+m" (*(u32*)p)
>       : "r" (p), "r" (val), "r" (prev_mask)
>       : "cc", cl);
> 
> right?
> 
> > +   : "r" (p), "r" (val), "r" (prev_mask)                   \
> > +   : "cc", cl);                                            \
> > +                                                           \
> > +   return prev >> bitoff;                                  \
> > +}
> > +
> > +#define CMPXCHG_GEN(type, sfx, br, br2, cl)                        \
> > +static inline                                                      \
> > +u32 __cmpxchg_##type##sfx(void *p, u32 old, u32 new)               \
> > +{                                                          \
> > +   unsigned int prev, prev_mask, tmp, bitoff, off;         \
> > +                                                           \
> > +   off = (unsigned long)p % sizeof(u32);                   \
> > +   bitoff = BITOFF_CAL(sizeof(type), off);                 \
> > +   p -= off;                                               \
> > +   old <<= bitoff;                                         \
> > +   new <<= bitoff;                                         \
> > +   prev_mask = (u32)(type)-1 << bitoff;                    \
> > +                                                           \
> > +   __asm__ __volatile__(                                   \
> > +   br                                                      \
> > +"1:        lwarx   %0,0,%3\n"                                      \
> > +"  and     %1,%0,%6\n"                                     \
> > +"  cmpw    0,%1,%4\n"                                      \
> > +"  bne-    2f\n"                                           \
> > +"  andc    %1,%0,%6\n"                                     \
> > +"  or      %1,%1,%5\n"                                     \
> > +   PPC405_ERR77(0,%3)                                      \
> > +"  stwcx.  %1,0,%3\n"                                      \
> > +"  bne-    1b\n"                                           \
> > +   br2                                                     \
> > +   "\n"                                                    \
> > +"2:"                                                               \
> > +   : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p)            \
> 
> And "tmp" here could also be saved by:
> 
> "1:   lwarx   %0,0,%2\n"                                      \
> "     xor     %3,%0,%3\n"                                     \
> "     and.    %3,%3,%5\n"                                     \
> "     bne-    2f\n"                                           \
> "     andc    %0,%0,%5\n"                                     \
> "     or      %0,%0,%4\n"                                     \
>       PPC405_ERR77(0,%2)                                      \
> "     stwcx.  %0,0,%2\n"                                      \
> "     bne-    1b\n"                                           \
>       br2                                                     \
>       "\n"                                                    \
> "2:"                                                          \
>       : "=&r" (prev), "+m" (*(u32*)p)         \
>       : "r" (p), "r" (old), "r" (new), "r" (prev_mask)        \
>       : "cc", cl);                                            \
> 
> right?
> 

Sorry, my bad, we can't implement cmpxchg like this... please ignore
this; I should really go to bed soon...

But still, we can save the "tmp" for xchg() I think.

Regards,
Boqun

> IIUC, saving the local variable "tmp" will result in saving a general
> register for the compilers to use for other variables.
> 
> So thoughts?
> 
> Regards,
> Boqun
> 

Attachment: signature.asc
Description: PGP signature

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to