Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
Excerpts from Christophe Leroy's message of November 7, 2020 6:15 pm: > > > Le 07/11/2020 à 04:23, Nicholas Piggin a écrit : >> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx. >> Add a compile option that allows code to use it, and add support in >> cmpxchg and xchg 8 and 16 bit values. > > Do you mean lharx ? Because lwarx exists on all powerpcs I think. Thanks to all who pointed out the mistakes :) Yes, lharx. > >> >> Signed-off-by: Nicholas Piggin >> --- >> arch/powerpc/Kconfig | 3 + >> arch/powerpc/include/asm/cmpxchg.h | 236 - >> arch/powerpc/platforms/Kconfig.cputype | 5 + >> 3 files changed, 243 insertions(+), 1 deletion(-) >> >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig >> index e9f13fe08492..d231af06f75a 100644 >> --- a/arch/powerpc/Kconfig >> +++ b/arch/powerpc/Kconfig >> @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC >> default y >> depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E >> >> +config PPC_LBARX_LWARX >> +bool > > s/LWARX/LHARX/ ? > > And maybe better with PPC_HAS_LBARX_LWARX ? Yes you're right, PPC_HAS_ fits better. [...] >> +#endif > > That's a lot of code duplication. Could we use some macro, in the same spirit > as what is done in > arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends > ? For now I won't get too fancy. It's a bit ugly but I'm working through a generic atomics conversion patch and trying to also work out a nice form for larx/stcx operation generation macros, I'll look at tidying this up some time after that. Thanks, Nick
Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
On Sun, Nov 08, 2020 at 09:01:52PM +0100, Gabriel Paubert wrote: > On Sat, Nov 07, 2020 at 05:42:57AM -0600, Segher Boessenkool wrote: > > On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote: > > > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote: > > > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx. > > > > > > Hmm, lwarx exists since original Power AFAIR, > > > > Almost: it was new on PowerPC. > > I stand corrected. Does this mean that Power1 (and 2 I believe) had > no SMP support? As I understand it, that's correct. Of course you can always do SMP "by hand" -- you can do all synchronisation via software (perhaps using some knowledge of the specific hardware you're running on), it's just slow (and usually not portable). Compare to SMP on 603 for example. Segher
Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
On Sat, Nov 07, 2020 at 05:42:57AM -0600, Segher Boessenkool wrote: > On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote: > > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote: > > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx. > > > > Hmm, lwarx exists since original Power AFAIR, > > Almost: it was new on PowerPC. I stand corrected. Does this mean that Power1 (and 2 I believe) had no SMP support? Gabriel
Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote: > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote: > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx. > > Hmm, lwarx exists since original Power AFAIR, Almost: it was new on PowerPC. Segher
Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
Le 07/11/2020 à 04:23, Nicholas Piggin a écrit : ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx. Add a compile option that allows code to use it, and add support in cmpxchg and xchg 8 and 16 bit values. Do you mean lharx ? Because lwarx exists on all powerpcs I think. Signed-off-by: Nicholas Piggin --- arch/powerpc/Kconfig | 3 + arch/powerpc/include/asm/cmpxchg.h | 236 - arch/powerpc/platforms/Kconfig.cputype | 5 + 3 files changed, 243 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e9f13fe08492..d231af06f75a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC default y depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E +config PPC_LBARX_LWARX + bool s/LWARX/LHARX/ ? And maybe better with PPC_HAS_LBARX_LWARX ? + config EARLY_PRINTK bool default y diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index cf091c4c22e5..17fd996dc0d4 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \ * the previous value stored there. */ +#ifndef CONFIG_PPC_LBARX_LWARX XCHG_GEN(u8, _local, "memory"); XCHG_GEN(u8, _relaxed, "cc"); XCHG_GEN(u16, _local, "memory"); XCHG_GEN(u16, _relaxed, "cc"); +#else +static __always_inline unsigned long +__xchg_u8_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lbarx %0,0,%2 \n" +" stbcx. %3,0,%2 \n\ + bne-1b" + : "=&r" (prev), "+m" (*(volatile unsigned char *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u8_relaxed(u8 *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lbarx %0,0,%2\n" +" stbcx. 
%3,0,%2\n" +" bne-1b" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (val) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u16_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lharx %0,0,%2 \n" +" sthcx. %3,0,%2 \n\ + bne-1b" + : "=&r" (prev), "+m" (*(volatile unsigned short *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u16_relaxed(u16 *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lharx %0,0,%2\n" +" sthcx. %3,0,%2\n" +" bne-1b" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (val) + : "cc"); + + return prev; +} +#endif That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends ? static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ (unsigned long)_x_, sizeof(*(ptr)));\ }) + /* * Compare and exchange - if *p == old, set it to new, * and return the old value of *p. */ - +#ifndef CONFIG_PPC_LBARX_LWARX CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); CMPXCHG_GEN(u8, _local, , , "memory"); CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); @@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); CMPXCHG_GEN(u16, _local, , , "memory"); CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); CMPXCHG_GEN(u16, _relaxed, , , "cc"); +#else +static __always_inline unsigned long +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER +"1: lbarx %0,0,%2 # __cmpxchg_u8\n\ + cmpw0,%0,%3\n\ + bne-2f\n" +" stbcx. 
%4,0,%2\n\ + bne-1b" + PPC_ATOMIC_EXIT_BARRIER + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lbarx %0,0,%2 # __cmpxchg_u8\n\ + cmpw0,%0,%3\n\ + bne-2f\n" +" stbcx. %4,0,%2\n\ + bne-1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + :
Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx
On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote: > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx. Hmm, lwarx exists since original Power AFAIR, s/lwarx/lharx/ perhaps? Same for the title of the patch and the CONFIG variable. Gabriel > Add a compile option that allows code to use it, and add support in > cmpxchg and xchg 8 and 16 bit values. > > Signed-off-by: Nicholas Piggin > --- > arch/powerpc/Kconfig | 3 + > arch/powerpc/include/asm/cmpxchg.h | 236 - > arch/powerpc/platforms/Kconfig.cputype | 5 + > 3 files changed, 243 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index e9f13fe08492..d231af06f75a 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC > default y > depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E > > +config PPC_LBARX_LWARX > + bool > + > config EARLY_PRINTK > bool > default y > diff --git a/arch/powerpc/include/asm/cmpxchg.h > b/arch/powerpc/include/asm/cmpxchg.h > index cf091c4c22e5..17fd996dc0d4 100644 > --- a/arch/powerpc/include/asm/cmpxchg.h > +++ b/arch/powerpc/include/asm/cmpxchg.h > @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 > new) \ > * the previous value stored there. > */ > > +#ifndef CONFIG_PPC_LBARX_LWARX > XCHG_GEN(u8, _local, "memory"); > XCHG_GEN(u8, _relaxed, "cc"); > XCHG_GEN(u16, _local, "memory"); > XCHG_GEN(u16, _relaxed, "cc"); > +#else > +static __always_inline unsigned long > +__xchg_u8_local(volatile void *p, unsigned long val) > +{ > + unsigned long prev; > + > + __asm__ __volatile__( > +"1: lbarx %0,0,%2 \n" > +"stbcx. %3,0,%2 \n\ > + bne-1b" > + : "=&r" (prev), "+m" (*(volatile unsigned char *)p) > + : "r" (p), "r" (val) > + : "cc", "memory"); > + > + return prev; > +} > + > +static __always_inline unsigned long > +__xchg_u8_relaxed(u8 *p, unsigned long val) > +{ > + unsigned long prev; > + > + __asm__ __volatile__( > +"1: lbarx %0,0,%2\n" > +"stbcx. 
%3,0,%2\n" > +"bne-1b" > + : "=&r" (prev), "+m" (*p) > + : "r" (p), "r" (val) > + : "cc"); > + > + return prev; > +} > + > +static __always_inline unsigned long > +__xchg_u16_local(volatile void *p, unsigned long val) > +{ > + unsigned long prev; > + > + __asm__ __volatile__( > +"1: lharx %0,0,%2 \n" > +"sthcx. %3,0,%2 \n\ > + bne-1b" > + : "=&r" (prev), "+m" (*(volatile unsigned short *)p) > + : "r" (p), "r" (val) > + : "cc", "memory"); > + > + return prev; > +} > + > +static __always_inline unsigned long > +__xchg_u16_relaxed(u16 *p, unsigned long val) > +{ > + unsigned long prev; > + > + __asm__ __volatile__( > +"1: lharx %0,0,%2\n" > +"sthcx. %3,0,%2\n" > +"bne-1b" > + : "=&r" (prev), "+m" (*p) > + : "r" (p), "r" (val) > + : "cc"); > + > + return prev; > +} > +#endif > > static __always_inline unsigned long > __xchg_u32_local(volatile void *p, unsigned long val) > @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int > size) > (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ > (unsigned long)_x_, sizeof(*(ptr)));\ > }) > + > /* > * Compare and exchange - if *p == old, set it to new, > * and return the old value of *p. > */ > - > +#ifndef CONFIG_PPC_LBARX_LWARX > CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, > "memory"); > CMPXCHG_GEN(u8, _local, , , "memory"); > CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); > @@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, > PPC_ATOMIC_EXIT_BARRIER, "memory"); > CMPXCHG_GEN(u16, _local, , , "memory"); > CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); > CMPXCHG_GEN(u16, _relaxed, , , "cc"); > +#else > +static __always_inline unsigned long > +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new) > +{ > + unsigned int prev; > + > + __asm__ __volatile__ ( > + PPC_ATOMIC_ENTRY_BARRIER > +"1: lbarx %0,0,%2 # __cmpxchg_u8\n\ > + cmpw0,%0,%3\n\ > + bne-2f\n" > +"stbcx. 
%4,0,%2\n\ > + bne-1b" > + PPC_ATOMIC_EXIT_BARRIER > + "\n\ > +2:" > + : "=&r" (prev), "+m" (*p) > + : "r" (p), "r" (old), "r" (new) > + : "cc", "memory"); > + > + return prev; > +} > + > +static __always_inline unsigned long > +__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old, > + unsigned long new) > +{ > + unsigned int prev; > + > + __asm__ __volatile__ ( > +"1: lbarx %0,0,%2 # __cmpxchg_u8\n\ > + cmpw0,%0,%3\n\ > + bne-2f\n" > +"stbcx. %4,0,%2\n\ > + bne-1b" > + "\n\ > +2:" > + : "=&r" (prev), "+m" (*p) > + : "r
[PATCH] powerpc: add compile-time support for lbarx, lwarx
ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx. Add a compile option that allows code to use it, and add support in cmpxchg and xchg 8 and 16 bit values. Signed-off-by: Nicholas Piggin --- arch/powerpc/Kconfig | 3 + arch/powerpc/include/asm/cmpxchg.h | 236 - arch/powerpc/platforms/Kconfig.cputype | 5 + 3 files changed, 243 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e9f13fe08492..d231af06f75a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC default y depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E +config PPC_LBARX_LWARX + bool + config EARLY_PRINTK bool default y diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index cf091c4c22e5..17fd996dc0d4 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \ * the previous value stored there. */ +#ifndef CONFIG_PPC_LBARX_LWARX XCHG_GEN(u8, _local, "memory"); XCHG_GEN(u8, _relaxed, "cc"); XCHG_GEN(u16, _local, "memory"); XCHG_GEN(u16, _relaxed, "cc"); +#else +static __always_inline unsigned long +__xchg_u8_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1:lbarx %0,0,%2 \n" +" stbcx. %3,0,%2 \n\ + bne-1b" + : "=&r" (prev), "+m" (*(volatile unsigned char *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u8_relaxed(u8 *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1:lbarx %0,0,%2\n" +" stbcx. %3,0,%2\n" +" bne-1b" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (val) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u16_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1:lharx %0,0,%2 \n" +" sthcx. 
%3,0,%2 \n\ + bne-1b" + : "=&r" (prev), "+m" (*(volatile unsigned short *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u16_relaxed(u16 *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1:lharx %0,0,%2\n" +" sthcx. %3,0,%2\n" +" bne-1b" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (val) + : "cc"); + + return prev; +} +#endif static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ (unsigned long)_x_, sizeof(*(ptr)));\ }) + /* * Compare and exchange - if *p == old, set it to new, * and return the old value of *p. */ - +#ifndef CONFIG_PPC_LBARX_LWARX CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); CMPXCHG_GEN(u8, _local, , , "memory"); CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); @@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); CMPXCHG_GEN(u16, _local, , , "memory"); CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); CMPXCHG_GEN(u16, _relaxed, , , "cc"); +#else +static __always_inline unsigned long +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER +"1:lbarx %0,0,%2 # __cmpxchg_u8\n\ + cmpw0,%0,%3\n\ + bne-2f\n" +" stbcx. %4,0,%2\n\ + bne-1b" + PPC_ATOMIC_EXIT_BARRIER + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1:lbarx %0,0,%2 # __cmpxchg_u8\n\ + cmpw0,%0,%3\n\ + bne-2f\n" +" stbcx. 
%4,0,%2\n\ + bne-1b" + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1:lbarx %0,0,%2 # __cmpxchg_u8_relaxed\n" +" cmpw0,%0,%3\n" +" bne-2f\n" +" stbcx. %4,0,%2\n" +" bne-1b\n" +"2:" +