Re: [PATCH v2] powerpc: add compile-time support for lbarx, lharx

2022-09-07 Thread Christophe Leroy




On 23/06/2021 at 05:28, Nicholas Piggin wrote:

ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lharx.
Add a compile option that allows code to use them, and add support in
cmpxchg and xchg for 8- and 16-bit values without shifting and masking.


Is this patch still relevant?

If so, it should be rebased because it conflicts badly.

Thanks
Christophe




[PATCH v2] powerpc: add compile-time support for lbarx, lharx

2021-06-22 Thread Nicholas Piggin
ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lharx.
Add a compile option that allows code to use them, and add support in
cmpxchg and xchg for 8- and 16-bit values without shifting and masking.
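
To make the difference concrete, a rough sketch (register and operand
names here are illustrative and the generic sequence is simplified):
without lbarx, an 8-bit xchg has to reserve the containing aligned
32-bit word and splice the byte in with shift/mask arithmetic, roughly:

    1:  lwarx   prev,0,aligned_p        # reserve the whole 32-bit word
        andc    tmp,prev,byte_mask      # clear the target byte lane
        or      tmp,tmp,val_shifted     # insert the new byte value
        stwcx.  tmp,0,aligned_p         # store back conditionally
        bne-    1b                      # retry if the reservation was lost

With lbarx/stbcx. (ISA v2.06 and e6500) the byte can be operated on
directly, so no shifting or masking is needed:

    1:  lbarx   prev,0,p
        stbcx.  val,0,p
        bne-    1b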

Signed-off-by: Nicholas Piggin 
---
v2: Fixed lwarx->lharx typo, switched to PPC_HAS_

 arch/powerpc/Kconfig   |   3 +
 arch/powerpc/include/asm/cmpxchg.h | 236 -
 arch/powerpc/lib/sstep.c   |  21 +--
 arch/powerpc/platforms/Kconfig.cputype |   5 +
 4 files changed, 254 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 088dd2afcfe4..dc17f4d51a79 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -278,6 +278,9 @@ config PPC_BARRIER_NOSPEC
default y
depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
 
+config PPC_HAS_LBARX_LHARX
+   bool
+
 config EARLY_PRINTK
bool
default y
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..28fbd57db1ec 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new)   \
  * the previous value stored there.
  */
 
+#ifndef CONFIG_PPC_HAS_LBARX_LHARX
 XCHG_GEN(u8, _local, "memory");
 XCHG_GEN(u8, _relaxed, "cc");
 XCHG_GEN(u16, _local, "memory");
 XCHG_GEN(u16, _relaxed, "cc");
+#else
+static __always_inline unsigned long
+__xchg_u8_local(volatile void *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:lbarx   %0,0,%2 \n"
+"  stbcx.  %3,0,%2 \n\
+   bne-1b"
+   : "=&r" (prev), "+m" (*(volatile unsigned char *)p)
+   : "r" (p), "r" (val)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u8_relaxed(u8 *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:lbarx   %0,0,%2\n"
+"  stbcx.  %3,0,%2\n"
+"  bne-1b"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (val)
+   : "cc");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_local(volatile void *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:lharx   %0,0,%2 \n"
+"  sthcx.  %3,0,%2 \n\
+   bne-1b"
+   : "=&r" (prev), "+m" (*(volatile unsigned short *)p)
+   : "r" (p), "r" (val)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_relaxed(u16 *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:lharx   %0,0,%2\n"
+"  sthcx.  %3,0,%2\n"
+"  bne-1b"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (val)
+   : "cc");
+
+   return prev;
+}
+#endif
 
 static __always_inline unsigned long
 __xchg_u32_local(volatile void *p, unsigned long val)
@@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
(__typeof__(*(ptr))) __xchg_relaxed((ptr),  \
(unsigned long)_x_, sizeof(*(ptr)));\
 })
+
 /*
  * Compare and exchange - if *p == old, set it to new,
  * and return the old value of *p.
  */
-
+#ifndef CONFIG_PPC_HAS_LBARX_LHARX
 CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
 CMPXCHG_GEN(u8, _local, , , "memory");
 CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
@@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
 CMPXCHG_GEN(u16, _local, , , "memory");
 CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
 CMPXCHG_GEN(u16, _relaxed, , , "cc");
+#else
+static __always_inline unsigned long
+__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
+{
+   unsigned int prev;
+
+   __asm__ __volatile__ (
+   PPC_ATOMIC_ENTRY_BARRIER
+"1:lbarx   %0,0,%2 # __cmpxchg_u8\n\
+   cmpw0,%0,%3\n\
+   bne-2f\n"
+"  stbcx.  %4,0,%2\n\
+   bne-1b"
+   PPC_ATOMIC_EXIT_BARRIER
+   "\n\
+2:"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (old), "r" (new)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
+   unsigned long new)
+{
+   unsigned int prev;
+
+   __asm__ __volatile__ (
+"1:lbarx   %0,0,%2 # __cmpxchg_u8\n\
+   cmpw0,%0,%3\n\
+   bne-2f\n"
+"  stbcx.  %4,0,%2\n\
+   bne-1b"
+   "\n\
+2:"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (old), "r" (new)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__ (
+"1:lbarx   %0,0,%2