Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx

2020-11-10 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of November 7, 2020 6:15 pm:
> 
> 
> On 07/11/2020 at 04:23, Nicholas Piggin wrote:
>> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
>> Add a compile option that allows code to use it, and add support in
>> cmpxchg and xchg of 8 and 16 bit values.
> 
> Do you mean lharx ? Because lwarx exists on all powerpcs I think.

Thanks to all who pointed out mistakes :) Yes, lharx.

> 
>> 
>> Signed-off-by: Nicholas Piggin 
>> ---
>>   arch/powerpc/Kconfig   |   3 +
>>   arch/powerpc/include/asm/cmpxchg.h | 236 -
>>   arch/powerpc/platforms/Kconfig.cputype |   5 +
>>   3 files changed, 243 insertions(+), 1 deletion(-)
>> 
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index e9f13fe08492..d231af06f75a 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
>>  default y
>>  depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
>>   
>> +config PPC_LBARX_LWARX
>> +bool
> 
> s/LWARX/LHARX/ ?
> 
> And maybe better with PPC_HAS_LBARX_LWARX ?

Yes you're right, PPC_HAS_ fits better.
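
The selects would then come from Kconfig.cputype for the CPUs that have
the instructions. Roughly like this (sketch only, with the
s/LWARX/LHARX/ fix applied -- the actual Kconfig.cputype hunk isn't
quoted in this thread):

config PPC_HAS_LBARX_LHARX
        bool

# arch/powerpc/platforms/Kconfig.cputype: CPU entries that have the
# instructions select it, e.g.
config POWER7_CPU
        bool "POWER7"
        depends on PPC_BOOK3S_64
        select PPC_HAS_LBARX_LHARX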

[...]

>> +#endif
> 
> That's a lot of code duplication. Could we use some macro, in the same spirit
> as what is done in arch/powerpc/include/asm/io.h for in_be16(), in_be32(),
> in_be64() and friends?

For now I haven't got too fancy. It's a bit ugly, but I'm working
through a generic atomics conversion patch and also trying to work out
a nice form for larx/stcx operation generation macros, so I'll look at
tidying this up some time after that.
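
Roughly, the shape I'm playing with is something like this (completely
untested sketch, hypothetical macro name, same spirit as XCHG_GEN and
the in_be*() generators):

#define XCHG_LARX_GEN(type, sfx, larx, stcx, ...)			\
static __always_inline unsigned long					\
__xchg_##type##sfx(volatile void *p, unsigned long val)		\
{									\
	unsigned long prev;						\
									\
	__asm__ __volatile__(						\
"1:	" larx "	%0,0,%2\n"					\
"	" stcx "	%3,0,%2\n"					\
"	bne-	1b"							\
	: "=&r" (prev), "+m" (*(volatile type *)p)			\
	: "r" (p), "r" (val)						\
	: __VA_ARGS__);							\
									\
	return prev;							\
}

XCHG_LARX_GEN(u8, _local, "lbarx", "stbcx.", "cc", "memory");
XCHG_LARX_GEN(u8, _relaxed, "lbarx", "stbcx.", "cc");
XCHG_LARX_GEN(u16, _local, "lharx", "sthcx.", "cc", "memory");
XCHG_LARX_GEN(u16, _relaxed, "lharx", "sthcx.", "cc");

The cmpxchg side would want a similar generator with the compare and
the entry/exit barrier arguments added.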

Thanks,
Nick



Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx

2020-11-09 Thread Segher Boessenkool
On Sun, Nov 08, 2020 at 09:01:52PM +0100, Gabriel Paubert wrote:
> On Sat, Nov 07, 2020 at 05:42:57AM -0600, Segher Boessenkool wrote:
> > On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> > > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> > > 
> > > Hmm, lwarx exists since original Power AFAIR,
> > 
> > Almost: it was new on PowerPC.
> 
> I stand corrected. Does this mean that Power1 (and 2 I believe) had 
> no SMP support?

As I understand it, that's correct.  Of course you always can do SMP "by
hand" -- you can do all synchronisation via software (perhaps using some
knowledge of the specific hardware you're running on), it's just slow
(and usually not portable).  Compare to SMP on 603 for example.
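
For example, the classic Peterson construction gives you a two-CPU lock
with nothing but plain loads, stores and a full barrier.  Illustration
only, nothing to do with this patch (and not how you would really write
it in the kernel):

/* Peterson's algorithm for two CPUs: no larx/stcx. needed, just slow. */
#define full_barrier()	__asm__ __volatile__("sync" : : : "memory")

static volatile int flag[2];	/* flag[i]: CPU i wants the lock */
static volatile int turn;	/* whose turn it is to give way */

static void sw_lock(int cpu)	/* cpu is 0 or 1 */
{
	int other = 1 - cpu;

	flag[cpu] = 1;
	turn = other;
	full_barrier();		/* order our stores before reading the other side */
	while (flag[other] && turn == other)
		full_barrier();	/* spin, re-reading the other CPU's state */
}

static void sw_unlock(int cpu)
{
	full_barrier();		/* keep the critical section before the release */
	flag[cpu] = 0;
}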


Segher


Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx

2020-11-08 Thread Gabriel Paubert
On Sat, Nov 07, 2020 at 05:42:57AM -0600, Segher Boessenkool wrote:
> On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> > 
> > Hmm, lwarx exists since original Power AFAIR,
> 
> Almost: it was new on PowerPC.

I stand corrected. Does this mean that Power1 (and 2 I believe) had 
no SMP support?

Gabriel
 



Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx

2020-11-07 Thread Segher Boessenkool
On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> 
> Hmm, lwarx exists since original Power AFAIR,

Almost: it was new on PowerPC.


Segher


Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx

2020-11-07 Thread Christophe Leroy
On 07/11/2020 at 04:23, Nicholas Piggin wrote:

ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
Add a compile option that allows code to use it, and add support in
cmpxchg and xchg of 8 and 16 bit values.


Do you mean lharx ? Because lwarx exists on all powerpcs I think.



Signed-off-by: Nicholas Piggin 
---
  arch/powerpc/Kconfig   |   3 +
  arch/powerpc/include/asm/cmpxchg.h | 236 -
  arch/powerpc/platforms/Kconfig.cputype |   5 +
  3 files changed, 243 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..d231af06f75a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
default y
depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
  
+config PPC_LBARX_LWARX
+   bool

s/LWARX/LHARX/ ?

And maybe better with PPC_HAS_LBARX_LWARX ?


+
  config EARLY_PRINTK
bool
default y
diff --git a/arch/powerpc/include/asm/cmpxchg.h 
b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..17fd996dc0d4 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 
new)   \
   * the previous value stored there.
   */
  
+#ifndef CONFIG_PPC_LBARX_LWARX

  XCHG_GEN(u8, _local, "memory");
  XCHG_GEN(u8, _relaxed, "cc");
  XCHG_GEN(u16, _local, "memory");
  XCHG_GEN(u16, _relaxed, "cc");
+#else
+static __always_inline unsigned long
+__xchg_u8_local(volatile void *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:   lbarx   %0,0,%2 \n"
+" stbcx.  %3,0,%2 \n\
+   bne-1b"
+   : "=&r" (prev), "+m" (*(volatile unsigned char *)p)
+   : "r" (p), "r" (val)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u8_relaxed(u8 *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:   lbarx   %0,0,%2\n"
+" stbcx.  %3,0,%2\n"
+" bne-1b"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (val)
+   : "cc");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_local(volatile void *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:   lharx   %0,0,%2 \n"
+" sthcx.  %3,0,%2 \n\
+   bne-1b"
+   : "=&r" (prev), "+m" (*(volatile unsigned short *)p)
+   : "r" (p), "r" (val)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_relaxed(u16 *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:   lharx   %0,0,%2\n"
+" sthcx.  %3,0,%2\n"
+" bne-1b"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (val)
+   : "cc");
+
+   return prev;
+}
+#endif


That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in 
arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends ?


  
   static __always_inline unsigned long
   __xchg_u32_local(volatile void *p, unsigned long val)
@@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int 
size)
(__typeof__(*(ptr))) __xchg_relaxed((ptr),  \
(unsigned long)_x_, sizeof(*(ptr)));\
  })
+
  /*
   * Compare and exchange - if *p == old, set it to new,
   * and return the old value of *p.
   */
-
+#ifndef CONFIG_PPC_LBARX_LWARX
  CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, 
"memory");
  CMPXCHG_GEN(u8, _local, , , "memory");
  CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
@@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, 
PPC_ATOMIC_EXIT_BARRIER, "memory");
  CMPXCHG_GEN(u16, _local, , , "memory");
  CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
  CMPXCHG_GEN(u16, _relaxed, , , "cc");
+#else
+static __always_inline unsigned long
+__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
+{
+   unsigned int prev;
+
+   __asm__ __volatile__ (
+   PPC_ATOMIC_ENTRY_BARRIER
+"1:   lbarx   %0,0,%2 # __cmpxchg_u8\n\
+   cmpw0,%0,%3\n\
+   bne-2f\n"
+" stbcx.  %4,0,%2\n\
+   bne-1b"
+   PPC_ATOMIC_EXIT_BARRIER
+   "\n\
+2:"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (old), "r" (new)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
+   unsigned long new)
+{
+   unsigned int prev;
+
+   __asm__ __volatile__ (
+"1:   lbarx   %0,0,%2 # __cmpxchg_u8\n\
+   cmpw0,%0,%3\n\
+   bne-2f\n"
+" stbcx.  %4,0,%2\n\
+   bne-1b"
+   "\n\
+2:"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (old), "r" (new)
+   : 

Re: [PATCH] powerpc: add compile-time support for lbarx, lwarx

2020-11-06 Thread Gabriel Paubert
On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.

Hmm, lwarx exists since original Power AFAIR, s/lwarx/lharx/ perhaps?

Same for the title of the patch and the CONFIG variable.

Gabriel

> Add a compile option that allows code to use it, and add support in
> cmpxchg and xchg of 8 and 16 bit values.
> 
> Signed-off-by: Nicholas Piggin 
> ---
>  arch/powerpc/Kconfig   |   3 +
>  arch/powerpc/include/asm/cmpxchg.h | 236 -
>  arch/powerpc/platforms/Kconfig.cputype |   5 +
>  3 files changed, 243 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index e9f13fe08492..d231af06f75a 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
>   default y
>   depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
>  
> +config PPC_LBARX_LWARX
> + bool
> +
>  config EARLY_PRINTK
>   bool
>   default y
> diff --git a/arch/powerpc/include/asm/cmpxchg.h 
> b/arch/powerpc/include/asm/cmpxchg.h
> index cf091c4c22e5..17fd996dc0d4 100644
> --- a/arch/powerpc/include/asm/cmpxchg.h
> +++ b/arch/powerpc/include/asm/cmpxchg.h
> @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 
> new) \
>   * the previous value stored there.
>   */
>  
> +#ifndef CONFIG_PPC_LBARX_LWARX
>  XCHG_GEN(u8, _local, "memory");
>  XCHG_GEN(u8, _relaxed, "cc");
>  XCHG_GEN(u16, _local, "memory");
>  XCHG_GEN(u16, _relaxed, "cc");
> +#else
> +static __always_inline unsigned long
> +__xchg_u8_local(volatile void *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1:  lbarx   %0,0,%2 \n"
> +"stbcx.  %3,0,%2 \n\
> + bne-1b"
> + : "=&r" (prev), "+m" (*(volatile unsigned char *)p)
> + : "r" (p), "r" (val)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u8_relaxed(u8 *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1:  lbarx   %0,0,%2\n"
> +"stbcx.  %3,0,%2\n"
> +"bne-1b"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (val)
> + : "cc");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u16_local(volatile void *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1:  lharx   %0,0,%2 \n"
> +"sthcx.  %3,0,%2 \n\
> + bne-1b"
> + : "=&r" (prev), "+m" (*(volatile unsigned short *)p)
> + : "r" (p), "r" (val)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__xchg_u16_relaxed(u16 *p, unsigned long val)
> +{
> + unsigned long prev;
> +
> + __asm__ __volatile__(
> +"1:  lharx   %0,0,%2\n"
> +"sthcx.  %3,0,%2\n"
> +"bne-1b"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (val)
> + : "cc");
> +
> + return prev;
> +}
> +#endif
>  
>  static __always_inline unsigned long
>  __xchg_u32_local(volatile void *p, unsigned long val)
> @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int 
> size)
>   (__typeof__(*(ptr))) __xchg_relaxed((ptr),  \
>   (unsigned long)_x_, sizeof(*(ptr)));\
>  })
> +
>  /*
>   * Compare and exchange - if *p == old, set it to new,
>   * and return the old value of *p.
>   */
> -
> +#ifndef CONFIG_PPC_LBARX_LWARX
>  CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, 
> "memory");
>  CMPXCHG_GEN(u8, _local, , , "memory");
>  CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
> @@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, 
> PPC_ATOMIC_EXIT_BARRIER, "memory");
>  CMPXCHG_GEN(u16, _local, , , "memory");
>  CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
>  CMPXCHG_GEN(u16, _relaxed, , , "cc");
> +#else
> +static __always_inline unsigned long
> +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
> +{
> + unsigned int prev;
> +
> + __asm__ __volatile__ (
> + PPC_ATOMIC_ENTRY_BARRIER
> +"1:  lbarx   %0,0,%2 # __cmpxchg_u8\n\
> + cmpw0,%0,%3\n\
> + bne-2f\n"
> +"stbcx.  %4,0,%2\n\
> + bne-1b"
> + PPC_ATOMIC_EXIT_BARRIER
> + "\n\
> +2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r" (p), "r" (old), "r" (new)
> + : "cc", "memory");
> +
> + return prev;
> +}
> +
> +static __always_inline unsigned long
> +__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
> + unsigned long new)
> +{
> + unsigned int prev;
> +
> + __asm__ __volatile__ (
> +"1:  lbarx   %0,0,%2 # __cmpxchg_u8\n\
> + cmpw0,%0,%3\n\
> + bne-2f\n"
> +"stbcx.  %4,0,%2\n\
> + bne-1b"
> + "\n\
> +2:"
> + : "=&r" (prev), "+m" (*p)
> + : "r

[PATCH] powerpc: add compile-time support for lbarx, lwarx

2020-11-06 Thread Nicholas Piggin
ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
Add a compile option that allows code to use it, and add support in
cmpxchg and xchg of 8 and 16 bit values.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/Kconfig   |   3 +
 arch/powerpc/include/asm/cmpxchg.h | 236 -
 arch/powerpc/platforms/Kconfig.cputype |   5 +
 3 files changed, 243 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..d231af06f75a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
default y
depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
 
+config PPC_LBARX_LWARX
+   bool
+
 config EARLY_PRINTK
bool
default y
diff --git a/arch/powerpc/include/asm/cmpxchg.h 
b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..17fd996dc0d4 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 
new)   \
  * the previous value stored there.
  */
 
+#ifndef CONFIG_PPC_LBARX_LWARX
 XCHG_GEN(u8, _local, "memory");
 XCHG_GEN(u8, _relaxed, "cc");
 XCHG_GEN(u16, _local, "memory");
 XCHG_GEN(u16, _relaxed, "cc");
+#else
+static __always_inline unsigned long
+__xchg_u8_local(volatile void *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:lbarx   %0,0,%2 \n"
+"  stbcx.  %3,0,%2 \n\
+   bne-1b"
+   : "=&r" (prev), "+m" (*(volatile unsigned char *)p)
+   : "r" (p), "r" (val)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u8_relaxed(u8 *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:lbarx   %0,0,%2\n"
+"  stbcx.  %3,0,%2\n"
+"  bne-1b"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (val)
+   : "cc");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_local(volatile void *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:lharx   %0,0,%2 \n"
+"  sthcx.  %3,0,%2 \n\
+   bne-1b"
+   : "=&r" (prev), "+m" (*(volatile unsigned short *)p)
+   : "r" (p), "r" (val)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_relaxed(u16 *p, unsigned long val)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__(
+"1:lharx   %0,0,%2\n"
+"  sthcx.  %3,0,%2\n"
+"  bne-1b"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (val)
+   : "cc");
+
+   return prev;
+}
+#endif
 
 static __always_inline unsigned long
 __xchg_u32_local(volatile void *p, unsigned long val)
@@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int 
size)
(__typeof__(*(ptr))) __xchg_relaxed((ptr),  \
(unsigned long)_x_, sizeof(*(ptr)));\
 })
+
 /*
  * Compare and exchange - if *p == old, set it to new,
  * and return the old value of *p.
  */
-
+#ifndef CONFIG_PPC_LBARX_LWARX
 CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
 CMPXCHG_GEN(u8, _local, , , "memory");
 CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
@@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, 
PPC_ATOMIC_EXIT_BARRIER, "memory");
 CMPXCHG_GEN(u16, _local, , , "memory");
 CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
 CMPXCHG_GEN(u16, _relaxed, , , "cc");
+#else
+static __always_inline unsigned long
+__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
+{
+   unsigned int prev;
+
+   __asm__ __volatile__ (
+   PPC_ATOMIC_ENTRY_BARRIER
+"1:lbarx   %0,0,%2 # __cmpxchg_u8\n\
+   cmpw0,%0,%3\n\
+   bne-2f\n"
+"  stbcx.  %4,0,%2\n\
+   bne-1b"
+   PPC_ATOMIC_EXIT_BARRIER
+   "\n\
+2:"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (old), "r" (new)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
+   unsigned long new)
+{
+   unsigned int prev;
+
+   __asm__ __volatile__ (
+"1:lbarx   %0,0,%2 # __cmpxchg_u8\n\
+   cmpw0,%0,%3\n\
+   bne-2f\n"
+"  stbcx.  %4,0,%2\n\
+   bne-1b"
+   "\n\
+2:"
+   : "=&r" (prev), "+m" (*p)
+   : "r" (p), "r" (old), "r" (new)
+   : "cc", "memory");
+
+   return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
+{
+   unsigned long prev;
+
+   __asm__ __volatile__ (
+"1:lbarx   %0,0,%2 # __cmpxchg_u8_relaxed\n"
+"  cmpw0,%0,%3\n"
+"  bne-2f\n"
+"  stbcx.  %4,0,%2\n"
+"  bne-1b\n"
+"2:"
+