Re: armv7 bcopyinout.S vs bcopyinout_xscale.S

2017-10-09 Thread Mark Kettenis
> Date: Mon, 9 Oct 2017 08:45:55 +0300
> From: Artturi Alm 
> 
> Hi,
> 
> 
> Has anyone looked at the NetBSD XScale versions of bcopyin/bcopyout/kcopy?
> 
> This is from NetBSD's bcopyinout.S:
> #if defined(__XSCALE__) || defined(_ARM_ARCH_6)
> /*
>  * armv6 and v7 have pld and strd so they can use the xscale
>  * bcopyinout as well.
>  */
> #include "bcopyinout_xscale.S"
> #else
> 
> Untested diff below that I just scavenged from one of my dead branches,
> just in case someone has the time and motivation to run it through some
> performance testing or whatever.

Please stop sending untested diffs.  Nobody has the motivation to look
at them, unless maybe they fix actual bugs.

> diff --git a/sys/arch/arm/arm/bcopyinout.S b/sys/arch/arm/arm/bcopyinout.S
> index 9a7d11865c0..bc2e58d22b7 100644
> --- a/sys/arch/arm/arm/bcopyinout.S
> +++ b/sys/arch/arm/arm/bcopyinout.S
> @@ -41,7 +41,7 @@
>  #include 
>  #include 
>  
> -#ifdef __XSCALE__
> +#ifdef CPU_ARMv7
>  #include "bcopyinout_xscale.S"
>  #else
>  
> diff --git a/sys/arch/arm/arm/bcopyinout_xscale.S b/sys/arch/arm/arm/bcopyinout_xscale.S
> new file mode 100644
> index 000..2e740eb96c2
> --- /dev/null
> +++ b/sys/arch/arm/arm/bcopyinout_xscale.S
> @@ -0,0 +1,1139 @@
> +/* $OpenBSD$ */
> +/*   $NetBSD: bcopyinout_xscale.S,v 1.11 2013/12/01 02:54:33 joerg Exp $ */
> +
> +/*
> + * Copyright 2003 Wasabi Systems, Inc.
> + * All rights reserved.
> + *
> + * Written by Steve C. Woodford for Wasabi Systems, Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *notice, this list of conditions and the following disclaimer in the
> + *documentation and/or other materials provided with the distribution.
> + * 3. All advertising materials mentioning features or use of this software
> + *must display the following acknowledgement:
> + *  This product includes software developed for the NetBSD Project by
> + *  Wasabi Systems, Inc.
> + * 4. The name of Wasabi Systems, Inc. may not be used to endorse
> + *or promote products derived from this software without specific prior
> + *written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
> + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
> + * POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> + .text
> + .align  2
> +
> +/*
> + * r0 = user space address
> + * r1 = kernel space address
> + * r2 = length
> + *
> + * Copies bytes from user space to kernel space
> + */
> +ENTRY(copyin)
> + cmp r2, #0x00
> +#if /* XXX or <= 0 like below? */ 1
> + moveq   r0, #0
> + moveq   pc, lr
> +#else
> + movle   r0, #0x00
> + RETc(le) /* Bail early if length is <= 0 */
> +#endif
> + push {r10-r11, lr}
> +
> + /* Get curcpu from TPIDRPRW. */
> + mrc CP15_TPIDRPRW(r10)
> + ldr r10, [r10, #CI_CURPCB]
> +
> + mov r3, #0x00
> + adr ip, .Lcopyin_fault
> + ldr r11, [r10, #PCB_ONFAULT]
> + str ip, [r10, #PCB_ONFAULT]
> + bl  .Lcopyin_guts
> + str r11, [r10, #PCB_ONFAULT]
> + mov r0, #0x00
> + pop {r10-r11, pc}
> +
> +.Lcopyin_fault:
> + str r11, [r10, #PCB_ONFAULT]
> + cmp r3, #0x00
> + popgt   {r4-r7} /* r3 > 0 Restore r4-r7 */
> + poplt   {r4-r9} /* r3 < 0 Restore r4-r9 */
> + pop {r10-r11, pc}
> +
> +.Lcopyin_guts:
> + pld [r0]
> + /* Word-align the destination buffer */
> + ands ip, r1, #0x03   /* Already word aligned? */
> + beq .Lcopyin_wordaligned /* Yup */
> + rsb ip, ip, #0x04
> + cmp r2, ip  /* Enough bytes left to align it? */
> + blt .Lcopyin_l4_2   /* Nope. Just copy bytewise */
> + sub r2, r2, ip
> + rsbs ip, ip, #0x03
> + addne   pc, pc, ip, lsl #3
> + nop
> + ldrbt   ip, [r0], #0x01
> + strb ip, [r1], #0x01
> + ldrbt   ip, [r0], #0x01
> + strb ip, [r1], #0x01
> + 

armv7 bcopyinout.S vs bcopyinout_xscale.S

2017-10-08 Thread Artturi Alm
Hi,


Has anyone looked at the NetBSD XScale versions of bcopyin/bcopyout/kcopy?

This is from NetBSD's bcopyinout.S:
#if defined(__XSCALE__) || defined(_ARM_ARCH_6)
/*
 * armv6 and v7 have pld and strd so they can use the xscale
 * bcopyinout as well.
 */
#include "bcopyinout_xscale.S"
#else

Untested diff below that I just scavenged from one of my dead branches,
just in case someone has the time and motivation to run it through some
performance testing or whatever.

-Artturi


diff --git a/sys/arch/arm/arm/bcopyinout.S b/sys/arch/arm/arm/bcopyinout.S
index 9a7d11865c0..bc2e58d22b7 100644
--- a/sys/arch/arm/arm/bcopyinout.S
+++ b/sys/arch/arm/arm/bcopyinout.S
@@ -41,7 +41,7 @@
 #include 
 #include 
 
-#ifdef __XSCALE__
+#ifdef CPU_ARMv7
 #include "bcopyinout_xscale.S"
 #else
 
diff --git a/sys/arch/arm/arm/bcopyinout_xscale.S b/sys/arch/arm/arm/bcopyinout_xscale.S
new file mode 100644
index 000..2e740eb96c2
--- /dev/null
+++ b/sys/arch/arm/arm/bcopyinout_xscale.S
@@ -0,0 +1,1139 @@
+/* $OpenBSD$ */
+/* $NetBSD: bcopyinout_xscale.S,v 1.11 2013/12/01 02:54:33 joerg Exp $ */
+
+/*
+ * Copyright 2003 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Steve C. Woodford for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *must display the following acknowledgement:
+ *  This product includes software developed for the NetBSD Project by
+ *  Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *or promote products derived from this software without specific prior
+ *written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+   .text
+   .align  2
+
+/*
+ * r0 = user space address
+ * r1 = kernel space address
+ * r2 = length
+ *
+ * Copies bytes from user space to kernel space
+ */
+ENTRY(copyin)
+   cmp r2, #0x00
+#if /* XXX or <= 0 like below? */ 1
+   moveq   r0, #0
+   moveq   pc, lr
+#else
+   movle   r0, #0x00
+   RETc(le)    /* Bail early if length is <= 0 */
+#endif
+   push    {r10-r11, lr}
+
+   /* Get curcpu from TPIDRPRW. */
+   mrc CP15_TPIDRPRW(r10)
+   ldr r10, [r10, #CI_CURPCB]
+
+   mov r3, #0x00
+   adr ip, .Lcopyin_fault
+   ldr r11, [r10, #PCB_ONFAULT]
+   str ip, [r10, #PCB_ONFAULT]
+   bl  .Lcopyin_guts
+   str r11, [r10, #PCB_ONFAULT]
+   mov r0, #0x00
+   pop {r10-r11, pc}
+
+.Lcopyin_fault:
+   str r11, [r10, #PCB_ONFAULT]
+   cmp r3, #0x00
+   popgt   {r4-r7} /* r3 > 0 Restore r4-r7 */
+   poplt   {r4-r9} /* r3 < 0 Restore r4-r9 */
+   pop {r10-r11, pc}
+
+.Lcopyin_guts:
+   pld [r0]
+   /* Word-align the destination buffer */
+   ands    ip, r1, #0x03   /* Already word aligned? */
+   beq .Lcopyin_wordaligned    /* Yup */
+   rsb ip, ip, #0x04
+   cmp r2, ip  /* Enough bytes left to align it? */
+   blt .Lcopyin_l4_2   /* Nope. Just copy bytewise */
+   sub r2, r2, ip
+   rsbs    ip, ip, #0x03
+   addne   pc, pc, ip, lsl #3
+   nop
+   ldrbt   ip, [r0], #0x01
+   strb    ip, [r1], #0x01
+   ldrbt   ip, [r0], #0x01
+   strb    ip, [r1], #0x01
+   ldrbt   ip, [r0], #0x01
+   strb    ip, [r1], #0x01
+   cmp r2, #0x00   /* All done? */
+   moveq   pc, lr
+
+   /* Destination buffer is now word aligned */
+.Lcopyin_wordaligned:
+   ands    ip, r0, #0x03   /* Is src also word-aligned? */
+   bne .Lcopyin_bad_align  /* Nope. Things just got bad */
+   cmp r2,