On Thu, 2011-12-08 at 17:11 +1100, Anton Blanchard wrote:
> Implement a POWER7 optimised copy_to_user/copy_from_user using VMX.
> For large aligned copies this new loop is over 10% faster, and for
> large unaligned copies it is over 200% faster.

Breaks the !CONFIG_ALTIVEC build and pops some WARNs with preempt & lockdep;
this seems to fix them:

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index b90b3e7..7735a2c 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -17,13 +17,14 @@ obj-$(CONFIG_HAS_IOMEM)     += devres.o
 obj-$(CONFIG_PPC64)    += copypage_64.o copyuser_64.o \
                           memcpy_64.o usercopy_64.o mem_64.o string.o \
                           checksum_wrappers_64.o hweight_64.o \
-                          copyuser_power7.o copyuser_power7_vmx.o
+                          copyuser_power7.o
 obj-$(CONFIG_XMON)     += sstep.o ldstfp.o
 obj-$(CONFIG_KPROBES)  += sstep.o ldstfp.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += sstep.o ldstfp.o
 
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)      += locks.o
+obj-$(CONFIG_ALTIVEC)  += copyuser_power7_vmx.o
 endif
 
 obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 4395939..9a21b08 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -85,6 +85,7 @@
 
 
 _GLOBAL(__copy_tofrom_user_power7)
+#ifdef CONFIG_ALTIVEC
        cmpldi  r5,16
        cmpldi  cr1,r5,4096
 
@@ -94,6 +95,15 @@ _GLOBAL(__copy_tofrom_user_power7)
 
        blt     .Lshort_copy
        bgt     cr1,.Lvmx_copy
+#else
+       cmpldi  r5,16
+
+       std     r3,48(r1)
+       std     r4,56(r1)
+       std     r5,64(r1)
+
+       blt     .Lshort_copy
+#endif
 
 .Lnonvmx_copy:
        /* Get the source 8B aligned */
@@ -273,6 +283,7 @@ err1;       stb     r0,0(r3)
        addi    r1,r1,STACKFRAMESIZE
        b       .Lnonvmx_copy
 
+#ifdef CONFIG_ALTIVEC
 .Lvmx_copy:
        mflr    r0
        std     r0,16(r1)
@@ -667,3 +678,4 @@ err3;       stb     r0,0(r3)
 
 15:    addi    r1,r1,STACKFRAMESIZE
        b       .exit_vmx_copy          /* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/copyuser_power7_vmx.c b/arch/powerpc/lib/copyuser_power7_vmx.c
index c37b949..6e1efad 100644
--- a/arch/powerpc/lib/copyuser_power7_vmx.c
+++ b/arch/powerpc/lib/copyuser_power7_vmx.c
@@ -26,10 +26,16 @@ int enter_vmx_copy(void)
        if (in_interrupt())
                return 0;
 
-       enable_kernel_altivec();
-
+       /* This acts as preempt_disable() as well, which is what makes
+        * calling enable_kernel_altivec() safe here. We need to disable
+        * page faults because they can call schedule() and thus make us
+        * lose the VMX context. So on a page fault we just fail, which
+        * causes a fallback to the normal non-VMX copy.
+        */
        pagefault_disable();
 
+       enable_kernel_altivec();
+
        return 1;
 }
 

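For reference, here is a rough standalone sketch of what the last hunk ends up
doing (not the exact file contents: the #includes and the body of
exit_vmx_copy() aren't quoted in this mail, so treat those parts as guesses).
The point is that pagefault_disable() also disables preemption, so it has to
come before enable_kernel_altivec(); once we own the VMX unit we can no longer
be scheduled away via a faulting user access, and any access that would fault
simply fails so the asm drops back to the non-VMX copy loop.

/*
 * Sketch only, not the actual file: the headers and the exit_vmx_copy()
 * body are assumptions, since only the enter_vmx_copy() hunk is quoted
 * above. enable_kernel_altivec() comes from the powerpc asm headers.
 */
#include <linux/hardirq.h>	/* in_interrupt() */
#include <linux/uaccess.h>	/* pagefault_disable()/pagefault_enable() */

int enter_vmx_copy(void)
{
	/* VMX state can't be used from interrupt context. */
	if (in_interrupt())
		return 0;

	/*
	 * pagefault_disable() bumps the preempt count, so once
	 * enable_kernel_altivec() has handed us the VMX unit we cannot
	 * be scheduled away (e.g. by a faulting user access sleeping)
	 * and silently lose that state.
	 */
	pagefault_disable();
	enable_kernel_altivec();

	return 1;
}

/* Assumed counterpart of the .exit_vmx_copy tail call in the .S file. */
void *exit_vmx_copy(void *dest)
{
	pagefault_enable();
	return dest;
}

The Makefile and #ifdef CONFIG_ALTIVEC hunks take care of the !CONFIG_ALTIVEC
build; the reordering above is what quietens the preempt/lockdep WARNs.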