On Thu, 2011-12-08 at 17:11 +1100, Anton Blanchard wrote: > Implement a POWER7 optimised copy_to_user/copy_from_user using VMX. > For large aligned copies this new loop is over 10% faster, and for > large unaligned copies it is over 200% faster.
Breaks the !CONFIG_ALTIVEC build and pops some WARNs with preempt & lockdep; this seems to fix them: diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index b90b3e7..7735a2c 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -17,13 +17,14 @@ obj-$(CONFIG_HAS_IOMEM) += devres.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ memcpy_64.o usercopy_64.o mem_64.o string.o \ checksum_wrappers_64.o hweight_64.o \ - copyuser_power7.o copyuser_power7_vmx.o + copyuser_power7.o obj-$(CONFIG_XMON) += sstep.o ldstfp.o obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o ifeq ($(CONFIG_PPC64),y) obj-$(CONFIG_SMP) += locks.o +obj-$(CONFIG_ALTIVEC) += copyuser_power7_vmx.o endif obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 4395939..9a21b08 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -85,6 +85,7 @@ _GLOBAL(__copy_tofrom_user_power7) +#ifdef CONFIG_ALTIVEC cmpldi r5,16 cmpldi cr1,r5,4096 @@ -94,6 +95,15 @@ _GLOBAL(__copy_tofrom_user_power7) blt .Lshort_copy bgt cr1,.Lvmx_copy +#else + cmpldi r5,16 + + std r3,48(r1) + std r4,56(r1) + std r5,64(r1) + + blt .Lshort_copy +#endif .Lnonvmx_copy: /* Get the source 8B aligned */ @@ -273,6 +283,7 @@ err1; stb r0,0(r3) addi r1,r1,STACKFRAMESIZE b .Lnonvmx_copy +#ifdef CONFIG_ALTIVEC .Lvmx_copy: mflr r0 std r0,16(r1) @@ -667,3 +678,4 @@ err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE b .exit_vmx_copy /* tail call optimise */ +#endif /* CONFiG_ALTIVEC */ diff --git a/arch/powerpc/lib/copyuser_power7_vmx.c b/arch/powerpc/lib/copyuser_power7_vmx.c index c37b949..6e1efad 100644 --- a/arch/powerpc/lib/copyuser_power7_vmx.c +++ b/arch/powerpc/lib/copyuser_power7_vmx.c @@ -26,10 +26,16 @@ int enter_vmx_copy(void) if (in_interrupt()) return 0; - enable_kernel_altivec(); - + /* This acts as preempt_disable() as well and will make + * 
enable_kernel_altivec(). We need to disable page faults + * as they can call schedule and thus make us lose the VMX + * context. So on page faults, we just fail which will cause + * a fallback to the normal non-vmx copy. + */ pagefault_disable(); + enable_kernel_altivec(); + return 1; } _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev