[PATCH v2 3/4] arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7/M8
New algorithm that takes advantage of the M7/M8 block init store ASI, ie, overlapping pipelines and miss buffer filling. Full details in code comments. Signed-off-by: Babu Moger--- arch/sparc/kernel/head_64.S | 16 +- arch/sparc/lib/M7copy_from_user.S | 41 ++ arch/sparc/lib/M7copy_to_user.S | 51 ++ arch/sparc/lib/M7memcpy.S | 923 + arch/sparc/lib/M7memset.S | 352 ++ arch/sparc/lib/M7patch.S | 51 ++ arch/sparc/lib/Makefile |3 + 7 files changed, 1435 insertions(+), 2 deletions(-) create mode 100644 arch/sparc/lib/M7copy_from_user.S create mode 100644 arch/sparc/lib/M7copy_to_user.S create mode 100644 arch/sparc/lib/M7memcpy.S create mode 100644 arch/sparc/lib/M7memset.S create mode 100644 arch/sparc/lib/M7patch.S diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 78e0211..bf9a5ac 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -603,10 +603,10 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_M8 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_SN be,pt %xcc, niagara4_patch @@ -621,6 +621,18 @@ niagara_tlb_fixup: ba,a,pt %xcc, 80f nop + +sparc_m7_patch: + callm7_patch_copyops +nop + callm7_patch_bzero +nop + callm7_patch_pageops +nop + + ba,a,pt %xcc, 80f +nop + niagara4_patch: callniagara4_patch_copyops nop diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S new file mode 100644 index 000..d0689d7 --- /dev/null +++ b/arch/sparc/lib/M7copy_from_user.S @@ -0,0 +1,41 @@ +/* + * M7copy_from_user.S: SPARC M7 optimized copy from userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + + +#define EX_LD(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_LD_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME M7copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S new file mode 100644 index 000..d3be132 --- /dev/null +++ b/arch/sparc/lib/M7copy_to_user.S @@ -0,0 +1,51 @@ +/* + * M7copy_to_user.S: SPARC M7 optimized copy to userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + + +#define EX_ST(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_ST_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME M7copy_to_user +#define STORE(type,src,addr) type##a src, [addr] %asi +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#defineSTORE_MRU_ASI ASI_ST_BLKINIT_MRU_S +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. +* Reading %asi to check for KERNEL_DS is comparatively +* cheap. +*/ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7memcpy.S
[PATCH v2 3/4] arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7/M8
New algorithm that takes advantage of the M7/M8 block init store ASI, ie, overlapping pipelines and miss buffer filling. Full details in code comments. Signed-off-by: Babu Moger --- arch/sparc/kernel/head_64.S | 16 +- arch/sparc/lib/M7copy_from_user.S | 41 ++ arch/sparc/lib/M7copy_to_user.S | 51 ++ arch/sparc/lib/M7memcpy.S | 923 + arch/sparc/lib/M7memset.S | 352 ++ arch/sparc/lib/M7patch.S | 51 ++ arch/sparc/lib/Makefile |3 + 7 files changed, 1435 insertions(+), 2 deletions(-) create mode 100644 arch/sparc/lib/M7copy_from_user.S create mode 100644 arch/sparc/lib/M7copy_to_user.S create mode 100644 arch/sparc/lib/M7memcpy.S create mode 100644 arch/sparc/lib/M7memset.S create mode 100644 arch/sparc/lib/M7patch.S diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 78e0211..bf9a5ac 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -603,10 +603,10 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_M8 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_SN be,pt %xcc, niagara4_patch @@ -621,6 +621,18 @@ niagara_tlb_fixup: ba,a,pt %xcc, 80f nop + +sparc_m7_patch: + callm7_patch_copyops +nop + callm7_patch_bzero +nop + callm7_patch_pageops +nop + + ba,a,pt %xcc, 80f +nop + niagara4_patch: callniagara4_patch_copyops nop diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S new file mode 100644 index 000..d0689d7 --- /dev/null +++ b/arch/sparc/lib/M7copy_from_user.S @@ -0,0 +1,41 @@ +/* + * M7copy_from_user.S: SPARC M7 optimized copy from userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + + +#define EX_LD(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_LD_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME M7copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S new file mode 100644 index 000..d3be132 --- /dev/null +++ b/arch/sparc/lib/M7copy_to_user.S @@ -0,0 +1,51 @@ +/* + * M7copy_to_user.S: SPARC M7 optimized copy to userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + + +#define EX_ST(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_ST_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME M7copy_to_user +#define STORE(type,src,addr) type##a src, [addr] %asi +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#defineSTORE_MRU_ASI ASI_ST_BLKINIT_MRU_S +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. +* Reading %asi to check for KERNEL_DS is comparatively +* cheap. +*/ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S new