Add a new CPU feature, CPU_FTR_CP_USE_DCBTZ, to be added to the CPUs that 
benefit
from having dcbt and dcbz instructions used in copy_4K_page(). So far Cell, 
PPC970
and Power4 benefit.

This way all the other 64bit powerpc chips will have the whole prefetching loop
nop'ed out.

Signed-off-by: Mark Nelson <[EMAIL PROTECTED]>
---
 arch/powerpc/include/asm/cputable.h |    9 ++++++---
 arch/powerpc/lib/copypage_64.S      |    3 ++-
 2 files changed, 8 insertions(+), 4 deletions(-)

Index: upstream/arch/powerpc/include/asm/cputable.h
===================================================================
--- upstream.orig/arch/powerpc/include/asm/cputable.h
+++ upstream/arch/powerpc/include/asm/cputable.h
@@ -192,6 +192,7 @@ extern const char *powerpc_base_platform
 #define CPU_FTR_NO_SLBIE_B             LONG_ASM_CONST(0x0008000000000000)
 #define CPU_FTR_VSX                    LONG_ASM_CONST(0x0010000000000000)
 #define CPU_FTR_SAO                    LONG_ASM_CONST(0x0020000000000000)
+#define CPU_FTR_CP_USE_DCBTZ           LONG_ASM_CONST(0x0040000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -387,10 +388,11 @@ extern const char *powerpc_base_platform
            CPU_FTR_MMCRA | CPU_FTR_CTRL)
 #define CPU_FTRS_POWER4        (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
            CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
-           CPU_FTR_MMCRA)
+           CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ)
 #define CPU_FTRS_PPC970        (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
            CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
-           CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA)
+           CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
+           CPU_FTR_CP_USE_DCBTZ)
 #define CPU_FTRS_POWER5        (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
            CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
            CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -411,7 +413,8 @@ extern const char *powerpc_base_platform
 #define CPU_FTRS_CELL  (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
            CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
            CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
-           CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | CPU_FTR_CELL_TB_BUG)
+           CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | \
+           CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ)
 #define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
            CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
            CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \
Index: upstream/arch/powerpc/lib/copypage_64.S
===================================================================
--- upstream.orig/arch/powerpc/lib/copypage_64.S
+++ upstream/arch/powerpc/lib/copypage_64.S
@@ -18,6 +18,7 @@ PPC64_CACHES:
 
 _GLOBAL(copy_4K_page)
        li      r5,4096         /* 4K page size */
+BEGIN_FTR_SECTION
        ld      r10,[EMAIL PROTECTED](r2)
        lwz     r11,DCACHEL1LOGLINESIZE(r10)    /* log2 of cache line size */
        lwz     r12,DCACHEL1LINESIZE(r10)       /* Get cache line size */
@@ -30,7 +31,7 @@ setup:
        dcbz    r9,r3
        add     r9,r9,r12
        bdnz    setup
-
+END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
        addi    r3,r3,-8
        srdi    r8,r5,7         /* page is copied in 128 byte strides */
        addi    r8,r8,-1        /* one stride copied outside loop */
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev

Reply via email to