From: Abhishek Paliwal <abhishek.pali...@aricent.com> From: David Daney <dda...@caviumnetworks.com>
MIPS: Add Octeon2 optimizations to clear_page. Use the ZCBT instruction for Octeon2. Reduce the number of generated instructions when possible. Both OCTEON3 and OCTEON2 use the same instructions for this. Signed-off-by: David Daney <dda...@caviumnetworks.com> Signed-off-by: Leonid Rosenboim <lrosenb...@caviumnetworks.com> Signed-off-by: Abhishek Paliwal <abhishek.pali...@aricent.com> --- arch/mips/mm/page.c | 146 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 101 insertions(+), 45 deletions(-) diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 58033c4..d4f78ee 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -25,6 +25,7 @@ #include <asm/prefetch.h> #include <asm/bootinfo.h> #include <asm/mipsregs.h> +#include <asm/sections.h> #include <asm/mmu_context.h> #include <asm/cpu.h> #include <asm/war.h> @@ -37,6 +38,28 @@ #include <asm/uasm.h> +#ifdef CONFIG_MAPPED_KERNEL +/* Initialized so it is not clobbered when .bss is zeroed. */ +unsigned long phys_to_kernel_offset = 1; +unsigned long kernel_image_end = 1; +#endif + +#ifdef CONFIG_64BIT +unsigned long __phys_addr(unsigned long x) +{ +#ifdef CONFIG_MAPPED_KERNEL + if ((char *)x >= _text && (char *)x < _end) + return x - phys_to_kernel_offset; +#endif + if (x < CKSEG0) + return XPHYSADDR(x); + if (x < CKSSEG) + return CPHYSADDR(x); + BUG(); +} +EXPORT_SYMBOL(__phys_addr); +#endif /* CONFIG_64BIT */ + /* Registers used in the assembled routines. 
*/ #define ZERO 0 #define AT 2 @@ -100,8 +123,13 @@ pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off) uasm_i_daddu(buf, reg1, reg2, T9); } else { if (off > 0x7fff) { - uasm_i_lui(buf, T9, uasm_rel_hi(off)); - uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); + if (off == 0x8000) { + uasm_i_ori(buf, T9, ZERO, 0x8000); + } else { + uasm_i_lui(buf, T9, uasm_rel_hi(off)); + if (uasm_rel_lo(off) != 0) + uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); + } UASM_i_ADDU(buf, reg1, reg2, T9); } else UASM_i_ADDIU(buf, reg1, reg2, off); @@ -256,66 +284,94 @@ void build_clear_page(void) memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); - set_prefetch_parameters(); + if (current_cpu_data.cputype == CPU_CAVIUM_OCTEON2 || + current_cpu_data.cputype == CPU_CAVIUM_OCTEON3) { + const unsigned int wb_nudge = 26; + + pg_addiu(&buf, T0, A0, PAGE_SIZE); + + UASM_i_ADDIU(&buf, A1, A0, 128); + uasm_l_clear_pref(&l, buf); + uasm_i_zcbt(&buf, A0); + UASM_i_ADDIU(&buf, A0, A0, 256); + uasm_i_zcbt(&buf, A1); + UASM_i_ADDIU(&buf, A1, A1, 256); + uasm_i_zcbt(&buf, A0); + UASM_i_ADDIU(&buf, A0, A0, 256); + uasm_i_zcbt(&buf, A1); + uasm_i_pref(&buf, wb_nudge, 0, A1); + UASM_i_ADDIU(&buf, A1, A1, 256); + uasm_i_zcbt(&buf, A0); + UASM_i_ADDIU(&buf, A0, A0, 256); + uasm_i_zcbt(&buf, A1); + UASM_i_ADDIU(&buf, A1, A1, 256); + uasm_i_zcbt(&buf, A0); + UASM_i_ADDIU(&buf, A0, A0, 256); + uasm_i_zcbt(&buf, A1); + uasm_i_pref(&buf, wb_nudge, 0, A1); + uasm_il_bne(&buf, &r, A0, T0, label_clear_pref); + UASM_i_ADDIU(&buf, A1, A1, 256); + } else { + set_prefetch_parameters(); - /* - * This algorithm makes the following assumptions: - * - The prefetch bias is a multiple of 2 words. - * - The prefetch bias is less than one page. - */ - BUG_ON(pref_bias_clear_store % (2 * clear_word_size)); - BUG_ON(PAGE_SIZE < pref_bias_clear_store); + /* + * This algorithm makes the following assumptions: + * - The prefetch bias is a multiple of 2 words. 
+ * - The prefetch bias is less than one page. + */ + BUG_ON(pref_bias_clear_store % (2 * clear_word_size)); + BUG_ON(PAGE_SIZE < pref_bias_clear_store); - off = PAGE_SIZE - pref_bias_clear_store; - if (off > 0xffff || !pref_bias_clear_store) - pg_addiu(&buf, A2, A0, off); - else - uasm_i_ori(&buf, A2, A0, off); + off = PAGE_SIZE - pref_bias_clear_store; + if (off > 0xffff || !pref_bias_clear_store) + pg_addiu(&buf, A2, A0, off); + else + uasm_i_ori(&buf, A2, A0, off); - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - uasm_i_lui(&buf, AT, 0xa000); + if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) + uasm_i_lui(&buf, AT, 0xa000); - off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size) + off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size) * cache_line_size : 0; - while (off) { - build_clear_pref(&buf, -off); - off -= cache_line_size; - } - uasm_l_clear_pref(&l, buf); - do { - build_clear_pref(&buf, off); - build_clear_store(&buf, off); - off += clear_word_size; - } while (off < half_clear_loop_size); - pg_addiu(&buf, A0, A0, 2 * off); - off = -off; - do { - build_clear_pref(&buf, off); - if (off == -clear_word_size) - uasm_il_bne(&buf, &r, A0, A2, label_clear_pref); - build_clear_store(&buf, off); - off += clear_word_size; - } while (off < 0); - - if (pref_bias_clear_store) { - pg_addiu(&buf, A2, A0, pref_bias_clear_store); - uasm_l_clear_nopref(&l, buf); - off = 0; + while (off) { + build_clear_pref(&buf, -off); + off -= cache_line_size; + } + uasm_l_clear_pref(&l, buf); do { + build_clear_pref(&buf, off); build_clear_store(&buf, off); off += clear_word_size; } while (off < half_clear_loop_size); pg_addiu(&buf, A0, A0, 2 * off); off = -off; do { + build_clear_pref(&buf, off); if (off == -clear_word_size) - uasm_il_bne(&buf, &r, A0, A2, - label_clear_nopref); + uasm_il_bne(&buf, &r, A0, A2, label_clear_pref); build_clear_store(&buf, off); off += clear_word_size; } while (off < 0); - } + if (pref_bias_clear_store) { + 
pg_addiu(&buf, A2, A0, pref_bias_clear_store); + uasm_l_clear_nopref(&l, buf); + off = 0; + do { + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < half_clear_loop_size); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + if (off == -clear_word_size) + uasm_il_bne(&buf, &r, A0, A2, + label_clear_nopref); + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < 0); + } + } uasm_i_jr(&buf, RA); uasm_i_nop(&buf); -- 1.8.1.4 -- _______________________________________________ linux-yocto mailing list linux-yocto@yoctoproject.org https://lists.yoctoproject.org/listinfo/linux-yocto