From: Abhishek Paliwal <abhishek.pali...@aricent.com>

From: David Daney <dda...@caviumnetworks.com>

MIPS: Add Octeon2 optimizations to clear_page.

Use the ZCBT instruction for Octeon2.
Reduce the number of generated instructions when possible.

Both OCTEON3 and OCTEON2 use the same instructions for this.

Signed-off-by: David Daney <dda...@caviumnetworks.com>
Signed-off-by: Leonid Rosenboim <lrosenb...@caviumnetworks.com>
Signed-off-by: Abhishek Paliwal <abhishek.pali...@aricent.com>
---
 arch/mips/mm/page.c | 146 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 101 insertions(+), 45 deletions(-)

diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 58033c4..d4f78ee 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -25,6 +25,7 @@
 #include <asm/prefetch.h>
 #include <asm/bootinfo.h>
 #include <asm/mipsregs.h>
+#include <asm/sections.h>
 #include <asm/mmu_context.h>
 #include <asm/cpu.h>
 #include <asm/war.h>
@@ -37,6 +38,28 @@
 
 #include <asm/uasm.h>
 
+#ifdef CONFIG_MAPPED_KERNEL
+/* Initialized so it is not clobbered when .bss is zeroed.  */
+unsigned long phys_to_kernel_offset = 1;
+unsigned long kernel_image_end = 1;
+#endif
+
+#ifdef CONFIG_64BIT
+unsigned long __phys_addr(unsigned long x)
+{
+#ifdef CONFIG_MAPPED_KERNEL
+       if ((char *)x >= _text && (char *)x < _end)
+               return x - phys_to_kernel_offset;
+#endif
+       if (x < CKSEG0)
+               return XPHYSADDR(x);
+       if (x < CKSSEG)
+               return CPHYSADDR(x);
+       BUG();
+}
+EXPORT_SYMBOL(__phys_addr);
+#endif /* CONFIG_64BIT */
+
 /* Registers used in the assembled routines. */
 #define ZERO 0
 #define AT 2
@@ -100,8 +123,13 @@ pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
                uasm_i_daddu(buf, reg1, reg2, T9);
        } else {
                if (off > 0x7fff) {
-                       uasm_i_lui(buf, T9, uasm_rel_hi(off));
-                       uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+                       if (off == 0x8000) {
+                               uasm_i_ori(buf, T9, ZERO, 0x8000);
+                       } else {
+                               uasm_i_lui(buf, T9, uasm_rel_hi(off));
+                               if (uasm_rel_lo(off) != 0)
+                                       uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+                       }
                        UASM_i_ADDU(buf, reg1, reg2, T9);
                } else
                        UASM_i_ADDIU(buf, reg1, reg2, off);
@@ -256,66 +284,94 @@ void build_clear_page(void)
        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));
 
-       set_prefetch_parameters();
+       if (current_cpu_data.cputype == CPU_CAVIUM_OCTEON2 ||
+           current_cpu_data.cputype == CPU_CAVIUM_OCTEON3) {
+               const unsigned int wb_nudge = 26;
+
+               pg_addiu(&buf, T0, A0, PAGE_SIZE);
+
+               UASM_i_ADDIU(&buf, A1, A0, 128);
+               uasm_l_clear_pref(&l, buf);
+               uasm_i_zcbt(&buf, A0);
+               UASM_i_ADDIU(&buf, A0, A0, 256);
+               uasm_i_zcbt(&buf, A1);
+               UASM_i_ADDIU(&buf, A1, A1, 256);
+               uasm_i_zcbt(&buf, A0);
+               UASM_i_ADDIU(&buf, A0, A0, 256);
+               uasm_i_zcbt(&buf, A1);
+               uasm_i_pref(&buf, wb_nudge, 0, A1);
+               UASM_i_ADDIU(&buf, A1, A1, 256);
+               uasm_i_zcbt(&buf, A0);
+               UASM_i_ADDIU(&buf, A0, A0, 256);
+               uasm_i_zcbt(&buf, A1);
+               UASM_i_ADDIU(&buf, A1, A1, 256);
+               uasm_i_zcbt(&buf, A0);
+               UASM_i_ADDIU(&buf, A0, A0, 256);
+               uasm_i_zcbt(&buf, A1);
+               uasm_i_pref(&buf, wb_nudge, 0, A1);
+               uasm_il_bne(&buf, &r, A0, T0, label_clear_pref);
+               UASM_i_ADDIU(&buf, A1, A1, 256);
+       } else {
+               set_prefetch_parameters();
 
-       /*
-        * This algorithm makes the following assumptions:
-        *   - The prefetch bias is a multiple of 2 words.
-        *   - The prefetch bias is less than one page.
-        */
-       BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
-       BUG_ON(PAGE_SIZE < pref_bias_clear_store);
+               /*
+                * This algorithm makes the following assumptions:
+                *   - The prefetch bias is a multiple of 2 words.
+                *   - The prefetch bias is less than one page.
+                */
+               BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
+               BUG_ON(PAGE_SIZE < pref_bias_clear_store);
 
-       off = PAGE_SIZE - pref_bias_clear_store;
-       if (off > 0xffff || !pref_bias_clear_store)
-               pg_addiu(&buf, A2, A0, off);
-       else
-               uasm_i_ori(&buf, A2, A0, off);
+               off = PAGE_SIZE - pref_bias_clear_store;
+               if (off > 0xffff || !pref_bias_clear_store)
+                       pg_addiu(&buf, A2, A0, off);
+               else
+                       uasm_i_ori(&buf, A2, A0, off);
 
-       if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-               uasm_i_lui(&buf, AT, 0xa000);
+               if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+                       uasm_i_lui(&buf, AT, 0xa000);
 
-       off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
+               off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
                                * cache_line_size : 0;
-       while (off) {
-               build_clear_pref(&buf, -off);
-               off -= cache_line_size;
-       }
-       uasm_l_clear_pref(&l, buf);
-       do {
-               build_clear_pref(&buf, off);
-               build_clear_store(&buf, off);
-               off += clear_word_size;
-       } while (off < half_clear_loop_size);
-       pg_addiu(&buf, A0, A0, 2 * off);
-       off = -off;
-       do {
-               build_clear_pref(&buf, off);
-               if (off == -clear_word_size)
-                       uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
-               build_clear_store(&buf, off);
-               off += clear_word_size;
-       } while (off < 0);
-
-       if (pref_bias_clear_store) {
-               pg_addiu(&buf, A2, A0, pref_bias_clear_store);
-               uasm_l_clear_nopref(&l, buf);
-               off = 0;
+               while (off) {
+                       build_clear_pref(&buf, -off);
+                       off -= cache_line_size;
+               }
+               uasm_l_clear_pref(&l, buf);
                do {
+                       build_clear_pref(&buf, off);
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < half_clear_loop_size);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
+                       build_clear_pref(&buf, off);
                        if (off == -clear_word_size)
-                               uasm_il_bne(&buf, &r, A0, A2,
-                                           label_clear_nopref);
+                               uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < 0);
-       }
 
+               if (pref_bias_clear_store) {
+                       pg_addiu(&buf, A2, A0, pref_bias_clear_store);
+                       uasm_l_clear_nopref(&l, buf);
+                       off = 0;
+                       do {
+                               build_clear_store(&buf, off);
+                               off += clear_word_size;
+                       } while (off < half_clear_loop_size);
+                       pg_addiu(&buf, A0, A0, 2 * off);
+                       off = -off;
+                       do {
+                               if (off == -clear_word_size)
+                                       uasm_il_bne(&buf, &r, A0, A2,
+                                                   label_clear_nopref);
+                               build_clear_store(&buf, off);
+                               off += clear_word_size;
+                       } while (off < 0);
+               }
+       }
        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);
 
-- 
1.8.1.4

-- 
_______________________________________________
linux-yocto mailing list
linux-yocto@yoctoproject.org
https://lists.yoctoproject.org/listinfo/linux-yocto

Reply via email to