---
 pixman/pixman-mips-common.h    |   31 +++++++++--
 pixman/pixman-mips-dspr1-asm.S |   59 +++++++++++++++++++++-
 pixman/pixman-mips-dspr1.c     |   15 ++++--
 pixman/pixman-mips-dspr2.c     |    6 +--
 pixman/pixman-mips.c           |   34 ++++++++++++-
 pixman/pixman-mips32r2-asm.S   |  110 ++++++++++++++++++++++++++++++++++++++--
 pixman/pixman-mips32r2.c       |   25 +++++++--
 7 files changed, 252 insertions(+), 28 deletions(-)
diff --git a/pixman/pixman-mips-common.h b/pixman/pixman-mips-common.h index 05ff7ad..a141226 100644 --- a/pixman/pixman-mips-common.h +++ b/pixman/pixman-mips-common.h @@ -39,16 +39,37 @@ #define SKIP_ZERO_MASK 2 #define DO_FAST_MEMCPY 3 -void +#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS_DSPR1) || \ + defined(USE_MIPS32R2) +extern int allow_prefetch; +#endif + +void* +(*pixman_fast_memcpy_mips) (void *dst, void *src, uint32_t n_bytes); +void* pixman_fast_memcpy_mips32r2 (void *dst, void *src, uint32_t n_bytes); + +void +(*pixman_fill_buff32_mips32r2) (void *dst, uint32_t n_bytes, uint32_t value); void -pixman_fill_buff32_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value); +pixman_fill_buff32_pref_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value); void -pixman_fill_buff16_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value); +pixman_fill_buff32_no_pref_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value); + +void +(*pixman_fill_buff16_mips32r2) (void *dst, uint32_t n_bytes, uint16_t value); +void +pixman_fill_buff16_pref_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value); +void +pixman_fill_buff16_no_pref_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value); #ifdef USE_MIPS_DSPR1 void -pixman_fill_buff16_mips_dspr1 (void *dst, uint32_t n_bytes, uint16_t value); +(*pixman_fill_buff16_mips_dspr1) (void *dst, uint32_t n_bytes, uint16_t value); +void +pixman_fill_buff16_pref_mips_dspr1 (void *dst, uint32_t n_bytes, uint16_t value); +void +pixman_fill_buff16_no_pref_mips_dspr1 (void *dst, uint32_t n_bytes, uint16_t value); #endif /****************************************************************/ @@ -85,7 +106,7 @@ mips_composite_##name (pixman_implementation_t *imp, \ src_line += src_stride; \ \ if (flags == DO_FAST_MEMCPY) \ - pixman_fast_memcpy_mips32r2 (dst, src, width * bpp); \ + pixman_fast_memcpy_mips (dst, src, width * bpp); \ else \ pixman_composite_##name##_asm##suffix (dst, src, width); \ } \ diff --git a/pixman/pixman-mips-dspr1-asm.S 
b/pixman/pixman-mips-dspr1-asm.S index a4b9ebc..91fae9a 100644 --- a/pixman/pixman-mips-dspr1-asm.S +++ b/pixman/pixman-mips-dspr1-asm.S @@ -32,7 +32,62 @@ #include "pixman-private.h" #include "pixman-mips-dspr1-asm.h" -LEAF_MIPS_DSPR1(pixman_fill_buff16) +LEAF_MIPS_DSPR1(pixman_fill_buff16_no_pref) +/* + * a0 - *dest + * a1 - count (bytes) + * a2 - value to fill buffer with + */ + + beqz a1, 3f + andi t1, a0, 0x0002 + beqz t1, 0f /* check if address is 4-byte aligned */ + nop + sh a2, 0(a0) + addiu a0, a0, 2 + addiu a1, a1, -2 +0: + srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ + replv.ph a2, a2 /* replicate fill value (16bit) in a2 */ + beqz t1, 2f + nop +1: + addiu t1, t1, -1 + beqz t1, 11f + addiu a1, a1, -32 + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + b 1b + addiu a0, a0, 32 +11: + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + addiu a0, a0, 32 +2: + blez a1, 3f + addiu a1, a1, -2 + sh a2, 0(a0) + b 2b + addiu a0, a0, 2 +3: + jr ra + nop + +END_MIPS_DSPR1(pixman_fill_buff16_no_pref) + +LEAF_MIPS_DSPR1(pixman_fill_buff16_pref) /* * a0 - *dest * a1 - count (bytes) @@ -86,7 +141,7 @@ LEAF_MIPS_DSPR1(pixman_fill_buff16) jr ra nop -END_MIPS_DSPR1(pixman_fill_buff16) +END_MIPS_DSPR1(pixman_fill_buff16_pref) LEAF_MIPS_DSPR1(pixman_composite_add_8888_8888_asm) /* diff --git a/pixman/pixman-mips-dspr1.c b/pixman/pixman-mips-dspr1.c index 638d993..8b04fe6 100644 --- a/pixman/pixman-mips-dspr1.c +++ b/pixman/pixman-mips-dspr1.c @@ -152,11 +152,7 @@ mips_dspr1_blt (pixman_implementation_t *imp, uint8_t *dst = dst_bytes; src_bytes += src_stride; dst_bytes += dst_stride; -#ifdef USE_MIPS32R2 - pixman_fast_memcpy_mips32r2 (dst, src, byte_width); -#else - memcpy (dst, src, byte_width); -#endif + pixman_fast_memcpy_mips (dst, src, byte_width); } return TRUE; @@ -175,6 +171,15 @@ 
_pixman_implementation_create_mips_dspr1 (pixman_implementation_t *fallback) pixman_implementation_t *imp = _pixman_implementation_create (fallback, mips_dspr1_fast_paths); + if (allow_prefetch) + { + pixman_fill_buff16_mips_dspr1 = pixman_fill_buff16_pref_mips_dspr1; + } + else + { + pixman_fill_buff16_mips_dspr1 = pixman_fill_buff16_no_pref_mips_dspr1; + } + imp->blt = mips_dspr1_blt; imp->fill = mips_dspr1_fill; diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c index a9773b7..a1551ca 100644 --- a/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman-mips-dspr2.c @@ -292,11 +292,7 @@ mips_dspr2_blt (pixman_implementation_t *imp, uint8_t *dst = dst_bytes; src_bytes += src_stride; dst_bytes += dst_stride; -#ifdef USE_MIPS32R2 - pixman_fast_memcpy_mips32r2 (dst, src, byte_width); -#else - memcpy (dst, src, byte_width); -#endif + pixman_fast_memcpy_mips (dst, src, byte_width); } return TRUE; diff --git a/pixman/pixman-mips.c b/pixman/pixman-mips.c index 8825621..a970165 100644 --- a/pixman/pixman-mips.c +++ b/pixman/pixman-mips.c @@ -24,9 +24,15 @@ #endif #include "pixman-private.h" +#include "pixman-mips-common.h" #include <string.h> #include <stdlib.h> +#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS_DSPR1) || \ + defined(USE_MIPS32R2) +int allow_prefetch; +#endif + #ifdef USE_MIPS_DSPR2 static const char *mips_dspr2_cores[] = { @@ -144,16 +150,40 @@ _pixman_mips_get_implementations (pixman_implementation_t *imp) if (sizeof (uintptr_t) != 4) return imp; +#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS_DSPR1) || \ + defined(USE_MIPS32R2) + allow_prefetch = 0; + pixman_fast_memcpy_mips = (void*)memcpy; +#endif + #ifdef USE_MIPS32R2 if (!_pixman_disabled ("mips32r2")) { int already_compiling_everything_for_mips32r2 = 0; + pixman_bool_t temp = FALSE; #if defined(__mips__) && (__mips_isa_rev >= 2) already_compiling_everything_for_mips32r2 = 1; #endif - if (already_compiling_everything_for_mips32r2 || - have_feature (mips32r2_cores, "mips32r2") + temp = 
have_feature (mips32r2_cores, "mips32r2"); + if (already_compiling_everything_for_mips32r2 || temp) + { + if (temp) + { + int cache_line_size; + + __asm__ volatile ( + ".set arch=mips32r2 \n\t" + "rdhwr %0, $1 \n\t" + : "=r" (cache_line_size) + : + ); + + if (cache_line_size == 32) + allow_prefetch = 1; + } imp = _pixman_implementation_create_mips32r2 (imp); + } + } #endif #ifdef USE_MIPS_DSPR1 diff --git a/pixman/pixman-mips32r2-asm.S b/pixman/pixman-mips32r2-asm.S index 75ff9e2..38048a0 100644 --- a/pixman/pixman-mips32r2-asm.S +++ b/pixman/pixman-mips32r2-asm.S @@ -381,7 +381,61 @@ $ua_smallCopy_loop: END_MIPS32R2(pixman_fast_memcpy) -LEAF_MIPS32R2(pixman_fill_buff16) +LEAF_MIPS32R2(pixman_fill_buff16_no_pref) +/* + * a0 - *dest + * a1 - count (bytes) + * a2 - value to fill buffer with + */ + + beqz a1, 3f + andi t1, a0, 0x0002 + beqz t1, 0f /* check if address is 4-byte aligned */ + nop + sh a2, 0(a0) + addiu a0, a0, 2 + addiu a1, a1, -2 +0: + srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ + beqz t1, 2f + ins a2, a2, 16, 16 +1: + addiu t1, t1, -1 + beqz t1, 11f + addiu a1, a1, -32 + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + b 1b + addiu a0, a0, 32 +11: + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + addiu a0, a0, 32 +2: + blez a1, 3f + addiu a1, a1, -2 + sh a2, 0(a0) + b 2b + addiu a0, a0, 2 +3: + jr ra + nop + +END_MIPS32R2(pixman_fill_buff16_no_pref) + +LEAF_MIPS32R2(pixman_fill_buff16_pref) /* * a0 - *dest * a1 - count (bytes) @@ -434,9 +488,57 @@ LEAF_MIPS32R2(pixman_fill_buff16) jr ra nop -END_MIPS32R2(pixman_fill_buff16) +END_MIPS32R2(pixman_fill_buff16_pref) + +LEAF_MIPS32R2(pixman_fill_buff32_no_pref) +/* + * a0 - *dest + * a1 - count (bytes) + * a2 - value to fill buffer with + */ + + beqz a1, 3f + nop + srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ + beqz t1, 2f + nop +1: + 
addiu t1, t1, -1 + beqz t1, 11f + addiu a1, a1, -32 + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + b 1b + addiu a0, a0, 32 +11: + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + addiu a0, a0, 32 +2: + blez a1, 3f + addiu a1, a1, -4 + sw a2, 0(a0) + b 2b + addiu a0, a0, 4 +3: + jr ra + nop + +END_MIPS32R2(pixman_fill_buff32_no_pref) -LEAF_MIPS32R2(pixman_fill_buff32) +LEAF_MIPS32R2(pixman_fill_buff32_pref) /* * a0 - *dest * a1 - count (bytes) @@ -483,7 +585,7 @@ LEAF_MIPS32R2(pixman_fill_buff32) jr ra nop -END_MIPS32R2(pixman_fill_buff32) +END_MIPS32R2(pixman_fill_buff32_pref) LEAF_MIPS32R2(pixman_composite_src_x888_8888_asm) /* diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c index 26b62f7..4b27608 100644 --- a/pixman/pixman-mips32r2.c +++ b/pixman/pixman-mips32r2.c @@ -146,13 +146,17 @@ mips32r2_blt (pixman_implementation_t *imp, uint8_t *dst = dst_bytes; src_bytes += src_stride; dst_bytes += dst_stride; - pixman_fast_memcpy_mips32r2 (dst, src, byte_width); + pixman_fast_memcpy_mips (dst, src, byte_width); } return TRUE; } static const pixman_fast_path_t mips32r2_fast_paths[] = { + /* pref ON */ + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888), + /* pref OFF */ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mips_composite_src_0565_0565), PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mips_composite_src_0565_0565), PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888), @@ -162,16 +166,27 @@ static const pixman_fast_path_t mips32r2_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mips_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888), PIXMAN_STD_FAST_PATH (SRC, 
r8g8b8, null, r8g8b8, mips_composite_src_0888_0888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888), { PIXMAN_OP_NONE }, }; pixman_implementation_t * _pixman_implementation_create_mips32r2 (pixman_implementation_t *fallback) { - pixman_implementation_t *imp = - _pixman_implementation_create (fallback, mips32r2_fast_paths); + pixman_implementation_t *imp; + + if (allow_prefetch) + { + imp = _pixman_implementation_create (fallback, &mips32r2_fast_paths[0]); + pixman_fill_buff16_mips32r2 = pixman_fill_buff16_pref_mips32r2; + pixman_fill_buff32_mips32r2 = pixman_fill_buff32_pref_mips32r2; + pixman_fast_memcpy_mips = pixman_fast_memcpy_mips32r2; + } + else + { + imp = _pixman_implementation_create (fallback, &mips32r2_fast_paths[2]); + pixman_fill_buff16_mips32r2 = pixman_fill_buff16_no_pref_mips32r2; + pixman_fill_buff32_mips32r2 = pixman_fill_buff32_no_pref_mips32r2; + } imp->blt = mips32r2_blt; imp->fill = mips32r2_fill; -- 1.7.3 _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman