Performance numbers before/after on MIPS-24kc @ 500 MHz:

Reference (before):
src_n_0565 = L1: 117.24 L2: 110.68 M:115.83 ( 96.31%) HT: 78.96 VT: 75.03 R: 65.98 RT: 24.94 ( 164Kops/s) Optimized (with these optimizations): src_n_0565 = L1: 429.43 L2: 299.39 M:346.21 (287.61%) HT: 90.68 VT: 80.23 R: 70.99 RT: 23.13 ( 156Kops/s) --- pixman/pixman-mips-common.h | 2 + pixman/pixman-mips32r2-asm.S | 55 ++++++++++++++++++++++++++++++++++++++++++ pixman/pixman-mips32r2.c | 19 +++++--------- 3 files changed, 64 insertions(+), 12 deletions(-) diff --git a/pixman/pixman-mips-common.h b/pixman/pixman-mips-common.h index 70af1f7..05ff7ad 100644 --- a/pixman/pixman-mips-common.h +++ b/pixman/pixman-mips-common.h @@ -43,6 +43,8 @@ void pixman_fast_memcpy_mips32r2 (void *dst, void *src, uint32_t n_bytes); void pixman_fill_buff32_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value); +void +pixman_fill_buff16_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value); #ifdef USE_MIPS_DSPR1 void diff --git a/pixman/pixman-mips32r2-asm.S b/pixman/pixman-mips32r2-asm.S index 3f73e41..75ff9e2 100644 --- a/pixman/pixman-mips32r2-asm.S +++ b/pixman/pixman-mips32r2-asm.S @@ -381,6 +381,61 @@ $ua_smallCopy_loop: END_MIPS32R2(pixman_fast_memcpy) +LEAF_MIPS32R2(pixman_fill_buff16) +/* + * a0 - *dest + * a1 - count (bytes) + * a2 - value to fill buffer with + */ + + beqz a1, 3f + andi t1, a0, 0x0002 + beqz t1, 0f /* check if address is 4-byte aligned */ + nop + sh a2, 0(a0) + addiu a0, a0, 2 + addiu a1, a1, -2 +0: + srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ + beqz t1, 2f + ins a2, a2, 16, 16 +1: + addiu t1, t1, -1 + beqz t1, 11f + addiu a1, a1, -32 + pref 30, 32(a0) + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + b 1b + addiu a0, a0, 32 +11: + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + addiu a0, a0, 32 +2: + blez a1, 3f + addiu a1, a1, -2 + sh a2, 0(a0) + b 2b + addiu a0, a0, 2 +3: + jr ra + nop + 
+END_MIPS32R2(pixman_fill_buff16) + LEAF_MIPS32R2(pixman_fill_buff32) /* * a0 - *dest diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c index 99ceb85..18fc786 100644 --- a/pixman/pixman-mips32r2.c +++ b/pixman/pixman-mips32r2.c @@ -58,25 +58,20 @@ mips32r2_fill (pixman_implementation_t *imp, { uint8_t *byte_line; uint32_t byte_width; - int i, short_stride; - uint16_t *dst; - uint16_t v; switch (bpp) { case 16: - short_stride = (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); - dst = (uint16_t *)bits; - v = _xor & 0xffff; - - dst = dst + y * short_stride + x; + stride = stride * (int) sizeof (uint32_t) / 2; + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); + byte_width = width * 2; + stride *= 2; while (height--) { - for (i = 0; i < width; ++i) - dst[i] = v; - - dst += short_stride; + uint8_t *dst = byte_line; + byte_line += stride; + pixman_fill_buff16_mips32r2 (dst, byte_width, _xor & 0xffff); } return TRUE; case 32: -- 1.7.3 _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman