On Tue, Feb 28, 2012 at 7:47 AM, Nemanja Lukic <nlu...@mips.com> wrote: > From: Nemanja Lukic <nemanja.lu...@rt-rk.com> > > Performance numbers before/after on MIPS-74kc @ 1GHz > > Referent (before): > cairo-perf-trace: > [ # ] backend test min(s) median(s) stddev. count > [ # ] image: pixman 0.25.1 > [ 0] image gnome-system-monitor 268.460 269.712 0.22% 6/6 > > Optimized: > cairo-perf-trace: > [ # ] backend test min(s) median(s) stddev. count > [ # ] image: pixman 0.25.1 > [ 0] image gnome-system-monitor 246.565 246.706 0.04% 6/6 > --- > pixman/pixman-mips-dspr2-asm.S | 114 ++++++++++++++++++++++++++++ > pixman/pixman-mips-dspr2.c | 163 > ++++++++++++++++++++++++++++++++++++++++ > pixman/pixman-mips-dspr2.h | 4 + > 3 files changed, 281 insertions(+), 0 deletions(-) > > diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S > index 0a4c87e..4125beb 100644 > --- a/pixman/pixman-mips-dspr2-asm.S > +++ b/pixman/pixman-mips-dspr2-asm.S > @@ -31,6 +31,120 @@ > > #include "pixman-mips-dspr2-asm.h" > > +LEAF_MIPS32R2(pixman_fill_buff16_mips) > +/* > + * a0 - *dest > + * a1 - count (bytes) > + * a2 - value to fill buffer with > + */ > + > + beqz a1, 3f > + nop > + srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ > + beqz t1, 2f > + nop > +1: > + addiu t1, t1, -1 > + beqz t1, 11f > + addiu a1, a1, -32 > + pref 30, 32(a0) > + sh a2, 0(a0) > + sh a2, 2(a0) > + sh a2, 4(a0) > + sh a2, 6(a0) > + sh a2, 8(a0) > + sh a2, 10(a0) > + sh a2, 12(a0) > + sh a2, 14(a0) > + sh a2, 16(a0) > + sh a2, 18(a0) > + sh a2, 20(a0) > + sh a2, 22(a0) > + sh a2, 24(a0) > + sh a2, 26(a0) > + sh a2, 28(a0) > + sh a2, 30(a0) > + b 1b > + addiu a0, a0, 32 > +11: > + sh a2, 0(a0) > + sh a2, 2(a0) > + sh a2, 4(a0) > + sh a2, 6(a0) > + sh a2, 8(a0) > + sh a2, 10(a0) > + sh a2, 12(a0) > + sh a2, 14(a0) > + sh a2, 16(a0) > + sh a2, 18(a0) > + sh a2, 20(a0) > + sh a2, 22(a0) > + sh a2, 24(a0) > + sh a2, 26(a0) > + sh a2, 28(a0) > + sh a2, 30(a0) > + addiu a0, a0, 32 > +2: > + blez 
a1, 3f > + addiu a1, a1, -2 > + sh a2, 0(a0) > + b 2b > + addiu a0,a0, 2 > +3: > + jr ra > + nop > + > +END(pixman_fill_buff16_mips)
Couldn't we do 4-byte stores (sw instead of sh) in the main loop of pixman_fill_buff16_mips? I would think that would be faster. > + > +LEAF_MIPS32R2(pixman_fill_buff32_mips) > +/* > + * a0 - *dest > + * a1 - count (bytes) > + * a2 - value to fill buffer with > + */ > + > + beqz a1, 3f > + nop > + srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ > + beqz t1, 2f > + nop > +1: > + addiu t1, t1, -1 > + beqz t1, 11f > + addiu a1, a1, -32 > + pref 30, 32(a0) > + sw a2, 0(a0) > + sw a2, 4(a0) > + sw a2, 8(a0) > + sw a2, 12(a0) > + sw a2, 16(a0) > + sw a2, 20(a0) > + sw a2, 24(a0) > + sw a2, 28(a0) > + b 1b > + addiu a0, a0, 32 > +11: > + sw a2, 0(a0) > + sw a2, 4(a0) > + sw a2, 8(a0) > + sw a2, 12(a0) > + sw a2, 16(a0) > + sw a2, 20(a0) > + sw a2, 24(a0) > + sw a2, 28(a0) > + addiu a0, a0, 32 > +2: > + blez a1, 3f > + addiu a1, a1, -4 > + sw a2, 0(a0) > + b 2b > + addiu a0,a0, 4 > +3: > + jr ra > + nop > + > +END(pixman_fill_buff32_mips) > + > LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips) > /* > * a0 - dst (r5g6b5) > diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c > index e331853..2beada3 100644 > --- a/pixman/pixman-mips-dspr2.c > +++ b/pixman/pixman-mips-dspr2.c > @@ -49,6 +49,119 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, > src_8888_8888, > PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888, > uint8_t, 3, uint8_t, 3) > > +static pixman_bool_t > +pixman_fill_mips (uint32_t *bits, > + int stride, > + int bpp, > + int x, > + int y, > + int width, > + int height, > + uint32_t _xor) > +{ > + uint8_t *byte_line; > + uint32_t byte_width; > + switch (bpp) > + { > + case 16: > + stride = stride * (int) sizeof (uint32_t) / 2; > + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); > + byte_width = width * 2; > + stride *= 2; > + > + while (height--) > + { > + uint8_t *dst = byte_line; > + byte_line += stride; > + pixman_fill_buff16_mips (dst, byte_width, _xor & 0xffff); > + } > + return TRUE; > + case 32: > + stride = stride * (int) sizeof (uint32_t) 
/ 4; > + byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); > + byte_width = width * 4; > + stride *= 4; > + > + while (height--) > + { > + uint8_t *dst = byte_line; > + byte_line += stride; > + pixman_fill_buff32_mips (dst, byte_width, _xor); > + } > + return TRUE; > + default: > + return FALSE; > + } > +} > + > +static pixman_bool_t > +pixman_blt_mips (uint32_t *src_bits, > + uint32_t *dst_bits, > + int src_stride, > + int dst_stride, > + int src_bpp, > + int dst_bpp, > + int src_x, > + int src_y, > + int dest_x, > + int dest_y, > + int width, > + int height) > +{ > + if (src_bpp != dst_bpp) > + return FALSE; > + > + uint8_t *src_bytes; > + uint8_t *dst_bytes; > + uint32_t byte_width; > + > + switch (src_bpp) > + { > + case 16: > + src_stride = src_stride * (int) sizeof (uint32_t) / 2; > + dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; > + src_bytes =(uint8_t *)(((uint16_t *)src_bits) > + + src_stride * (src_y) + (src_x)); > + dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) > + + dst_stride * (dest_y) + > (dest_x)); > + byte_width = width * 2; > + src_stride *= 2; > + dst_stride *= 2; > + > + while (height--) > + { > + uint8_t *src = src_bytes; > + uint8_t *dst = dst_bytes; > + src_bytes += src_stride; > + dst_bytes += dst_stride; > + pixman_mips_fast_memcpy (dst, src, byte_width); > + } > + return TRUE; > + case 32: > + src_stride = src_stride * (int) sizeof (uint32_t) / 4; > + dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; > + src_bytes = (uint8_t *)(((uint32_t *)src_bits) > + + src_stride * (src_y) + (src_x)); > + dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) > + + dst_stride * (dest_y) + > (dest_x)); > + byte_width = width * 4; > + src_stride *= 4; > + dst_stride *= 4; > + > + while (height--) > + { > + uint8_t *src = src_bytes; > + uint8_t *dst = dst_bytes; > + src_bytes += src_stride; > + dst_bytes += dst_stride; > + pixman_mips_fast_memcpy (dst, src, byte_width); > + } > + return TRUE; > + default: > + return FALSE; > + } > +} > 
+ > static const pixman_fast_path_t mips_dspr2_fast_paths[] = > { > PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, > mips_composite_src_0565_0565), > @@ -74,11 +187,61 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = > { PIXMAN_OP_NONE }, > }; > > +static pixman_bool_t > +mips_dspr2_blt (pixman_implementation_t *imp, > + uint32_t * src_bits, > + uint32_t * dst_bits, > + int src_stride, > + int dst_stride, > + int src_bpp, > + int dst_bpp, > + int src_x, > + int src_y, > + int dest_x, > + int dest_y, > + int width, > + int height) > +{ > + if (!pixman_blt_mips ( > + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, > + src_x, src_y, dest_x, dest_y, width, height)) > + > + { > + return _pixman_implementation_blt ( > + imp->delegate, > + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, > + src_x, src_y, dest_x, dest_y, width, height); > + } > + > + return TRUE; > +} > + > +static pixman_bool_t > +mips_dspr2_fill (pixman_implementation_t *imp, > + uint32_t * bits, > + int stride, > + int bpp, > + int x, > + int y, > + int width, > + int height, > + uint32_t xor) > +{ > + if (pixman_fill_mips (bits, stride, bpp, x, y, width, height, xor)) > + return TRUE; > + > + return _pixman_implementation_fill ( > + imp->delegate, bits, stride, bpp, x, y, width, height, xor); > +} > + > pixman_implementation_t * > _pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback) > { > pixman_implementation_t *imp = > _pixman_implementation_create (fallback, mips_dspr2_fast_paths); > > + imp->blt = mips_dspr2_blt; > + imp->fill = mips_dspr2_fill; > + > return imp; > } > diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h > index 449c42a..a40e7c8 100644 > --- a/pixman/pixman-mips-dspr2.h > +++ b/pixman/pixman-mips-dspr2.h > @@ -41,6 +41,10 @@ > > void > pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes); > +void > +pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value); > +void > 
+pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value); > > /****************************************************************/ > > -- > 1.7.3 > > _______________________________________________ > Pixman mailing list > Pixman@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/pixman _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman