No changes were observed when running cairo trimmed benchmarks. Signed-off-by: Oded Gabbay <oded.gab...@gmail.com> --- pixman/pixman-vmx.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+)
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c index b9acd6c..b42288b 100644 --- a/pixman/pixman-vmx.c +++ b/pixman/pixman-vmx.c @@ -2708,6 +2708,128 @@ vmx_fill (pixman_implementation_t *imp, return TRUE; } +/* Copy a width x height pixel rectangle from (src_x, src_y) in src_bits to (dest_x, dest_y) in dst_bits. Returns FALSE, copying nothing, when src_bpp differs from dst_bpp or is neither 16 nor 32; otherwise returns TRUE. imp is not used. The strides are scaled by sizeof (uint32_t) below, so they are presumably counts of uint32_t per row - confirm against callers. */ static pixman_bool_t +vmx_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + uint8_t * src_bytes; + uint8_t * dst_bytes; + int byte_width; + + if (src_bpp != dst_bpp) + return FALSE; + + if (src_bpp == 16) + { + /* stride in uint16_t units to locate the first pixel; turned into bytes below for row stepping */ src_stride = src_stride * (int) sizeof (uint32_t) / 2; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; + src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); + byte_width = 2 * width; + src_stride *= 2; + dst_stride *= 2; + } + else if (src_bpp == 32) + { + /* same scheme as the 16 bpp branch, with 4-byte pixels */ src_stride = src_stride * (int) sizeof (uint32_t) / 4; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; + src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); + byte_width = 4 * width; + src_stride *= 4; + dst_stride *= 4; + } + else + { + return FALSE; + } + + /* one iteration per row; w counts the bytes still to copy in the current row */ while (height--) + { + int w; + uint8_t *s = src_bytes; + uint8_t *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + w = byte_width; + + /* copy 16 bits at a time until d is 4-byte aligned */ while (w >= 2 && ((uintptr_t)d & 3)) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + + /* copy 32 bits at a time until d is 16-byte aligned */ while (w >= 4 && ((uintptr_t)d & 15)) + { + *(uint32_t *)d = *(uint32_t *)s; + + w -= 4; + s += 4; + d += 4; + } + + /* 64 bytes per iteration: four load_128_unaligned reads from s, then four save_128_aligned writes to d; the two loops above leave d 16-byte aligned whenever w >= 16 at this point */ while (w >= 64) + { + vector unsigned int vmx0, vmx1, vmx2, vmx3; + + vmx0 = load_128_unaligned ((uint32_t*) s); + vmx1 = load_128_unaligned ((uint32_t*)(s + 16)); + vmx2 = load_128_unaligned ((uint32_t*)(s + 32)); + vmx3 = 
load_128_unaligned ((uint32_t*)(s + 48)); + + save_128_aligned ((uint32_t*)(d), vmx0); + save_128_aligned ((uint32_t*)(d + 16), vmx1); + save_128_aligned ((uint32_t*)(d + 32), vmx2); + save_128_aligned ((uint32_t*)(d + 48), vmx3); + + s += 64; + d += 64; + w -= 64; + } + + /* remaining whole 16-byte chunks, one vector at a time */ while (w >= 16) + { + save_128_aligned ((uint32_t*) d, load_128_unaligned ((uint32_t*) s)); + + w -= 16; + d += 16; + s += 16; + } + + /* remaining 32-bit words */ while (w >= 4) + { + *(uint32_t *)d = *(uint32_t *)s; + + w -= 4; + s += 4; + d += 4; + } + + /* last 16-bit unit, if any */ if (w >= 2) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + } + + return TRUE; +} + static void vmx_composite_over_8888_8888 (pixman_implementation_t *imp, pixman_composite_info_t *info) @@ -2812,6 +2934,7 @@ vmx_composite_add_8888_8888 (pixman_implementation_t *imp, static const pixman_fast_path_t vmx_fast_paths[] = { + /* PIXMAN_OP_OVER */ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888), @@ -2865,6 +2988,7 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback) imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca; imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca; + imp->blt = vmx_blt; imp->fill = vmx_fill; return imp; -- 2.4.3 _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman