No changes were observed when running cairo trimmed benchmarks.

Signed-off-by: Oded Gabbay <oded.gab...@gmail.com>
---
 pixman/pixman-vmx.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)

diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index b9acd6c..b42288b 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -2708,6 +2708,128 @@ vmx_fill (pixman_implementation_t *imp,
     return TRUE;
 }
 
+static pixman_bool_t /* TRUE if the rectangle was copied here, FALSE if declined */
+vmx_blt (pixman_implementation_t *imp, /* not referenced in this function */
+          uint32_t *               src_bits,   /* address of source pixel (0, 0) */
+          uint32_t *               dst_bits,   /* address of destination pixel (0, 0) */
+          int                      src_stride, /* source row pitch, in uint32_t units */
+          int                      dst_stride, /* destination row pitch, in uint32_t units */
+          int                      src_bpp,    /* source bits per pixel; 16 or 32 handled */
+          int                      dst_bpp,    /* destination bits per pixel; must equal src_bpp */
+          int                      src_x,      /* first source column, in pixels */
+          int                      src_y,      /* first source row */
+          int                      dest_x,     /* first destination column, in pixels */
+          int                      dest_y,     /* first destination row */
+          int                      width,      /* pixels to copy per row */
+          int                      height)     /* number of rows to copy */
+{
+    uint8_t *   src_bytes; /* start of the current source row segment */
+    uint8_t *   dst_bytes; /* start of the current destination row segment */
+    int byte_width; /* bytes to copy per row */
+
+    if (src_bpp != dst_bpp) /* straight copy only: no depth conversion */
+       return FALSE;
+
+    if (src_bpp == 16)
+    {
+       src_stride = src_stride * (int) sizeof (uint32_t) / 2; /* uint32_t units -> uint16_t units */
+       dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+       src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+       dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
+       byte_width = 2 * width;
+       src_stride *= 2; /* uint16_t units -> bytes, for the per-row advance below */
+       dst_stride *= 2;
+    }
+    else if (src_bpp == 32)
+    {
+       src_stride = src_stride * (int) sizeof (uint32_t) / 4; /* stays in uint32_t units */
+       dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+       src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+       dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
+       byte_width = 4 * width;
+       src_stride *= 4; /* uint32_t units -> bytes, for the per-row advance below */
+       dst_stride *= 4;
+    }
+    else
+    {
+       return FALSE; /* any other depth is not handled by this routine */
+    }
+
+    while (height--) /* one pass per row */
+    {
+       int w; /* bytes still to copy in this row */
+       uint8_t *s = src_bytes;
+       uint8_t *d = dst_bytes;
+       src_bytes += src_stride; /* pre-advance the row pointers for the next pass */
+       dst_bytes += dst_stride;
+       w = byte_width;
+
+       while (w >= 2 && ((uintptr_t)d & 3)) /* 16-bit copies until d is 4-byte aligned */
+       {
+           *(uint16_t *)d = *(uint16_t *)s;
+           w -= 2;
+           s += 2;
+           d += 2;
+       }
+
+       while (w >= 4 && ((uintptr_t)d & 15)) /* 32-bit copies until d is 16-byte aligned */
+       {
+           *(uint32_t *)d = *(uint32_t *)s;
+
+           w -= 4;
+           s += 4;
+           d += 4;
+       }
+
+       while (w >= 64) /* bulk copy, 64 bytes per pass; only d's alignment was established above */
+       {
+           vector unsigned int vmx0, vmx1, vmx2, vmx3;
+
+           vmx0 = load_128_unaligned ((uint32_t*) s); /* s is read through the unaligned helper */
+           vmx1 = load_128_unaligned ((uint32_t*)(s + 16));
+           vmx2 = load_128_unaligned ((uint32_t*)(s + 32));
+           vmx3 = load_128_unaligned ((uint32_t*)(s + 48));
+
+           save_128_aligned ((uint32_t*)(d), vmx0); /* d is written through the aligned helper */
+           save_128_aligned ((uint32_t*)(d + 16), vmx1);
+           save_128_aligned ((uint32_t*)(d + 32), vmx2);
+           save_128_aligned ((uint32_t*)(d + 48), vmx3);
+
+           s += 64;
+           d += 64;
+           w -= 64;
+       }
+
+       while (w >= 16) /* remaining whole 16-byte vectors */
+       {
+           save_128_aligned ((uint32_t*) d, load_128_unaligned ((uint32_t*) s));
+
+           w -= 16;
+           d += 16;
+           s += 16;
+       }
+
+       while (w >= 4) /* remaining 32-bit words */
+       {
+           *(uint32_t *)d = *(uint32_t *)s;
+
+           w -= 4;
+           s += 4;
+           d += 4;
+       }
+
+       if (w >= 2) /* last 16-bit element, if one is left */
+       {
+           *(uint16_t *)d = *(uint16_t *)s;
+           w -= 2;
+           s += 2;
+           d += 2;
+       }
+    }
+
+    return TRUE;
+}
+
 static void
 vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
                                pixman_composite_info_t *info)
@@ -2812,6 +2934,7 @@ vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
 
 static const pixman_fast_path_t vmx_fast_paths[] =
 {
+    /* PIXMAN_OP_OVER */
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),
@@ -2865,6 +2988,7 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
     imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
     imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
 
+    imp->blt = vmx_blt;
     imp->fill = vmx_fill;
 
     return imp;
-- 
2.4.3

_______________________________________________
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman

Reply via email to