It was benchmarked against commit id 2be523b from pixman/master. POWER8, 8 cores, 3.4GHz, RHEL 7.1 ppc64le. reference memcpy speed = 24764.8MB/s (6191.2MP/s for 32bpp fills)
Before After Change --------------------------------------------- L1 1115.4 5006.49 +348.85% L2 1112.26 4338.01 +290.02% M 1110.54 2524.15 +127.29% HT 745.41 1140.03 +52.94% VT 749.03 1287.13 +71.84% R 423.91 547.6 +29.18% RT 205.79 194.98 -5.25% Kops/s 1414 1361 -3.75% cairo trimmed benchmarks : Speedups ======== t-gnome-system-monitor 1402.62 -> 1212.75 : 1.16x t-firefox-asteroids 533.92 -> 474.50 : 1.13x Signed-off-by: Oded Gabbay <oded.gab...@gmail.com> Acked-by: Siarhei Siamashka <siarhei.siamas...@gmail.com> --- pixman/pixman-vmx.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c index 47393dc..bc944af 100644 --- a/pixman/pixman-vmx.c +++ b/pixman/pixman-vmx.c @@ -2689,6 +2689,62 @@ vmx_fill (pixman_implementation_t *imp, } static void +vmx_composite_src_x888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int32_t w; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (uintptr_t)dst & 15) + { + *dst++ = *src++ | 0xff000000; + w--; + } + + while (w >= 16) + { + vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4; + + vmx_src1 = load_128_unaligned (src); + vmx_src2 = load_128_unaligned (src + 4); + vmx_src3 = load_128_unaligned (src + 8); + vmx_src4 = load_128_unaligned (src + 12); + + save_128_aligned (dst, vec_or (vmx_src1, mask_ff000000)); + save_128_aligned (dst + 4, vec_or (vmx_src2, mask_ff000000)); + save_128_aligned (dst + 8, vec_or (vmx_src3, mask_ff000000)); + save_128_aligned (dst + 12, vec_or (vmx_src4, mask_ff000000)); + + dst += 16; + 
src += 16; + w -= 16; + } + + while (w) + { + *dst++ = *src++ | 0xff000000; + w--; + } + } +} + +static void vmx_composite_over_8888_8888 (pixman_implementation_t *imp, pixman_composite_info_t *info) { @@ -2913,6 +2969,8 @@ static const pixman_fast_path_t vmx_fast_paths[] = PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8), PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, vmx_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, vmx_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, vmx_composite_src_x888_8888), { PIXMAN_OP_NONE }, }; -- 2.4.3 _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman