The generic C over_u combiner can be made a lot faster by adding
special shortcuts for 0xFF and 0x00 alpha/mask values. Such shortcuts
are already implemented in the C and SSE2 fast paths.

Profiling a run of the cairo-perf-trace benchmarks with the PIXMAN_DISABLE
environment variable set to "fast mmx sse2" on an Intel Core i7:

=== before ===

37.32%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_over_u
21.37%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_no_repeat_8888
13.51%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_none_a8r8g8b8
 2.96%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] radial_compute_color
 2.74%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] fetch_scanline_a8
 2.71%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] fetch_scanline_x8r8g8b8
 2.17%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] _pixman_gradient_walker_pixel
 1.86%  cairo-perf-trac  libcairo.so.2.11200.0 [.] _cairo_tor_scan_converter_generate
 1.57%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_pad_a8r8g8b8
 0.97%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_in_reverse_u
 0.96%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_over_ca

=== after ===

28.79%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_no_repeat_8888
18.44%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_none_a8r8g8b8
15.54%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_over_u
 3.94%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] radial_compute_color
 3.69%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] fetch_scanline_a8
 3.69%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] fetch_scanline_x8r8g8b8
 2.94%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] _pixman_gradient_walker_pixel
 2.52%  cairo-perf-trac  libcairo.so.2.11200.0 [.] _cairo_tor_scan_converter_generate
 2.08%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_pad_a8r8g8b8
 1.31%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_in_reverse_u
 1.29%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_over_ca
---
 pixman/pixman-combine32.c |   58 +++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c
index 54cc877..3ac7576 100644
--- a/pixman/pixman-combine32.c
+++ b/pixman/pixman-combine32.c
@@ -196,14 +196,58 @@ combine_over_u (pixman_implementation_t *imp,
 {
     int i;
 
-    for (i = 0; i < width; ++i)
+    if (!mask)
     {
-       uint32_t s = combine_mask (src, mask, i);
-       uint32_t d = *(dest + i);
-       uint32_t ia = ALPHA_8 (~s);
-
-       UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-       *(dest + i) = d;
+       for (i = 0; i < width; ++i)
+       {
+           uint32_t s = *(src + i);
+           uint32_t a = ALPHA_8 (s);
+           if (a == 0xFF)
+           {
+               *(dest + i) = s;
+           }
+           else if (s)
+           {
+               uint32_t d = *(dest + i);
+               uint32_t ia = a ^ 0xFF;
+               UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+               *(dest + i) = d;
+           }
+       }
+    }
+    else
+    {
+       for (i = 0; i < width; ++i)
+       {
+           uint32_t m = ALPHA_8 (*(mask + i));
+           if (m == 0xFF)
+           {
+               uint32_t s = *(src + i);
+               uint32_t a = ALPHA_8 (s);
+               if (a == 0xFF)
+               {
+                   *(dest + i) = s;
+               }
+               else if (s)
+               {
+                   uint32_t d = *(dest + i);
+                   uint32_t ia = a ^ 0xFF;
+                   UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+                   *(dest + i) = d;
+               }
+           }
+           else if (m)
+           {
+               uint32_t s = *(src + i);
+               if (s)
+               {
+                   uint32_t d = *(dest + i);
+                   UN8x4_MUL_UN8 (s, m);
+                   UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s);
+                   *(dest + i) = d;
+               }
+           }
+       }
     }
 }
 
-- 
1.7.8.6

_______________________________________________
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman

Reply via email to