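Siarhei, can you measure any performance improvement with this patch, which replaces the SSE2 multiply/unpack/add sequence in the bilinear horizontal interpolation with a single SSSE3 _mm_maddubs_epi16? I can't measure any myself... :( --- pixman/pixman-sse2.c | 8 +++----- 1 files changed, 3 insertions(+), 5 deletions(-)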
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index efed310..4fbc045 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -32,6 +32,7 @@ #include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ #include <emmintrin.h> /* for SSE2 intrinsics */ +#include <tmmintrin.h> /* for SSSE3 intrinsics */ #include "pixman-private.h" #include "pixman-combine32.h" #include "pixman-inlines.h" @@ -5414,7 +5415,7 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, #define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ do { \ - __m128i xmm_wh, xmm_lo, xmm_hi, a; \ + __m128i xmm_wh, a; \ /* fetch 2x2 pixel block into sse2 registers */ \ __m128i tltr = _mm_loadl_epi64 ( \ (__m128i *)&src_top[pixman_fixed_to_int (vx)]); \ @@ -5443,10 +5444,7 @@ do { \ _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ /* horizontal interpolation */ \ - xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ - xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \ - a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \ - _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \ + a = _mm_maddubs_epi16 (a, xmm_wh); \ } \ /* shift and pack the result */ \ a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2); \ -- 1.7.8.6 _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman