[libav-devel] [PATCH 3/3] x86: videodsp: Small speedups in ff_emulated_edge_mc x86 SIMD.

2014-01-30 Thread Janne Grunau
From: Ronald S. Bultje rsbul...@gmail.com

Do not use word-size multiplications if size == 2, and if we're using
SIMD instructions (size >= 8), complete leftover 4-byte sets using movd,
not mov. Both of these changes lead to minor speedups.

Signed-off-by: Janne Grunau janne-li...@jannau.net
---
 libavcodec/x86/videodsp.asm | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index 53b9e82..d8a7359 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -344,10 +344,6 @@ VERTICAL_EXTEND 16, 22
 ; obviously not the same on both sides.
 
 %macro READ_V_PIXEL 2
-%if %1 == 2
-movzx  valw, byte %2
-imul   valw, 0x0101
-%else
 movzx  vald, byte %2
 imul   vald, 0x01010101
 %if %1 >= 8
@@ -356,13 +352,15 @@ VERTICAL_EXTEND 16, 22
 pshufd   m0, m0, q0000
 %else
 punpckldq m0, m0
-%endif
-%endif ; %1 >= 8
-%endif
+%endif ; mmsize == 16
+%endif ; %1 > 16
 %endmacro ; READ_V_PIXEL
 
 %macro WRITE_V_PIXEL 2
 %assign %%off 0
+
+%if %1 >= 8
+
 %rep %1/mmsize
 movu [%2+%%off], m0
 %assign %%off %%off+mmsize
@@ -378,27 +376,29 @@ VERTICAL_EXTEND 16, 22
 %assign %%off %%off+8
 %endif
 %endif ; %1-%%off >= 8
-%endif
+%endif ; mmsize == 16
 
 %if %1-%%off >= 4
 %if %1 > 8 && %1-%%off > 4
 movq  [%2+%1-8], m0
 %assign %%off %1
-%elif %1 >= 8 && %1-%%off >= 4
-movd [%2+%%off], m0
-%assign %%off %%off+4
 %else
-mov  [%2+%%off], vald
+movd [%2+%%off], m0
 %assign %%off %%off+4
 %endif
 %endif ; %1-%%off >= 4
 
-%if %1-%%off >= 2
-%if %1 >= 8
-movd  [%2+%1-4], m0
-%else
+%else ; %1 < 8
+
+%rep %1/4
+mov  [%2+%%off], vald
+%assign %%off %%off+4
+%endrep ; %1/4
+
+%endif ; %1 >=/< 8
+
+%if %1-%%off == 2
 mov  [%2+%%off], valw
-%endif
 %endif ; (%1-%%off)/2
 %endmacro ; WRITE_V_PIXEL
 
-- 
1.8.5.3

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 3/3] x86: videodsp: Small speedups in ff_emulated_edge_mc x86 SIMD.

2014-01-30 Thread Diego Biurrun
On Thu, Jan 30, 2014 at 03:43:32PM +0100, Janne Grunau wrote:
> --- a/libavcodec/x86/videodsp.asm
> +++ b/libavcodec/x86/videodsp.asm
> @@ -344,10 +344,6 @@ VERTICAL_EXTEND 16, 22
>  movzx  vald, byte %2
>  imul   vald, 0x01010101
>  %if %1 >= 8
> @@ -356,13 +352,15 @@ VERTICAL_EXTEND 16, 22
>  %else
>  punpckldq m0, m0
> -%endif ; %1 >= 8
> +%endif ; %1 > 16

This no longer matches the %if above.

possibly OK otherwise

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel