Re: [FFmpeg-devel] [PATCH 1/2] swscale/x86/output: Move code into yuv2planeX_mainloop

2016-02-17 Thread Michael Niedermayer
On Wed, Feb 17, 2016 at 04:30:39AM +0100, Michael Niedermayer wrote:
> Signed-off-by: Michael Niedermayer 
> ---
>  libswscale/x86/output.asm |  141 +++--
>  1 file changed, 72 insertions(+), 69 deletions(-)

Patch set approved by Ronald on IRC yesterday and applied.

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Let us carefully observe those good qualities wherein our enemies excel us
and endeavor to excel them, by avoiding what is faulty, and imitating what
is excellent in them. -- Plutarch


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/2] swscale/x86/output: Move code into yuv2planeX_mainloop

2016-02-16 Thread Michael Niedermayer
Signed-off-by: Michael Niedermayer 
---
 libswscale/x86/output.asm |  141 +++--
 1 file changed, 72 insertions(+), 69 deletions(-)

diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 9ea4af9..9570969 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -54,75 +54,7 @@ SECTION .text
 ; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple
 ; of 2. $offset is either 0 or 3. $dither holds 8 values.
 ;-----------------------------------------------------------------------------
-
-%macro yuv2planeX_fn 3
-
-%if ARCH_X86_32
-%define cntr_reg fltsizeq
-%define movsx mov
-%else
-%define cntr_reg r7
-%define movsx movsxd
-%endif
-
-cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
-%if %1 == 8 || %1 == 9 || %1 == 10
-pxor    m6,  m6
-%endif ; %1 == 8/9/10
-
-%if %1 == 8
-%if ARCH_X86_32
-%assign pad 0x2c - (stack_offset & 15)
-SUB rsp, pad
-%define m_dith m7
-%else ; x86-64
-%define m_dith m9
-%endif ; x86-32
-
-; create registers holding dither
-movq    m_dith, [ditherq] ; dither
-test    offsetd, offsetd
-jz  .no_rot
-%if mmsize == 16
-punpcklqdq  m_dith,  m_dith
-%endif ; mmsize == 16
-PALIGNR m_dith,  m_dith,  3,  m0
-.no_rot:
-%if mmsize == 16
-punpcklbw   m_dith,  m6
-%if ARCH_X86_64
-punpcklwd   m8,  m_dith,  m6
-pslld   m8,  12
-%else ; x86-32
-punpcklwd   m5,  m_dith,  m6
-pslld   m5,  12
-%endif ; x86-32/64
-punpckhwd   m_dith,  m6
-pslld   m_dith,  12
-%if ARCH_X86_32
-mova  [rsp+ 0],  m5
-mova  [rsp+16],  m_dith
-%endif
-%else ; mmsize == 8
-punpcklbw   m5,  m_dith,  m6
-punpckhbw   m_dith,  m6
-punpcklwd   m4,  m5,  m6
-punpckhwd   m5,  m6
-punpcklwd   m3,  m_dith,  m6
-punpckhwd   m_dith,  m6
-pslld   m4,  12
-pslld   m5,  12
-pslld   m3,  12
-pslld   m_dith,  12
-mova  [rsp+ 0],  m4
-mova  [rsp+ 8],  m5
-mova  [rsp+16],  m3
-mova  [rsp+24],  m_dith
-%endif ; mmsize == 8/16
-%endif ; %1 == 8
-
-xor r5,  r5
-
+%macro yuv2planeX_mainloop 1
 .pixelloop:
 %assign %%i 0
 ; the rep here is for the 8bit output mmx case, where dither covers
@@ -233,6 +165,77 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
 %assign %%i %%i+2
 %endrep
 jg .pixelloop
+%endmacro
+
+%macro yuv2planeX_fn 3
+
+%if ARCH_X86_32
+%define cntr_reg fltsizeq
+%define movsx mov
+%else
+%define cntr_reg r7
+%define movsx movsxd
+%endif
+
+cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
+%if %1 == 8 || %1 == 9 || %1 == 10
+pxor    m6,  m6
+%endif ; %1 == 8/9/10
+
+%if %1 == 8
+%if ARCH_X86_32
+%assign pad 0x2c - (stack_offset & 15)
+SUB rsp, pad
+%define m_dith m7
+%else ; x86-64
+%define m_dith m9
+%endif ; x86-32
+
+; create registers holding dither
+movq    m_dith, [ditherq] ; dither
+test    offsetd, offsetd
+jz  .no_rot
+%if mmsize == 16
+punpcklqdq  m_dith,  m_dith
+%endif ; mmsize == 16
+PALIGNR m_dith,  m_dith,  3,  m0
+.no_rot:
+%if mmsize == 16
+punpcklbw   m_dith,  m6
+%if ARCH_X86_64
+punpcklwd   m8,  m_dith,  m6
+pslld   m8,  12
+%else ; x86-32
+punpcklwd   m5,  m_dith,  m6
+pslld   m5,  12
+%endif ; x86-32/64
+punpckhwd   m_dith,  m6
+pslld   m_dith,  12
+%if ARCH_X86_32
+mova  [rsp+ 0],  m5
+mova  [rsp+16],  m_dith
+%endif
+%else ; mmsize == 8
+punpcklbw   m5,  m_dith,  m6
+punpckhbw   m_dith,  m6
+punpcklwd   m4,  m5,  m6
+punpckhwd   m5,  m6
+punpcklwd   m3,  m_dith,  m6
+punpckhwd   m_dith,  m6
+pslld   m4,  12
+pslld   m5,  12
+pslld   m3,  12
+pslld   m_dith,  12
+mova  [rsp+ 0],  m4
+mova  [rsp+ 8],  m5
+mova  [rsp+16],  m3
+mova  [rsp+24],  m_dith
+%endif ; mmsize == 8/16
+%endif ; %1 == 8
+
+xor r5,  r5
+
+yuv2planeX_mainloop %1
 
 %if %1 == 8
 %if ARCH_X86_32
-- 
1.7.9.5

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel