Signed-off-by: Michael Niedermayer
---
libswscale/x86/output.asm | 141 +++--
1 file changed, 72 insertions(+), 69 deletions(-)
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 9ea4af9..9570969 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -54,75 +54,7 @@ SECTION .text
; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple
; of 2. $offset is either 0 or 3. $dither holds 8 values.
;-
-
-%macro yuv2planeX_fn 3
-
-%if ARCH_X86_32
-%define cntr_reg fltsizeq
-%define movsx mov
-%else
-%define cntr_reg r7
-%define movsx movsxd
-%endif
-
-cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
-%if %1 == 8 || %1 == 9 || %1 == 10
-pxor m6, m6
-%endif ; %1 == 8/9/10
-
-%if %1 == 8
-%if ARCH_X86_32
-%assign pad 0x2c - (stack_offset & 15)
-SUB rsp, pad
-%define m_dith m7
-%else ; x86-64
-%define m_dith m9
-%endif ; x86-32
-
-; create registers holding dither
-movq m_dith, [ditherq] ; dither
-test offsetd, offsetd
-jz .no_rot
-%if mmsize == 16
-punpcklqdq m_dith, m_dith
-%endif ; mmsize == 16
-PALIGNR m_dith, m_dith, 3, m0
-.no_rot:
-%if mmsize == 16
-punpcklbw m_dith, m6
-%if ARCH_X86_64
-punpcklwd m8, m_dith, m6
-pslld m8, 12
-%else ; x86-32
-punpcklwd m5, m_dith, m6
-pslld m5, 12
-%endif ; x86-32/64
-punpckhwd m_dith, m6
-pslld m_dith, 12
-%if ARCH_X86_32
-mova [rsp+ 0], m5
-mova [rsp+16], m_dith
-%endif
-%else ; mmsize == 8
-punpcklbw m5, m_dith, m6
-punpckhbw m_dith, m6
-punpcklwd m4, m5, m6
-punpckhwd m5, m6
-punpcklwd m3, m_dith, m6
-punpckhwd m_dith, m6
-pslld m4, 12
-pslld m5, 12
-pslld m3, 12
-pslld m_dith, 12
-mova [rsp+ 0], m4
-mova [rsp+ 8], m5
-mova [rsp+16], m3
-mova [rsp+24], m_dith
-%endif ; mmsize == 8/16
-%endif ; %1 == 8
-
-xor r5, r5
-
+%macro yuv2planeX_mainloop 1
.pixelloop:
%assign %%i 0
; the rep here is for the 8bit output mmx case, where dither covers
@@ -233,6 +165,77 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
%assign %%i %%i+2
%endrep
jg .pixelloop
+%endmacro
+
+%macro yuv2planeX_fn 3
+
+%if ARCH_X86_32
+%define cntr_reg fltsizeq
+%define movsx mov
+%else
+%define cntr_reg r7
+%define movsx movsxd
+%endif
+
+cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
+%if %1 == 8 || %1 == 9 || %1 == 10
+pxor m6, m6
+%endif ; %1 == 8/9/10
+
+%if %1 == 8
+%if ARCH_X86_32
+%assign pad 0x2c - (stack_offset & 15)
+SUB rsp, pad
+%define m_dith m7
+%else ; x86-64
+%define m_dith m9
+%endif ; x86-32
+
+; create registers holding dither
+movq m_dith, [ditherq] ; dither
+test offsetd, offsetd
+jz .no_rot
+%if mmsize == 16
+punpcklqdq m_dith, m_dith
+%endif ; mmsize == 16
+PALIGNR m_dith, m_dith, 3, m0
+.no_rot:
+%if mmsize == 16
+punpcklbw m_dith, m6
+%if ARCH_X86_64
+punpcklwd m8, m_dith, m6
+pslld m8, 12
+%else ; x86-32
+punpcklwd m5, m_dith, m6
+pslld m5, 12
+%endif ; x86-32/64
+punpckhwd m_dith, m6
+pslld m_dith, 12
+%if ARCH_X86_32
+mova [rsp+ 0], m5
+mova [rsp+16], m_dith
+%endif
+%else ; mmsize == 8
+punpcklbw m5, m_dith, m6
+punpckhbw m_dith, m6
+punpcklwd m4, m5, m6
+punpckhwd m5, m6
+punpcklwd m3, m_dith, m6
+punpckhwd m_dith, m6
+pslld m4, 12
+pslld m5, 12
+pslld m3, 12
+pslld m_dith, 12
+mova [rsp+ 0], m4
+mova [rsp+ 8], m5
+mova [rsp+16], m3
+mova [rsp+24], m_dith
+%endif ; mmsize == 8/16
+%endif ; %1 == 8
+
+xor r5, r5
+
+yuv2planeX_mainloop %1
%if %1 == 8
%if ARCH_X86_32
--
1.7.9.5
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel