---
 libavcodec/x86/vp8dsp.asm |  141 +++++++++++++++++----------------------------
 1 files changed, 53 insertions(+), 88 deletions(-)

diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index e0041ec..9d1a9fd 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -678,9 +678,28 @@ FILTER_V 4
 INIT_XMM sse2
 FILTER_V 8
 
-%macro FILTER_BILINEAR 1
-cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
+%macro FILTER_BILINEAR 3
+cglobal put_vp8_bilinear%1_v, 7, 7, %2, dst, dststride, src, srcstride, height, picreg, my
     shl      myd, 4
+%if cpuflag(ssse3)
+%ifdef PIC
+    lea  picregq, [bilinear_filter_vb_m]
+%endif
+    pxor      m4, m4
+    mova      m3, [bilinear_filter_vb+myq-16]
+.nextrow
+    movh      m0, [srcq+srcstrideq*0]
+    movh      m1, [srcq+srcstrideq*1]
+    movh      m2, [srcq+srcstrideq*2]
+    punpcklbw m0, m1
+    punpcklbw m1, m2
+    pmaddubsw m0, m3
+    pmaddubsw m1, m3
+    psraw     m0, 2
+    psraw     m1, 2
+    pavgw     m0, m4
+    pavgw     m1, m4
+%else ; cpuflag(ssse3)
 %ifdef PIC
     lea  picregq, [bilinear_filter_vw_m]
 %endif
@@ -706,13 +725,14 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
     psraw     m2, 2
     pavgw     m0, m6
     pavgw     m2, m6
+%endif ; cpuflag(ssse3)
 %if mmsize == 8
     packuswb  m0, m0
-    packuswb  m2, m2
+    packuswb  %3, %3
     movh   [dstq+dststrideq*0], m0
-    movh   [dstq+dststrideq*1], m2
+    movh   [dstq+dststrideq*1], %3
 %else
-    packuswb  m0, m2
+    packuswb  m0, %3
     movh   [dstq+dststrideq*0], m0
     movhps [dstq+dststrideq*1], m0
 %endif
@@ -723,8 +743,27 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
     jg .nextrow
     REP_RET
 
-cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, %2, dst, dststride, src, srcstride, height, mx, picreg
     shl      mxd, 4
+%if cpuflag(ssse3)
+%ifdef PIC
+    lea  picregq, [bilinear_filter_vb_m]
+%endif
+    pxor      m4, m4
+    mova      m2, [filter_h2_shuf]
+    mova      m3, [bilinear_filter_vb+mxq-16]
+.nextrow
+    movu      m0, [srcq+srcstrideq*0]
+    movu      m1, [srcq+srcstrideq*1]
+    pshufb    m0, m2
+    pshufb    m1, m2
+    pmaddubsw m0, m3
+    pmaddubsw m1, m3
+    psraw     m0, 2
+    psraw     m1, 2
+    pavgw     m0, m4
+    pavgw     m1, m4
+%else ; cpuflag(ssse3)
 %ifdef PIC
     lea  picregq, [bilinear_filter_vw_m]
 %endif
@@ -751,13 +790,14 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
     psraw     m2, 2
     pavgw     m0, m6
     pavgw     m2, m6
+%endif ; cpuflag(ssse3)
 %if mmsize == 8
     packuswb  m0, m0
-    packuswb  m2, m2
+    packuswb  %3, %3
     movh   [dstq+dststrideq*0], m0
-    movh   [dstq+dststrideq*1], m2
+    movh   [dstq+dststrideq*1], %3
 %else
-    packuswb  m0, m2
+    packuswb  m0, %3
     movh   [dstq+dststrideq*0], m0
     movhps [dstq+dststrideq*1], m0
 %endif
@@ -770,88 +810,13 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
 %endmacro
 
 INIT_MMX mmxext
-FILTER_BILINEAR 4
+FILTER_BILINEAR 4, 7, m2
 INIT_XMM sse2
-FILTER_BILINEAR 8
-
-%macro FILTER_BILINEAR_SSSE3 1
-cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
-    shl      myd, 4
-%ifdef PIC
-    lea  picregq, [bilinear_filter_vb_m]
-%endif
-    pxor      m4, m4
-    mova      m3, [bilinear_filter_vb+myq-16]
-.nextrow
-    movh      m0, [srcq+srcstrideq*0]
-    movh      m1, [srcq+srcstrideq*1]
-    movh      m2, [srcq+srcstrideq*2]
-    punpcklbw m0, m1
-    punpcklbw m1, m2
-    pmaddubsw m0, m3
-    pmaddubsw m1, m3
-    psraw     m0, 2
-    psraw     m1, 2
-    pavgw     m0, m4
-    pavgw     m1, m4
-%if mmsize==8
-    packuswb  m0, m0
-    packuswb  m1, m1
-    movh   [dstq+dststrideq*0], m0
-    movh   [dstq+dststrideq*1], m1
-%else
-    packuswb  m0, m1
-    movh   [dstq+dststrideq*0], m0
-    movhps [dstq+dststrideq*1], m0
-%endif
-
-    lea     dstq, [dstq+dststrideq*2]
-    lea     srcq, [srcq+srcstrideq*2]
-    sub  heightd, 2
-    jg .nextrow
-    REP_RET
-
-cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
-    shl      mxd, 4
-%ifdef PIC
-    lea  picregq, [bilinear_filter_vb_m]
-%endif
-    pxor      m4, m4
-    mova      m2, [filter_h2_shuf]
-    mova      m3, [bilinear_filter_vb+mxq-16]
-.nextrow
-    movu      m0, [srcq+srcstrideq*0]
-    movu      m1, [srcq+srcstrideq*1]
-    pshufb    m0, m2
-    pshufb    m1, m2
-    pmaddubsw m0, m3
-    pmaddubsw m1, m3
-    psraw     m0, 2
-    psraw     m1, 2
-    pavgw     m0, m4
-    pavgw     m1, m4
-%if mmsize==8
-    packuswb  m0, m0
-    packuswb  m1, m1
-    movh   [dstq+dststrideq*0], m0
-    movh   [dstq+dststrideq*1], m1
-%else
-    packuswb  m0, m1
-    movh   [dstq+dststrideq*0], m0
-    movhps [dstq+dststrideq*1], m0
-%endif
-
-    lea     dstq, [dstq+dststrideq*2]
-    lea     srcq, [srcq+srcstrideq*2]
-    sub  heightd, 2
-    jg .nextrow
-    REP_RET
-%endmacro
-
+FILTER_BILINEAR 8, 7, m2
 INIT_MMX ssse3
-FILTER_BILINEAR_SSSE3 4
+FILTER_BILINEAR 4, 5, m1 ; ssse3 branch leaves its output rows in m0/m1, so %3 must be m1
 INIT_XMM ssse3
-FILTER_BILINEAR_SSSE3 8
+FILTER_BILINEAR 8, 5, m1
 
 INIT_MMX mmx
 cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
-- 
1.7.2.5

_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to