---
Review notes (not part of the commit message):
 * FILTER_BILINEAR arguments: %1 = size suffix of the generated function
   names (4 or 8), %2 = vector register count handed to cglobal, %3 =
   register that holds the second output row when the shared
   packuswb/store sequence is reached.
 * The "INIT_MMX ssse3" instantiation now passes "5, m1". The
   cpuflag(ssse3) path leaves the second output row in m1; at that point
   m2 still holds the bytes loaded from [srcq+srcstrideq*2] (_v) or the
   filter_h2_shuf mask (_h), so passing m2 made the mmsize == 8 branch
   pack and store the wrong register for the second row of every pair.
 * Column alignment inside each line was reconstructed from a
   whitespace-collapsed copy of this mail; use
   "git apply --ignore-whitespace" if the context does not match
   byte-for-byte. The post-image hash on the index line was not recomputed.

 libavcodec/x86/vp8dsp.asm |  141 +++++++++++++++++----------------------------
 1 files changed, 53 insertions(+), 88 deletions(-)

diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index e0041ec..9d1a9fd 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -678,9 +678,28 @@ FILTER_V 4
 INIT_XMM sse2
 FILTER_V 8
 
-%macro FILTER_BILINEAR 1
-cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
+%macro FILTER_BILINEAR 3
+cglobal put_vp8_bilinear%1_v, 7, 7, %2, dst, dststride, src, srcstride, height, picreg, my
     shl      myd, 4
+%if cpuflag(ssse3)
+%ifdef PIC
+    lea  picregq, [bilinear_filter_vb_m]
+%endif
+    pxor      m4, m4 ; zero operand for the pavgw rounding step below
+    mova      m3, [bilinear_filter_vb+myq-16]
+.nextrow
+    movh      m0, [srcq+srcstrideq*0]
+    movh      m1, [srcq+srcstrideq*1]
+    movh      m2, [srcq+srcstrideq*2]
+    punpcklbw m0, m1 ; interleave bytes of rows 0 and 1 for pmaddubsw
+    punpcklbw m1, m2 ; interleave bytes of rows 1 and 2
+    pmaddubsw m0, m3
+    pmaddubsw m1, m3
+    psraw     m0, 2
+    psraw     m1, 2
+    pavgw     m0, m4
+    pavgw     m1, m4 ; second output row lives in m1 on this path
+%else ; cpuflag(ssse3)
 %ifdef PIC
     lea  picregq, [bilinear_filter_vw_m]
 %endif
@@ -706,13 +725,14 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
     psraw     m2, 2
     pavgw     m0, m6
     pavgw     m2, m6
+%endif ; cpuflag(ssse3)
 %if mmsize == 8
     packuswb  m0, m0
-    packuswb  m2, m2
+    packuswb  %3, %3 ; %3 = second-row register of the active path
     movh   [dstq+dststrideq*0], m0
-    movh   [dstq+dststrideq*1], m2
+    movh   [dstq+dststrideq*1], %3
 %else
-    packuswb  m0, m2
+    packuswb  m0, %3 ; both rows packed into m0 (low half / high half)
     movh   [dstq+dststrideq*0], m0
     movhps [dstq+dststrideq*1], m0
 %endif
@@ -723,8 +743,27 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
     jg .nextrow
     REP_RET
 
-cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, %2, dst, dststride, src, srcstride, height, mx, picreg
     shl      mxd, 4
+%if cpuflag(ssse3)
+%ifdef PIC
+    lea  picregq, [bilinear_filter_vb_m]
+%endif
+    pxor      m4, m4 ; zero operand for the pavgw rounding step below
+    mova      m2, [filter_h2_shuf] ; m2 is the pshufb mask, not pixel data
+    mova      m3, [bilinear_filter_vb+mxq-16]
+.nextrow
+    movu      m0, [srcq+srcstrideq*0]
+    movu      m1, [srcq+srcstrideq*1]
+    pshufb    m0, m2
+    pshufb    m1, m2
+    pmaddubsw m0, m3
+    pmaddubsw m1, m3
+    psraw     m0, 2
+    psraw     m1, 2
+    pavgw     m0, m4
+    pavgw     m1, m4 ; second output row lives in m1 on this path
+%else ; cpuflag(ssse3)
 %ifdef PIC
     lea  picregq, [bilinear_filter_vw_m]
 %endif
@@ -751,13 +790,14 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
     psraw     m2, 2
     pavgw     m0, m6
     pavgw     m2, m6
+%endif ; cpuflag(ssse3)
 %if mmsize == 8
     packuswb  m0, m0
-    packuswb  m2, m2
+    packuswb  %3, %3 ; %3 = second-row register of the active path
     movh   [dstq+dststrideq*0], m0
-    movh   [dstq+dststrideq*1], m2
+    movh   [dstq+dststrideq*1], %3
 %else
-    packuswb  m0, m2
+    packuswb  m0, %3 ; both rows packed into m0 (low half / high half)
     movh   [dstq+dststrideq*0], m0
     movhps [dstq+dststrideq*1], m0
 %endif
@@ -770,88 +810,13 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
 %endmacro
 
 INIT_MMX mmxext
-FILTER_BILINEAR 4
+FILTER_BILINEAR 4, 7, m2
 INIT_XMM sse2
-FILTER_BILINEAR 8
-
-%macro FILTER_BILINEAR_SSSE3 1
-cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
-    shl      myd, 4
-%ifdef PIC
-    lea  picregq, [bilinear_filter_vb_m]
-%endif
-    pxor      m4, m4
-    mova      m3, [bilinear_filter_vb+myq-16]
-.nextrow
-    movh      m0, [srcq+srcstrideq*0]
-    movh      m1, [srcq+srcstrideq*1]
-    movh      m2, [srcq+srcstrideq*2]
-    punpcklbw m0, m1
-    punpcklbw m1, m2
-    pmaddubsw m0, m3
-    pmaddubsw m1, m3
-    psraw     m0, 2
-    psraw     m1, 2
-    pavgw     m0, m4
-    pavgw     m1, m4
-%if mmsize==8
-    packuswb  m0, m0
-    packuswb  m1, m1
-    movh   [dstq+dststrideq*0], m0
-    movh   [dstq+dststrideq*1], m1
-%else
-    packuswb  m0, m1
-    movh   [dstq+dststrideq*0], m0
-    movhps [dstq+dststrideq*1], m0
-%endif
-
-    lea     dstq, [dstq+dststrideq*2]
-    lea     srcq, [srcq+srcstrideq*2]
-    sub  heightd, 2
-    jg .nextrow
-    REP_RET
-
-cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
-    shl      mxd, 4
-%ifdef PIC
-    lea  picregq, [bilinear_filter_vb_m]
-%endif
-    pxor      m4, m4
-    mova      m2, [filter_h2_shuf]
-    mova      m3, [bilinear_filter_vb+mxq-16]
-.nextrow
-    movu      m0, [srcq+srcstrideq*0]
-    movu      m1, [srcq+srcstrideq*1]
-    pshufb    m0, m2
-    pshufb    m1, m2
-    pmaddubsw m0, m3
-    pmaddubsw m1, m3
-    psraw     m0, 2
-    psraw     m1, 2
-    pavgw     m0, m4
-    pavgw     m1, m4
-%if mmsize==8
-    packuswb  m0, m0
-    packuswb  m1, m1
-    movh   [dstq+dststrideq*0], m0
-    movh   [dstq+dststrideq*1], m1
-%else
-    packuswb  m0, m1
-    movh   [dstq+dststrideq*0], m0
-    movhps [dstq+dststrideq*1], m0
-%endif
-
-    lea     dstq, [dstq+dststrideq*2]
-    lea     srcq, [srcq+srcstrideq*2]
-    sub  heightd, 2
-    jg .nextrow
-    REP_RET
-%endmacro
-
+FILTER_BILINEAR 8, 7, m2
 INIT_MMX ssse3
-FILTER_BILINEAR_SSSE3 4
+FILTER_BILINEAR 4, 5, m1 ; ssse3 path: row 1 is in m1 (m2 is source/mask data)
 INIT_XMM ssse3
-FILTER_BILINEAR_SSSE3 8
+FILTER_BILINEAR 8, 5, m1
 
 INIT_MMX mmx
 cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
-- 
1.7.2.5

_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel