--- libavcodec/x86/dct32_sse.asm | 74 ++++++++++++++--------------------------- 1 files changed, 25 insertions(+), 49 deletions(-)
diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm index 20a5c7b..65fac09 100644 --- a/libavcodec/x86/dct32_sse.asm +++ b/libavcodec/x86/dct32_sse.asm @@ -41,39 +41,24 @@ ps_cos_vec: dd 0.500603, 0.505471, 0.515447, 0.531043 align 32 ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 -%macro BUTTERFLY_SSE 4 - movaps %4, %1 - subps %1, %2 - addps %2, %4 - mulps %1, %3 -%endmacro - -%macro BUTTERFLY_AVX 4 - vsubps %4, %1, %2 - vaddps %2, %2, %1 - vmulps %1, %4, %3 -%endmacro - -%macro BUTTERFLY0_SSE 5 - movaps %4, %1 - shufps %1, %1, %5 - xorps %4, %2 - addps %1, %4 - mulps %1, %3 +%macro BUTTERFLY 4 + subps %4, %1, %2 + addps %2, %2, %1 + mulps %1, %4, %3 %endmacro -%macro BUTTERFLY0_SSE2 5 +%macro BUTTERFLY0 5 +%if cpuflag(sse2) && notcpuflag(avx) pshufd %4, %1, %5 xorps %1, %2 addps %1, %4 mulps %1, %3 -%endmacro - -%macro BUTTERFLY0_AVX 5 - vshufps %4, %1, %1, %5 - vxorps %1, %1, %2 - vaddps %4, %4, %1 - vmulps %1, %4, %3 +%else + shufps %4, %1, %1, %5 + xorps %1, %1, %2 + addps %4, %4, %1 + mulps %1, %4, %3 +%endif %endmacro %macro BUTTERFLY2 4 @@ -205,14 +190,11 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 movss [outq+116], m6 %endmacro -%define BUTTERFLY BUTTERFLY_AVX -%define BUTTERFLY0 BUTTERFLY0_AVX - -INIT_YMM +INIT_YMM avx SECTION_TEXT %if HAVE_AVX ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in) -cglobal dct32_float_avx, 2,3,8, out, in, tmp +cglobal dct32_float, 2,3,8, out, in, tmp ; pass 1 vmovaps m4, [inq+0] vinsertf128 m5, m5, [inq+96], 1 @@ -285,9 +267,6 @@ INIT_XMM RET %endif -%define BUTTERFLY BUTTERFLY_SSE -%define BUTTERFLY0 BUTTERFLY0_SSE - %if ARCH_X86_64 %define SPILL SWAP %define UNSPILL SWAP @@ -410,10 +389,9 @@ INIT_XMM %endif -INIT_XMM -%macro DCT32_FUNC 1 ; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in) -cglobal dct32_float_%1, 2,3,16, out, in, tmp +%macro DCT32_FUNC 0 +cglobal dct32_float, 2, 3, 16, out, in, tmp ; pass 1 movaps m0, [inq+0] @@ -497,18 +475,16 @@ cglobal dct32_float_%1, 2,3,16, out, in, tmp RET %endmacro -%macro LOAD_INV_SSE 2 +%macro LOAD_INV 2 +%if cpuflag(sse2) + pshufd %1, %2, 0x1b +%elif cpuflag(sse) movaps %1, %2 shufps %1, %1, 0x1b +%endif %endmacro -%define LOAD_INV LOAD_INV_SSE -DCT32_FUNC sse - -%macro LOAD_INV_SSE2 2 - pshufd %1, %2, 0x1b -%endmacro - -%define LOAD_INV LOAD_INV_SSE2 -%define BUTTERFLY0 BUTTERFLY0_SSE2 -DCT32_FUNC sse2 +INIT_XMM sse +DCT32_FUNC +INIT_XMM sse2 +DCT32_FUNC -- 1.7.1 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel