---
libavcodec/x86/ac3dsp.asm | 84 ++++++++++++++++++++++--------------------
libavcodec/x86/ac3dsp_mmx.c | 12 +++---
2 files changed, 50 insertions(+), 46 deletions(-)
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index c1b0906..8aa35f9 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -43,8 +43,8 @@ SECTION .text
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
;-----------------------------------------------------------------------------
-%macro AC3_EXPONENT_MIN 1
-cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
+%macro AC3_EXPONENT_MIN 0
+cglobal ac3_exponent_min, 3,4,2, exp, reuse_blks, expn, offset
shl reuse_blksq, 8
jz .end
LOOP_ALIGN
@@ -67,16 +67,17 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
%define PMINUB PMINUB_MMX
%define LOOP_ALIGN
-INIT_MMX
-AC3_EXPONENT_MIN mmx
+INIT_MMX mmx
+AC3_EXPONENT_MIN
%ifdef HAVE_MMX2
+INIT_MMX mmx2
%define PMINUB PMINUB_MMXEXT
%define LOOP_ALIGN ALIGN 16
-AC3_EXPONENT_MIN mmxext
+AC3_EXPONENT_MIN
%endif
%ifdef HAVE_SSE
-INIT_XMM
-AC3_EXPONENT_MIN sse2
+INIT_XMM sse2
+AC3_EXPONENT_MIN
%endif
%undef PMINUB
%undef LOOP_ALIGN
@@ -92,12 +93,12 @@ AC3_EXPONENT_MIN sse2
; This is used for mmxext and sse2 because they have pminsw/pmaxsw.
;-----------------------------------------------------------------------------
-%macro AC3_MAX_MSB_ABS_INT16 2
-cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
+%macro AC3_MAX_MSB_ABS_INT16 1
+cglobal ac3_max_msb_abs_int16, 2,2,5, src, len
pxor m2, m2
pxor m3, m3
.loop:
-%ifidn %2, min_max
+%ifidn %1, min_max
mova m0, [srcq]
mova m1, [srcq+mmsize]
pminsw m2, m0
@@ -105,14 +106,14 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
pmaxsw m3, m0
pmaxsw m3, m1
%else ; or_abs
-%ifidn %1, mmx
- mova m0, [srcq]
- mova m1, [srcq+mmsize]
- ABS2 m0, m1, m3, m4
-%else ; ssse3
+%if cpuflag(ssse3)
; using memory args is faster for ssse3
pabsw m0, [srcq]
pabsw m1, [srcq+mmsize]
+%else
+ mova m0, [srcq]
+ mova m1, [srcq+mmsize]
+ ABS2 m0, m1, m3, m4
%endif
por m2, m0
por m2, m1
@@ -120,11 +121,11 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
add srcq, mmsize*2
sub lend, mmsize
ja .loop
-%ifidn %2, min_max
+%ifidn %1, min_max
ABS2 m2, m3, m0, m1
por m2, m3
%endif
-%ifidn mmsize, 16
+%if mmsize == 16
movhlps m0, m2
por m2, m0
%endif
@@ -137,24 +138,26 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
RET
%endmacro
-INIT_MMX
+INIT_MMX mmx
%define ABS2 ABS2_MMX
%define PSHUFLW pshufw
-AC3_MAX_MSB_ABS_INT16 mmx, or_abs
+AC3_MAX_MSB_ABS_INT16 or_abs
+INIT_MMX mmx2
%define ABS2 ABS2_MMX2
-AC3_MAX_MSB_ABS_INT16 mmxext, min_max
-INIT_XMM
+AC3_MAX_MSB_ABS_INT16 min_max
+INIT_XMM sse2
%define PSHUFLW pshuflw
-AC3_MAX_MSB_ABS_INT16 sse2, min_max
+AC3_MAX_MSB_ABS_INT16 min_max
+INIT_XMM ssse3
%define ABS2 ABS2_SSSE3
-AC3_MAX_MSB_ABS_INT16 ssse3, or_abs
+AC3_MAX_MSB_ABS_INT16 or_abs
;-----------------------------------------------------------------------------
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
;-----------------------------------------------------------------------------
-%macro AC3_SHIFT 4 ; l/r, 16/32, shift instruction, instruction set
-cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
+%macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction
+cglobal ac3_%1shift_int%2, 3,3,5, src, len, shift
movd m0, shiftd
.loop:
mova m1, [srcq ]
@@ -180,19 +183,19 @@ cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
;-----------------------------------------------------------------------------
-INIT_MMX
-AC3_SHIFT l, 16, psllw, mmx
-INIT_XMM
-AC3_SHIFT l, 16, psllw, sse2
+INIT_MMX mmx
+AC3_SHIFT l, 16, psllw
+INIT_XMM sse2
+AC3_SHIFT l, 16, psllw
;-----------------------------------------------------------------------------
; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
;-----------------------------------------------------------------------------
-INIT_MMX
-AC3_SHIFT r, 32, psrad, mmx
-INIT_XMM
-AC3_SHIFT r, 32, psrad, sse2
+INIT_MMX mmx
+AC3_SHIFT r, 32, psrad
+INIT_XMM sse2
+AC3_SHIFT r, 32, psrad
;-----------------------------------------------------------------------------
; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
@@ -403,14 +406,14 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
REP_RET
%endif
-%macro AC3_EXTRACT_EXPONENTS 1
-cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
+%macro AC3_EXTRACT_EXPONENTS 0
+cglobal ac3_extract_exponents, 3,3,5, exp, coef, len
add expq, lenq
lea coefq, [coefq+4*lenq]
neg lenq
mova m2, [pd_1]
mova m3, [pd_151]
-%ifidn %1, ssse3 ;
+%if cpuflag(ssse3)
movd m4, [pb_shuf_4dwb]
%endif
.loop:
@@ -426,7 +429,7 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
mova m0, m3
psubd m0, m1
; move the lowest byte in each of 4 dwords to the low dword
-%ifidn %1, ssse3
+%if cpuflag(ssse3)
pshufb m0, m4
%else
packssdw m0, m0
@@ -440,11 +443,12 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
%endmacro
%ifdef HAVE_SSE
-INIT_XMM
+INIT_XMM sse2
%define PABSD PABSD_MMX
-AC3_EXTRACT_EXPONENTS sse2
+AC3_EXTRACT_EXPONENTS
%ifdef HAVE_SSSE3
+INIT_XMM ssse3
%define PABSD PABSD_SSSE3
-AC3_EXTRACT_EXPONENTS ssse3
+AC3_EXTRACT_EXPONENTS
%endif
%endif
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index d6bb469..81c3eab 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -23,12 +23,12 @@
#include "dsputil_mmx.h"
#include "libavcodec/ac3dsp.h"
-extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
-extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
-extern void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern void ff_ac3_exponent_min_mmx2(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern void ff_ac3_exponent_min_sse2(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
extern int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len);
-extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_mmx2 (const int16_t *src, int len);
extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len);
@@ -66,8 +66,8 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
}
}
if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
- c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
- c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
+ c->ac3_exponent_min = ff_ac3_exponent_min_mmx2;
+ c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
}
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
c->float_to_fixed24 = ff_float_to_fixed24_sse;
--
1.7.1
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel