---
The previous patch left out converting some of the small macros to use cpuflags.
libavcodec/x86/ac3dsp.asm | 107 +++++++++++++++++++++----------------------
libavcodec/x86/ac3dsp_mmx.c | 12 ++--
libavutil/x86/x86util.asm | 10 ++--
3 files changed, 64 insertions(+), 65 deletions(-)
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index c1b0906..21479de 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -43,8 +43,8 @@ SECTION .text
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
;-----------------------------------------------------------------------------
-%macro AC3_EXPONENT_MIN 1
-cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
+%macro AC3_EXPONENT_MIN 0
+cglobal ac3_exponent_min, 3,4,2, exp, reuse_blks, expn, offset
shl reuse_blksq, 8
jz .end
LOOP_ALIGN
@@ -65,20 +65,18 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
REP_RET
%endmacro
-%define PMINUB PMINUB_MMX
%define LOOP_ALIGN
-INIT_MMX
-AC3_EXPONENT_MIN mmx
+INIT_MMX mmx
+AC3_EXPONENT_MIN
%ifdef HAVE_MMX2
-%define PMINUB PMINUB_MMXEXT
+INIT_MMX mmx2
%define LOOP_ALIGN ALIGN 16
-AC3_EXPONENT_MIN mmxext
+AC3_EXPONENT_MIN
%endif
%ifdef HAVE_SSE
-INIT_XMM
-AC3_EXPONENT_MIN sse2
+INIT_XMM sse2
+AC3_EXPONENT_MIN
%endif
-%undef PMINUB
%undef LOOP_ALIGN
;-----------------------------------------------------------------------------
@@ -92,12 +90,12 @@ AC3_EXPONENT_MIN sse2
; This is used for mmxext and sse2 because they have pminsw/pmaxsw.
;-----------------------------------------------------------------------------
-%macro AC3_MAX_MSB_ABS_INT16 2
-cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
+%macro AC3_MAX_MSB_ABS_INT16 1
+cglobal ac3_max_msb_abs_int16, 2,2,5, src, len
pxor m2, m2
pxor m3, m3
.loop:
-%ifidn %2, min_max
+%ifidn %1, min_max
mova m0, [srcq]
mova m1, [srcq+mmsize]
pminsw m2, m0
@@ -105,14 +103,14 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
pmaxsw m3, m0
pmaxsw m3, m1
%else ; or_abs
-%ifidn %1, mmx
- mova m0, [srcq]
- mova m1, [srcq+mmsize]
- ABS2 m0, m1, m3, m4
-%else ; ssse3
+%if cpuflag(ssse3)
; using memory args is faster for ssse3
pabsw m0, [srcq]
pabsw m1, [srcq+mmsize]
+%else
+ mova m0, [srcq]
+ mova m1, [srcq+mmsize]
+ ABS2 m0, m1, m3, m4
%endif
por m2, m0
por m2, m1
@@ -120,11 +118,11 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
add srcq, mmsize*2
sub lend, mmsize
ja .loop
-%ifidn %2, min_max
+%ifidn %1, min_max
ABS2 m2, m3, m0, m1
por m2, m3
%endif
-%ifidn mmsize, 16
+%if mmsize == 16
movhlps m0, m2
por m2, m0
%endif
@@ -137,24 +135,26 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
RET
%endmacro
-INIT_MMX
+INIT_MMX mmx
%define ABS2 ABS2_MMX
%define PSHUFLW pshufw
-AC3_MAX_MSB_ABS_INT16 mmx, or_abs
+AC3_MAX_MSB_ABS_INT16 or_abs
+INIT_MMX mmx2
%define ABS2 ABS2_MMX2
-AC3_MAX_MSB_ABS_INT16 mmxext, min_max
-INIT_XMM
+AC3_MAX_MSB_ABS_INT16 min_max
+INIT_XMM sse2
%define PSHUFLW pshuflw
-AC3_MAX_MSB_ABS_INT16 sse2, min_max
+AC3_MAX_MSB_ABS_INT16 min_max
+INIT_XMM ssse3
%define ABS2 ABS2_SSSE3
-AC3_MAX_MSB_ABS_INT16 ssse3, or_abs
+AC3_MAX_MSB_ABS_INT16 or_abs
;-----------------------------------------------------------------------------
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
;-----------------------------------------------------------------------------
-%macro AC3_SHIFT 4 ; l/r, 16/32, shift instruction, instruction set
-cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
+%macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction
+cglobal ac3_%1shift_int%2, 3,3,5, src, len, shift
movd m0, shiftd
.loop:
mova m1, [srcq ]
@@ -180,19 +180,19 @@ cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
;-----------------------------------------------------------------------------
-INIT_MMX
-AC3_SHIFT l, 16, psllw, mmx
-INIT_XMM
-AC3_SHIFT l, 16, psllw, sse2
+INIT_MMX mmx
+AC3_SHIFT l, 16, psllw
+INIT_XMM sse2
+AC3_SHIFT l, 16, psllw
;-----------------------------------------------------------------------------
; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
;-----------------------------------------------------------------------------
-INIT_MMX
-AC3_SHIFT r, 32, psrad, mmx
-INIT_XMM
-AC3_SHIFT r, 32, psrad, sse2
+INIT_MMX mmx
+AC3_SHIFT r, 32, psrad
+INIT_XMM sse2
+AC3_SHIFT r, 32, psrad
;-----------------------------------------------------------------------------
; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
@@ -356,20 +356,20 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
;------------------------------------------------------------------------------
-%macro PABSD_MMX 2 ; src/dst, tmp
+%macro PABSD 1-2 ; src/dst, tmp/unused
+%if cpuflag(ssse3)
+ pabsd %1, %1
+%else
pxor %2, %2
pcmpgtd %2, %1
pxor %1, %2
psubd %1, %2
-%endmacro
-
-%macro PABSD_SSSE3 1-2 ; src/dst, unused
- pabsd %1, %1
+%endif
%endmacro
%ifdef HAVE_AMD3DNOW
-INIT_MMX
-cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
+INIT_MMX 3dnow
+cglobal ac3_extract_exponents, 3,3,0, exp, coef, len
add expq, lenq
lea coefq, [coefq+4*lenq]
neg lenq
@@ -378,8 +378,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
.loop:
movq m0, [coefq+4*lenq ]
movq m1, [coefq+4*lenq+8]
- PABSD_MMX m0, m2
- PABSD_MMX m1, m2
+ PABSD m0, m2
+ PABSD m1, m2
pslld m0, 1
por m0, m3
pi2fd m2, m0
@@ -403,14 +403,14 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
REP_RET
%endif
-%macro AC3_EXTRACT_EXPONENTS 1
-cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
+%macro AC3_EXTRACT_EXPONENTS 0
+cglobal ac3_extract_exponents, 3,3,5, exp, coef, len
add expq, lenq
lea coefq, [coefq+4*lenq]
neg lenq
mova m2, [pd_1]
mova m3, [pd_151]
-%ifidn %1, ssse3 ;
+%if cpuflag(ssse3)
movd m4, [pb_shuf_4dwb]
%endif
.loop:
@@ -426,7 +426,7 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
mova m0, m3
psubd m0, m1
; move the lowest byte in each of 4 dwords to the low dword
-%ifidn %1, ssse3
+%if cpuflag(ssse3)
pshufb m0, m4
%else
packssdw m0, m0
@@ -440,11 +440,10 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
%endmacro
%ifdef HAVE_SSE
-INIT_XMM
-%define PABSD PABSD_MMX
-AC3_EXTRACT_EXPONENTS sse2
+INIT_XMM sse2
+AC3_EXTRACT_EXPONENTS
%ifdef HAVE_SSSE3
-%define PABSD PABSD_SSSE3
-AC3_EXTRACT_EXPONENTS ssse3
+INIT_XMM ssse3
+AC3_EXTRACT_EXPONENTS
%endif
%endif
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index d6bb469..81c3eab 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -23,12 +23,12 @@
#include "dsputil_mmx.h"
#include "libavcodec/ac3dsp.h"
-extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
-extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
-extern void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern void ff_ac3_exponent_min_mmx2(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern void ff_ac3_exponent_min_sse2(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
extern int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len);
-extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_mmx2 (const int16_t *src, int len);
extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len);
@@ -66,8 +66,8 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
}
}
if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
- c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
- c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
+ c->ac3_exponent_min = ff_ac3_exponent_min_mmx2;
+ c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
}
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
c->float_to_fixed24 = ff_float_to_fixed24_sse;
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 874443a..ee900b4 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -509,14 +509,14 @@
movh [%7+%8], %4
%endmacro
-%macro PMINUB_MMX 3 ; dst, src, tmp
+%macro PMINUB 2-3 ; dst, src, tmp/unused
+%if cpuflag(mmx2)
+ pminub %1, %2
+%else
mova %3, %1
psubusb %3, %2
psubb %1, %3
-%endmacro
-
-%macro PMINUB_MMXEXT 3 ; dst, src, ignored
- pminub %1, %2
+%endif
%endmacro
%macro SPLATW 2-3 0
--
1.7.1
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel