---
libavcodec/x86/ac3dsp.asm | 71 +++++++++++++++++++++++++++++++++++++++++++
libavcodec/x86/ac3dsp_mmx.c | 7 ++++
2 files changed, 78 insertions(+), 0 deletions(-)
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 6892ec2..1425c95 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -32,6 +32,12 @@ cextern ac3_bap_bits
pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
+; used in ff_ac3_extract_exponents(); each is loaded with mova (NOTE(review): confirm the section layout keeps them 16-byte aligned)
+pd_23: times 4 dd 23 ; minuend of the final step: result = 23 - log2 value
+pd_255: times 4 dd 0xFF ; keeps the low 8 bits of each dword (the shifted-down float exponent field)
+pd_127: times 4 dd 127 ; subtracted from the extracted exponent field (single-precision exponent bias)
+pb_shuf_4dwb: db 0, 4, 8, 12, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 ; pshufb control: byte 0 of each dword -> bytes 0-3; 255 zeroes the remaining bytes
+
SECTION .text
;-----------------------------------------------------------------------------
@@ -346,3 +352,68 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
movd eax, m0
add eax, sumd
RET
+
+;------------------------------------------------------------------------------
+; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
+;------------------------------------------------------------------------------
+
+%macro PABSD_MMX 2 ; %1 = src/dst, %2 = tmp (holds the per-dword sign mask on exit)
+    mova     %2, %1            ; work on a copy of the input
+    psrad    %2, 31            ; %2 = all 1's where %1 is negative, else 0
+    pxor     %1, %2            ; one's complement of the negative dwords
+    psubd    %1, %2            ; subtract -1 from them: completes the negation
+%endmacro
+
+%macro PABSD_SSSE3 1-2 ; src/dst, tmp (tmp is optional and unused; accepted so both PABSD variants can be invoked the same way)
+ pabsd %1, %1 ; per-dword absolute value, computed in place
+%endmacro
+
+%macro AC3_EXTRACT_EXPONENTS 1 ; %1 = cpu suffix (sse2 or ssse3); PABSD must be defined by the caller
+cglobal ac3_extract_exponents_%1, 3,3,6, exp, coef, len ; 6 XMM regs: the ssse3 variant uses m0-m5
+    mova          m2, [pd_255]          ; m2 = 0xFF per dword: exponent-field mask
+    mova          m3, [pd_127]          ; m3 = 127 per dword: float exponent bias
+    mova          m4, [pd_23]           ; m4 = 23 per dword: minuend of the final subtraction
+%ifidn %1, ssse3                        ; the shuffle control is only needed by the pshufb path
+    mova          m5, [pb_shuf_4dwb]
+%endif
+    ALIGN 16
+.loop:
+    ; load 4 32-bit coefs (aligned load: coef must be 16-byte aligned)
+    mova          m0, [coefq]
+    ; m0 = absolute value of each coef; m1 is scratch for the non-SSSE3 PABSD
+    PABSD         m0, m1
+    ; m1 = all 1's in every dword where coef == 0
+    pxor          m1, m1
+    pcmpeqd       m1, m0
+    ; log2 of each coef, read from the exponent field of its float conversion
+    cvtdq2ps      m0, m0
+    psrld         m0, 23                ; bring the exponent field down to bit 0
+    pand          m0, m2                ; keep only its 8 bits (drops the sign bit)
+    psubd         m0, m3                ; remove the bias
+    ; force the lanes of zero coefs to -1, so they come out as 23 - (-1) = 24
+    por           m0, m1
+    ; m0 = 23 - m0
+    mova          m1, m0
+    mova          m0, m4
+    psubd         m0, m1
+    ; move the lowest byte in each of 4 dwords to the low dword
+%ifidn %1, ssse3
+    pshufb        m0, m5
+%else
+    packssdw      m0, m0
+    packuswb      m0, m0
+%endif
+    movd      [expq], m0                ; store the 4 exponent bytes
+
+    add        coefq, 16                ; 4 coefs consumed
+    add         expq, 4                 ; 4 exponents written
+    sub         lend, 4
+    jg .loop                            ; always whole groups of 4: a len not divisible by 4 is rounded up
+    RET
+%endmacro
+
+INIT_XMM ; both variants below are assembled with mN mapped to the XMM registers
+%define PABSD PABSD_MMX
+AC3_EXTRACT_EXPONENTS sse2 ; emits ac3_extract_exponents_sse2, using the 4-instruction PABSD_MMX emulation
+%define PABSD PABSD_SSSE3
+AC3_EXTRACT_EXPONENTS ssse3 ; emits ac3_extract_exponents_ssse3, using native pabsd and pshufb
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index 2664736..2f76bdd 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -44,6 +44,9 @@ extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned i
extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
+extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs); /* asm: writes one exponent byte to exp per 32-bit coef */
+extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs); /* same contract; uses pabsd/pshufb */
+
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
int mm_flags = av_get_cpu_flags();
@@ -72,6 +75,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
c->float_to_fixed24 = ff_float_to_fixed24_sse2;
c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
+ c->extract_exponents = ff_ac3_extract_exponents_sse2;
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
@@ -79,6 +83,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
}
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
+ if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
+ c->extract_exponents = ff_ac3_extract_exponents_ssse3;
+ }
}
#endif
}
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel