---
 libavcodec/x86/ac3dsp.asm   |   71 +++++++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/ac3dsp_mmx.c |    7 ++++
 2 files changed, 78 insertions(+), 0 deletions(-)

diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 6892ec2..1425c95 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -32,6 +32,12 @@ cextern ac3_bap_bits
 pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
 pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
 
+; used in ff_ac3_extract_exponents()
+pd_23:  times 4 dd 23      ; minuend of the final "23 - log2" subtraction
+pd_255: times 4 dd 0xFF    ; keeps the low 8 bits of each dword after the shift right by 23
+pd_127: times 4 dd 127     ; subtracted from that 8-bit field (single-precision exponent bias)
+pb_shuf_4dwb: db 0, 4, 8, 12, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 ; pshufb control: bytes 0,4,8,12 -> low dword, 255 zeroes a byte
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -346,3 +352,68 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
     movd       eax, m0
     add        eax, sumd
     RET
+
+;------------------------------------------------------------------------------
+; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
+;------------------------------------------------------------------------------
+
+%macro PABSD_MMX 2 ; %1 = src/dst (packed dwords), %2 = tmp (left holding the sign mask)
+    mova     %2, %1    ; tmp = copy of the input
+    psrad    %2, 31    ; tmp = all ones in every dword that is negative, zero elsewhere
+    pxor     %1, %2    ; flip all bits of the negative dwords
+    psubd    %1, %2    ; subtract -1 from them, completing the negation
+%endmacro
+
+%macro PABSD_SSSE3 1-2 ; %1 = src/dst, %2 = optional tmp (accepted but never referenced)
+    pabsd    %1, %1    ; per-dword absolute value, in place
+%endmacro
+
+%macro AC3_EXTRACT_EXPONENTS 1 ; %1 = function suffix and code path selector (sse2 or ssse3)
+cglobal ac3_extract_exponents_%1, 3,3,6, exp, coef, len
+    mova      m2, [pd_255]         ; m2 = mask for the 8-bit exponent field
+    mova      m3, [pd_127]         ; m3 = single-precision exponent bias
+    mova      m4, [pd_23]          ; m4 = 23 in every dword
+%ifidn %1, ssse3
+    mova      m5, [pb_shuf_4dwb]   ; m5 = pshufb control; 6th xmm register, hence the 6 in cglobal
+%endif
+    ALIGN 16
+.loop:
+    ; m0 = next 4 32-bit coefs
+    mova      m0, [coefq]          ; NOTE(review): mova is presumably an aligned load here - confirm coef is 16-byte aligned
+    ; m0 = absolute value of each coef (m1 is only scratch for the sse2 PABSD)
+    PABSD     m0, m1
+    ; m1 = all 1's in every dword where coef == 0
+    pxor      m1, m1
+    pcmpeqd   m1, m0
+    ; log2 of each coef: convert to float, then isolate and unbias the exponent field
+    cvtdq2ps  m0, m0
+    psrld     m0, 23
+    pand      m0, m2
+    psubd     m0, m3
+    ; zero coefs: OR-ing in the all-ones mask forces their value to -1
+    por       m0, m1
+    ; m0 = 23 - m0, which yields 24 for the zero coefs
+    mova      m1, m0
+    mova      m0, m4
+    psubd     m0, m1
+    ; move the lowest byte in each of 4 dwords to the low dword
+%ifidn %1, ssse3
+    pshufb    m0, m5
+%else
+    packssdw  m0, m0               ; dwords -> words, signed saturation
+    packuswb  m0, m0               ; words -> bytes, unsigned saturation
+%endif
+    movd  [expq], m0               ; store the 4 exponent bytes
+    ; len is consumed in steps of 4 and only tested after a group is done, so one group always runs
+    add  coefq, 16
+    add   expq, 4
+    sub   lend, 4
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM
+%define PABSD PABSD_MMX        ; sse2 variant: absolute value via the multi-instruction macro
+AC3_EXTRACT_EXPONENTS sse2     ; instantiates the function declared in C as ff_ac3_extract_exponents_sse2
+%define PABSD PABSD_SSSE3      ; ssse3 variant: absolute value via a single pabsd
+AC3_EXTRACT_EXPONENTS ssse3    ; instantiates the function declared in C as ff_ac3_extract_exponents_ssse3
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index 2664736..2f76bdd 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -44,6 +44,9 @@ extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned i
 
 extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
 
+extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
+extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
+
 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
 {
     int mm_flags = av_get_cpu_flags();
@@ -72,6 +75,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
         c->float_to_fixed24 = ff_float_to_fixed24_sse2;
         c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
+        c->extract_exponents = ff_ac3_extract_exponents_sse2;
         if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
             c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
             c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
@@ -79,6 +83,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
     }
     if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
+        if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
+            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
+        }
     }
 #endif
 }
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to