6316->5677 cycles on penryn.

--Loren Merritt
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 4dcfe50..be3bccd 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -35,7 +35,7 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
 ; used in ff_ac3_extract_exponents()
 pd_1:   times 4 dd 1
 pd_151: times 4 dd 151
-pb_shuf_4dwb: db 0, 4, 8, 12, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
+pb_shuf_4dwb: db 0, 4, 8, 12
 
 SECTION .text
 
@@ -369,15 +369,18 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
 
 %macro AC3_EXTRACT_EXPONENTS 1
 cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
+    add     expq, lenq
+    lea    coefq, [coefq+4*lenq]
+    neg     lenq
     mova      m2, [pd_1]
     mova      m3, [pd_151]
 %ifidn %1, ssse3 ;
-    mova      m4, [pb_shuf_4dwb]
+    movd      m4, [pb_shuf_4dwb]
 %endif
     ALIGN 16
 .loop:
     ; move 4 32-bit coefs to xmm0
-    mova      m0, [coefq]
+    mova      m0, [coefq+4*lenq]
     ; absolute value
     PABSD     m0, m1
     ; convert to float and extract exponents
@@ -394,13 +397,11 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
     packssdw  m0, m0
     packuswb  m0, m0
 %endif
-    movd  [expq], m0
+    movd  [expq+lenq], m0
 
-    add  coefq, 16
-    add   expq, 4
-    sub   lend, 4
-    jg .loop
-    RET
+    add     lenq, 4
+    jl .loop
+    REP_RET
 %endmacro
 
 INIT_XMM
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to