---
 libavfilter/x86/af_volume.asm    |   47 ++++++++++++++++++++++++++++++++++++++
 libavfilter/x86/af_volume_init.c |   14 +++++++++++
 2 files changed, 61 insertions(+), 0 deletions(-)

diff --git a/libavfilter/x86/af_volume.asm b/libavfilter/x86/af_volume.asm
index bf350ae..b7d45a2 100644
--- a/libavfilter/x86/af_volume.asm
+++ b/libavfilter/x86/af_volume.asm
@@ -25,6 +25,7 @@ SECTION_RODATA 32
 
 pw_1:   times 8 dw 1
 pw_128: times 8 dw 128
+pq_128: times 2 dq 128         ; rounding bias (half of 1<<8) in each 64-bit lane
 
 SECTION_TEXT
 
@@ -54,3 +55,49 @@ cglobal scale_samples_s16, 4,4,4, dst, src, len, volume
     sub       lenq, mmsize
     jge .loop
     REP_RET
+
+;------------------------------------------------------------------------------
+; void ff_scale_samples_s32(uint8_t *dst, const uint8_t *src, int len,
+;                           int volume)
+;------------------------------------------------------------------------------
+
+; NOTE: Not bit-identical with the C version: clips to [-INT_MAX, INT_MAX]
+;       instead of [INT_MIN, INT_MAX] and rounds negative ties away from zero
+
+%macro SCALE_SAMPLES_S32 0
+cglobal scale_samples_s32, 4,4,8, dst, src, len, volume
+    movd        m4, volumem            ; m4 = volume in dword 0
+    pshufd      m4, m4, 0              ; broadcast volume to all four dwords
+    mova        m5, [pq_128]           ; m5 = +128 rounding bias per qword
+    pxor        m6, m6                 ; m6 = 0, reference for overflow test
+    lea       lenq, [lend*4-mmsize]    ; byte offset of last vector; walk backwards
+.loop:
+    ; dst[i] = av_clipl_int32((src[i] * volume + 128) >> 8);
+    mova        m7, [srcq+lenq]        ; m7 = 4 signed samples (kept for sign)
+    pabsd       m3, m7                 ; m3 = |samples|, multiplied as unsigned
+    pshufd      m0, m3, q0100          ; samples 0,1 -> dwords 0,2 for pmuludq
+    pshufd      m1, m3, q0302          ; samples 2,3 -> dwords 0,2 for pmuludq
+    pmuludq     m0, m4                 ; 64-bit products |s0|*vol, |s1|*vol
+    pmuludq     m1, m4                 ; 64-bit products |s2|*vol, |s3|*vol
+    paddq       m0, m5                 ; + 128 for rounding
+    paddq       m1, m5
+    psrlq       m0, 7                  ; >> 7 only: one spare bit is kept so that
+    psrlq       m1, 7                  ;   overflow shows up in the high dword
+    shufps      m2, m0, m1, q3131      ; m2 = high dwords of the 4 results
+    shufps      m0, m0, m1, q2020      ; m0 = low dwords of the 4 results
+    pcmpgtd     m2, m6                 ; all-ones where high dword > 0 (overflow)
+    por         m0, m2                 ; overflowed lanes -> 0xFFFFFFFF
+    psrld       m0, 1                  ; final >> 1; saturated lanes = 0x7FFFFFFF
+    psignd      m0, m7                 ; reapply the original sample sign
+    mova  [dstq+lenq], m0              ; store 4 scaled samples
+    sub       lenq, mmsize             ; step back one vector
+    jge .loop                          ; loop until the offset goes negative
+    REP_RET
+%endmacro
+
+INIT_XMM ssse3                 ; SSSE3 build: the macro uses pabsd/psignd
+SCALE_SAMPLES_S32
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx                   ; same code assembled again as the AVX variant
+SCALE_SAMPLES_S32
+%endif
diff --git a/libavfilter/x86/af_volume_init.c b/libavfilter/x86/af_volume_init.c
index 00103df..a18eee8 100644
--- a/libavfilter/x86/af_volume_init.c
+++ b/libavfilter/x86/af_volume_init.c
@@ -25,6 +25,11 @@
 void ff_scale_samples_s16_sse2(uint8_t *dst, const uint8_t *src, int len,
                                int volume);
 
+void ff_scale_samples_s32_ssse3(uint8_t *dst, const uint8_t *src, int len,
+                                int volume);
+void ff_scale_samples_s32_avx(uint8_t *dst, const uint8_t *src, int len,
+                              int volume);
+
 void ff_volume_init_x86(VolumeContext *vol)
 {
     int mm_flags = av_get_cpu_flags();
@@ -35,5 +40,14 @@ void ff_volume_init_x86(VolumeContext *vol)
             vol->scale_samples = ff_scale_samples_s16_sse2;
             vol->samples_align = 8;
         }
+    } else if (sample_fmt == AV_SAMPLE_FMT_S32) {
+        if (EXTERNAL_SSSE3(mm_flags)) {
+            vol->scale_samples = ff_scale_samples_s32_ssse3;
+            vol->samples_align = 4;
+        }
+        if (EXTERNAL_AVX(mm_flags)) {
+            vol->scale_samples = ff_scale_samples_s32_avx;
+            vol->samples_align = 4;
+        }
     }
 }
-- 
1.7.1

_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to