--- libavfilter/x86/af_volume.asm | 47 ++++++++++++++++++++++++++++++++++++++ libavfilter/x86/af_volume_init.c | 14 +++++++++++ 2 files changed, 61 insertions(+), 0 deletions(-)
diff --git a/libavfilter/x86/af_volume.asm b/libavfilter/x86/af_volume.asm
index bf350ae..b7d45a2 100644
--- a/libavfilter/x86/af_volume.asm
+++ b/libavfilter/x86/af_volume.asm
@@ -25,6 +25,7 @@ SECTION_RODATA 32
 
 pw_1:   times 8 dw 1
 pw_128: times 8 dw 128
+pq_128: times 2 dq 128                  ; 128 in each 64-bit lane: rounding term for the s32 path
 
 SECTION_TEXT
 
@@ -54,3 +55,49 @@ cglobal scale_samples_s16, 4,4,4, dst, src, len, volume
     sub       lenq, mmsize
     jge .loop
     REP_RET
+
+;------------------------------------------------------------------------------
+; void ff_scale_samples_s32(uint8_t *dst, const uint8_t *src, int len,
+;                           int volume)
+;------------------------------------------------------------------------------
+
+; NOTE: This is not bit-identical with the C version because it clips to
+;       [-INT_MAX, INT_MAX] instead of [INT_MIN, INT_MAX]
+
+%macro SCALE_SAMPLES_S32 0
+cglobal scale_samples_s32, 4,4,8, dst, src, len, volume
+    movd        m4, volumem             ; volume -> low dword of m4
+    pshufd      m4, m4, 0               ; broadcast volume to all four dwords
+    mova        m5, [pq_128]            ; rounding constant (128 per qword)
+    pxor        m6, m6                  ; m6 = 0, reference for the overflow compare
+    lea       lenq, [lend*4-mmsize]     ; byte offset of the last vector; loop walks backwards
+.loop:
+    ; dst[i] = av_clipl_int32((src[i] * volume + 128) >> 8);
+    mova        m7, [srcq+lenq]         ; keep the signed input for the final psignd
+    pabsd       m3, m7                  ; work on magnitudes; sign is re-applied at the end
+    pshufd      m0, m3, q0100           ; samples 0,1 -> dwords 0,2 (the lanes pmuludq reads)
+    pshufd      m1, m3, q0302           ; samples 2,3 -> dwords 0,2
+    pmuludq     m0, m4                  ; unsigned 32x32 -> 64-bit products
+    pmuludq     m1, m4
+    paddq       m0, m5                  ; + 128 (rounding)
+    paddq       m1, m5
+    psrlq       m0, 7                   ; shift by 7 only; the last bit is shifted out by psrld below
+    psrlq       m1, 7
+    shufps      m2, m0, m1, q3131       ; m2 = high dwords of the four 64-bit results
+    shufps      m0, m0, m1, q2020       ; m0 = low dwords of the four 64-bit results
+    pcmpgtd     m2, m6                  ; all-ones where the high dword is > 0, i.e. result overflows
+    por         m0, m2                  ; overflowed lanes become 0xFFFFFFFF
+    psrld       m0, 1                   ; completes the >> 8; overflowed lanes become 0x7FFFFFFF
+    psignd      m0, m7                  ; re-apply the sign of the input (hence the -INT_MAX lower clip)
+    mova  [dstq+lenq], m0
+    sub       lenq, mmsize              ; step to the previous vector
+    jge .loop                           ; continue until the offset goes negative
+    REP_RET
+%endmacro
+
+INIT_XMM ssse3
+SCALE_SAMPLES_S32
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+SCALE_SAMPLES_S32
+%endif
diff --git a/libavfilter/x86/af_volume_init.c b/libavfilter/x86/af_volume_init.c
index 00103df..a18eee8 100644
--- a/libavfilter/x86/af_volume_init.c
+++ b/libavfilter/x86/af_volume_init.c
@@ -25,6 +25,11 @@
 void ff_scale_samples_s16_sse2(uint8_t *dst, const uint8_t *src, int len,
                                int volume);
 
+void ff_scale_samples_s32_ssse3(uint8_t *dst, const uint8_t *src, int len,
+                                int volume);
+void ff_scale_samples_s32_avx(uint8_t *dst, const uint8_t *src, int len,
+                              int volume);
+
 void
ff_volume_init_x86(VolumeContext *vol) { int mm_flags = av_get_cpu_flags(); @@ -35,5 +40,14 @@ void ff_volume_init_x86(VolumeContext *vol) vol->scale_samples = ff_scale_samples_s16_sse2; vol->samples_align = 8; } + } else if (sample_fmt == AV_SAMPLE_FMT_S32) { + if (EXTERNAL_SSSE3(mm_flags)) { + vol->scale_samples = ff_scale_samples_s32_ssse3; + vol->samples_align = 4; + } + if (EXTERNAL_AVX(mm_flags)) { + vol->scale_samples = ff_scale_samples_s32_avx; + vol->samples_align = 4; + } } } -- 1.7.1 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel