Re: [libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion
Hi, On Sun, Aug 5, 2012 at 9:52 PM, Justin Ruggles wrote: > --- > libavresample/x86/audio_convert.asm | 49 > libavresample/x86/audio_convert_init.c | 9 ++ > 2 files changed, 58 insertions(+), 0 deletions(-) LGTM. Ronald ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion
--- libavresample/x86/audio_convert.asm| 49 libavresample/x86/audio_convert_init.c |9 ++ 2 files changed, 58 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index ee05efc..c3cc76f 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -383,6 +383,55 @@ INIT_XMM avx CONV_S16P_TO_S16_6CH %endif +;-- +; void ff_conv_s16p_to_flt_2ch(float *dst, int16_t *const *src, int len, +; int channels); +;-- + +%macro CONV_S16P_TO_FLT_2CH 0 +cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1 +lea lenq, [2*lend] +mov src1q, [src0q+gprsize] +mov src0q, [src0q] +lea dstq, [dstq+4*lenq] +add src0q, lenq +add src1q, lenq +neg lenq +movam5, [pf_s32_inv_scale] +.loop: +movam2, [src0q+lenq]; m2 = 0, 2, 4, 6, 8, 10, 12, 14 +movam4, [src1q+lenq]; m4 = 1, 3, 5, 7, 9, 11, 13, 15 +SBUTTERFLY2 wd, 2, 4, 3 ; m2 = 0, 1, 2, 3, 4, 5, 6, 7 +; m4 = 8, 9, 10, 11, 12, 13, 14, 15 +pxorm3, m3 +punpcklwd m0, m3, m2 ; m0 = 0, 1, 2, 3 +punpckhwd m1, m3, m2 ; m1 = 4, 5, 6, 7 +punpcklwd m2, m3, m4 ; m2 = 8, 9, 10, 11 +punpckhwd m3, m4 ; m3 = 12, 13, 14, 15 +cvtdq2psm0, m0 +cvtdq2psm1, m1 +cvtdq2psm2, m2 +cvtdq2psm3, m3 +mulps m0, m5 +mulps m1, m5 +mulps m2, m5 +mulps m3, m5 +mova [dstq+4*lenq ], m0 +mova [dstq+4*lenq+ mmsize], m1 +mova [dstq+4*lenq+2*mmsize], m2 +mova [dstq+4*lenq+3*mmsize], m3 +add lenq, mmsize +jl .loop +REP_RET +%endmacro + +INIT_XMM sse2 +CONV_S16P_TO_FLT_2CH +%if HAVE_AVX +INIT_XMM avx +CONV_S16P_TO_FLT_2CH +%endif + ;- ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len, ; int channels); diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index d9d4714..9706c71 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -54,6 +54,11 @@ extern void ff_conv_s16p_to_s16_6ch_sse2slow(int16_t *dst, int16_t *const *src, extern void ff_conv_s16p_to_s16_6ch_avx (int16_t *dst, int16_t 
*const *src, int len, int channels); +extern void ff_conv_s16p_to_flt_2ch_sse2(float *dst, int16_t *const *src, + int len, int channels); +extern void ff_conv_s16p_to_flt_2ch_avx (float *dst, int16_t *const *src, + int len, int channels); + extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len, int channels); extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len, @@ -94,6 +99,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, 2, 16, 16, "SSE2", ff_conv_s16p_to_s16_2ch_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, + 2, 16, 8, "SSE2", ff_conv_s16p_to_flt_2ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, @@ -110,6 +117,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 16, "AVX", ff_conv_s16p_to_s16_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, 6, 16, 8, "AVX", ff_conv_s16p_to_s16_6ch_avx); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, + 2, 16, 8, "AVX", ff_conv_s16p_to_flt_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } -- 1.7.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion
Hi, On Sat, Jul 28, 2012 at 4:57 PM, Justin Ruggles wrote: > --- > libavresample/x86/audio_convert.asm | 49 > libavresample/x86/audio_convert_init.c | 9 ++ > 2 files changed, 58 insertions(+), 0 deletions(-) OK. Ronald ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion
--- libavresample/x86/audio_convert.asm| 49 libavresample/x86/audio_convert_init.c |9 ++ 2 files changed, 58 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index ee05efc..c3cc76f 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -383,6 +383,55 @@ INIT_XMM avx CONV_S16P_TO_S16_6CH %endif +;-- +; void ff_conv_s16p_to_flt_2ch(float *dst, int16_t *const *src, int len, +; int channels); +;-- + +%macro CONV_S16P_TO_FLT_2CH 0 +cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1 +lea lenq, [2*lend] +mov src1q, [src0q+gprsize] +mov src0q, [src0q] +lea dstq, [dstq+4*lenq] +add src0q, lenq +add src1q, lenq +neg lenq +movam5, [pf_s32_inv_scale] +.loop: +movam2, [src0q+lenq]; m2 = 0, 2, 4, 6, 8, 10, 12, 14 +movam4, [src1q+lenq]; m4 = 1, 3, 5, 7, 9, 11, 13, 15 +SBUTTERFLY2 wd, 2, 4, 3 ; m2 = 0, 1, 2, 3, 4, 5, 6, 7 +; m4 = 8, 9, 10, 11, 12, 13, 14, 15 +pxorm3, m3 +punpcklwd m0, m3, m2 ; m0 = 0, 1, 2, 3 +punpckhwd m1, m3, m2 ; m1 = 4, 5, 6, 7 +punpcklwd m2, m3, m4 ; m2 = 8, 9, 10, 11 +punpckhwd m3, m4 ; m3 = 12, 13, 14, 15 +cvtdq2psm0, m0 +cvtdq2psm1, m1 +cvtdq2psm2, m2 +cvtdq2psm3, m3 +mulps m0, m5 +mulps m1, m5 +mulps m2, m5 +mulps m3, m5 +mova [dstq+4*lenq ], m0 +mova [dstq+4*lenq+ mmsize], m1 +mova [dstq+4*lenq+2*mmsize], m2 +mova [dstq+4*lenq+3*mmsize], m3 +add lenq, mmsize +jl .loop +REP_RET +%endmacro + +INIT_XMM sse2 +CONV_S16P_TO_FLT_2CH +%if HAVE_AVX +INIT_XMM avx +CONV_S16P_TO_FLT_2CH +%endif + ;- ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len, ; int channels); diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index d9d4714..9706c71 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -54,6 +54,11 @@ extern void ff_conv_s16p_to_s16_6ch_sse2slow(int16_t *dst, int16_t *const *src, extern void ff_conv_s16p_to_s16_6ch_avx (int16_t *dst, int16_t 
*const *src, int len, int channels); +extern void ff_conv_s16p_to_flt_2ch_sse2(float *dst, int16_t *const *src, + int len, int channels); +extern void ff_conv_s16p_to_flt_2ch_avx (float *dst, int16_t *const *src, + int len, int channels); + extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len, int channels); extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len, @@ -94,6 +99,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, 2, 16, 16, "SSE2", ff_conv_s16p_to_s16_2ch_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, + 2, 16, 8, "SSE2", ff_conv_s16p_to_flt_2ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, @@ -110,6 +117,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 16, "AVX", ff_conv_s16p_to_s16_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, 6, 16, 8, "AVX", ff_conv_s16p_to_s16_6ch_avx); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, + 2, 16, 8, "AVX", ff_conv_s16p_to_flt_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } -- 1.7.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion
Hi, On Sat, Jul 14, 2012 at 9:29 PM, Justin Ruggles wrote: > --- > libavresample/x86/audio_convert.asm | 49 > libavresample/x86/audio_convert_init.c | 9 ++ > 2 files changed, 58 insertions(+), 0 deletions(-) LGTM. Ronald ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion
--- libavresample/x86/audio_convert.asm| 49 libavresample/x86/audio_convert_init.c |9 ++ 2 files changed, 58 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index fdcea3a..52528fa 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -331,6 +331,55 @@ INIT_XMM avx CONV_S16P_TO_S16_6CH %endif +;-- +; void ff_conv_s16p_to_flt_2ch(float *dst, int16_t *const *src, int len, +; int channels); +;-- + +%macro CONV_S16P_TO_FLT_2CH 0 +cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1 +lea lenq, [2*lend] +mov src1q, [src0q+gprsize] +mov src0q, [src0q] +lea dstq, [dstq+4*lenq] +add src0q, lenq +add src1q, lenq +neg lenq +movam5, [pf_s32_inv_scale] +.loop: +movam4, [src0q+lenq] +movam1, [src1q+lenq] +punpcklwd m2, m4, m1 +punpckhwd m4, m1 +pxorm3, m3 +punpcklwd m0, m3, m2 +punpckhwd m1, m3, m2 +punpcklwd m2, m3, m4 +punpckhwd m3, m4 +cvtdq2psm0, m0 +cvtdq2psm1, m1 +cvtdq2psm2, m2 +cvtdq2psm3, m3 +mulps m0, m5 +mulps m1, m5 +mulps m2, m5 +mulps m3, m5 +mova [dstq+4*lenq ], m0 +mova [dstq+4*lenq+ mmsize], m1 +mova [dstq+4*lenq+2*mmsize], m2 +mova [dstq+4*lenq+3*mmsize], m3 +add lenq, mmsize +jl .loop +REP_RET +%endmacro + +INIT_XMM sse2 +CONV_S16P_TO_FLT_2CH +%if HAVE_AVX +INIT_XMM avx +CONV_S16P_TO_FLT_2CH +%endif + ;- ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len, ; int channels); diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index fdddf99..56d00bb 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -52,6 +52,11 @@ extern void ff_conv_s16p_to_s16_6ch_sse2(int16_t *dst, int16_t *const *src, extern void ff_conv_s16p_to_s16_6ch_avx (int16_t *dst, int16_t *const *src, int len, int channels); +extern void ff_conv_s16p_to_flt_2ch_sse2(float *dst, int16_t *const *src, + int len, int channels); +extern void ff_conv_s16p_to_flt_2ch_avx (float *dst, 
int16_t *const *src, + int len, int channels); + extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len, int channels); extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len, @@ -89,6 +94,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 16, "SSE2", ff_conv_s16p_to_s16_2ch_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, 6, 16, 8, "SSE2", ff_conv_s16p_to_s16_6ch_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, + 2, 16, 8, "SSE2", ff_conv_s16p_to_flt_2ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, @@ -105,6 +112,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 16, "AVX", ff_conv_s16p_to_s16_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, 6, 16, 8, "AVX", ff_conv_s16p_to_s16_6ch_avx); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, + 2, 16, 8, "AVX", ff_conv_s16p_to_flt_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } -- 1.7.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel