Re: [libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion

2012-08-21 Thread Ronald S. Bultje
Hi,

On Sun, Aug 5, 2012 at 9:52 PM, Justin Ruggles  wrote:
> ---
>  libavresample/x86/audio_convert.asm|   49 
> 
>  libavresample/x86/audio_convert_init.c |9 ++
>  2 files changed, 58 insertions(+), 0 deletions(-)

LGTM.

Ronald
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   49 
 libavresample/x86/audio_convert_init.c |9 ++
 2 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index ee05efc..c3cc76f 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -383,6 +383,55 @@ INIT_XMM avx
 CONV_S16P_TO_S16_6CH
 %endif
 
+;--
+; void ff_conv_s16p_to_flt_2ch(float *dst, int16_t *const *src, int len,
+;  int channels);
+;--
+
+%macro CONV_S16P_TO_FLT_2CH 0
+cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1
+    lea       lenq, [2*lend]            ; lenq = 2*len (2 bytes per int16_t sample)
+    mov      src1q, [src0q+gprsize]     ; src1q = src[1]
+    mov      src0q, [src0q]             ; src0q = src[0]
+    lea       dstq, [dstq+4*lenq]       ; dstq += 4*lenq; the stores below index back from here
+    add      src0q, lenq                ; move both source pointers forward by lenq bytes
+    add      src1q, lenq
+    neg       lenq                      ; negative byte offset, counted up towards 0
+    mova        m5, [pf_s32_inv_scale]  ; m5 = factor multiplied into the converted floats
+.loop:
+    mova        m2, [src0q+lenq]        ; m2 =  0,  2,  4,  6,  8, 10, 12, 14
+    mova        m4, [src1q+lenq]        ; m4 =  1,  3,  5,  7,  9, 11, 13, 15
+    SBUTTERFLY2 wd, 2, 4, 3             ; m2 =  0,  1,  2,  3,  4,  5,  6,  7
+                                        ; m4 =  8,  9, 10, 11, 12, 13, 14, 15
+    pxor        m3, m3                  ; zero words: placed below each sample => sample << 16
+    punpcklwd   m0, m3, m2              ; m0 =  0,  1,  2,  3
+    punpckhwd   m1, m3, m2              ; m1 =  4,  5,  6,  7
+    punpcklwd   m2, m3, m4              ; m2 =  8,  9, 10, 11
+    punpckhwd   m3, m4                  ; m3 = 12, 13, 14, 15
+    cvtdq2ps    m0, m0                  ; packed int32 -> packed float
+    cvtdq2ps    m1, m1
+    cvtdq2ps    m2, m2
+    cvtdq2ps    m3, m3
+    mulps       m0, m5                  ; scale every converted value by m5
+    mulps       m1, m5
+    mulps       m2, m5
+    mulps       m3, m5
+    mova  [dstq+4*lenq         ], m0    ; 4 output vectors per pair of input vectors
+    mova  [dstq+4*lenq+  mmsize], m1
+    mova  [dstq+4*lenq+2*mmsize], m2
+    mova  [dstq+4*lenq+3*mmsize], m3
+    add       lenq, mmsize              ; mmsize bytes consumed from each source
+    jl .loop                            ; repeat while the offset is still negative
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_S16P_TO_FLT_2CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_S16P_TO_FLT_2CH
+%endif
+
 ;-
 ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
 ;  int channels);
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index d9d4714..9706c71 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -54,6 +54,11 @@ extern void ff_conv_s16p_to_s16_6ch_sse2slow(int16_t *dst, int16_t *const *src,
 extern void ff_conv_s16p_to_s16_6ch_avx (int16_t *dst, int16_t *const *src,
  int len, int channels);
 
+extern void ff_conv_s16p_to_flt_2ch_sse2(float *dst, int16_t *const *src,
+ int len, int channels);
+extern void ff_conv_s16p_to_flt_2ch_avx (float *dst, int16_t *const *src,
+ int len, int channels);
+
 extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int 
len,
  int channels);
 extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int 
len,
@@ -94,6 +99,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
   2, 16, 16, "SSE2", 
ff_conv_s16p_to_s16_2ch_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
+  2, 16, 8, "SSE2", 
ff_conv_s16p_to_flt_2ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
@@ -110,6 +117,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 16, "AVX", 
ff_conv_s16p_to_s16_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
   6, 16, 8, "AVX", 
ff_conv_s16p_to_s16_6ch_avx);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
+  2, 16, 8, "AVX", 
ff_conv_s16p_to_flt_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
   6, 16, 4, "AVX", 
ff_conv_fltp_to_flt_6ch_avx);
 }
-- 
1.7.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion

2012-07-28 Thread Ronald S. Bultje
Hi,

On Sat, Jul 28, 2012 at 4:57 PM, Justin Ruggles
 wrote:
> ---
>  libavresample/x86/audio_convert.asm|   49 
> 
>  libavresample/x86/audio_convert_init.c |9 ++
>  2 files changed, 58 insertions(+), 0 deletions(-)

OK.

Ronald
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion

2012-07-28 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   49 
 libavresample/x86/audio_convert_init.c |9 ++
 2 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index ee05efc..c3cc76f 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -383,6 +383,55 @@ INIT_XMM avx
 CONV_S16P_TO_S16_6CH
 %endif
 
+;--
+; void ff_conv_s16p_to_flt_2ch(float *dst, int16_t *const *src, int len,
+;  int channels);
+;--
+
+%macro CONV_S16P_TO_FLT_2CH 0
+cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1
+    lea       lenq, [2*lend]            ; lenq = 2*len (2 bytes per int16_t sample)
+    mov      src1q, [src0q+gprsize]     ; src1q = src[1]
+    mov      src0q, [src0q]             ; src0q = src[0]
+    lea       dstq, [dstq+4*lenq]       ; dstq += 4*lenq; the stores below index back from here
+    add      src0q, lenq                ; move both source pointers forward by lenq bytes
+    add      src1q, lenq
+    neg       lenq                      ; negative byte offset, counted up towards 0
+    mova        m5, [pf_s32_inv_scale]  ; m5 = factor multiplied into the converted floats
+.loop:
+    mova        m2, [src0q+lenq]        ; m2 =  0,  2,  4,  6,  8, 10, 12, 14
+    mova        m4, [src1q+lenq]        ; m4 =  1,  3,  5,  7,  9, 11, 13, 15
+    SBUTTERFLY2 wd, 2, 4, 3             ; m2 =  0,  1,  2,  3,  4,  5,  6,  7
+                                        ; m4 =  8,  9, 10, 11, 12, 13, 14, 15
+    pxor        m3, m3                  ; zero words: placed below each sample => sample << 16
+    punpcklwd   m0, m3, m2              ; m0 =  0,  1,  2,  3
+    punpckhwd   m1, m3, m2              ; m1 =  4,  5,  6,  7
+    punpcklwd   m2, m3, m4              ; m2 =  8,  9, 10, 11
+    punpckhwd   m3, m4                  ; m3 = 12, 13, 14, 15
+    cvtdq2ps    m0, m0                  ; packed int32 -> packed float
+    cvtdq2ps    m1, m1
+    cvtdq2ps    m2, m2
+    cvtdq2ps    m3, m3
+    mulps       m0, m5                  ; scale every converted value by m5
+    mulps       m1, m5
+    mulps       m2, m5
+    mulps       m3, m5
+    mova  [dstq+4*lenq         ], m0    ; 4 output vectors per pair of input vectors
+    mova  [dstq+4*lenq+  mmsize], m1
+    mova  [dstq+4*lenq+2*mmsize], m2
+    mova  [dstq+4*lenq+3*mmsize], m3
+    add       lenq, mmsize              ; mmsize bytes consumed from each source
+    jl .loop                            ; repeat while the offset is still negative
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_S16P_TO_FLT_2CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_S16P_TO_FLT_2CH
+%endif
+
 ;-
 ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
 ;  int channels);
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index d9d4714..9706c71 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -54,6 +54,11 @@ extern void ff_conv_s16p_to_s16_6ch_sse2slow(int16_t *dst, int16_t *const *src,
 extern void ff_conv_s16p_to_s16_6ch_avx (int16_t *dst, int16_t *const *src,
  int len, int channels);
 
+extern void ff_conv_s16p_to_flt_2ch_sse2(float *dst, int16_t *const *src,
+ int len, int channels);
+extern void ff_conv_s16p_to_flt_2ch_avx (float *dst, int16_t *const *src,
+ int len, int channels);
+
 extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int 
len,
  int channels);
 extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int 
len,
@@ -94,6 +99,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
   2, 16, 16, "SSE2", 
ff_conv_s16p_to_s16_2ch_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
+  2, 16, 8, "SSE2", 
ff_conv_s16p_to_flt_2ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
@@ -110,6 +117,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 16, "AVX", 
ff_conv_s16p_to_s16_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
   6, 16, 8, "AVX", 
ff_conv_s16p_to_s16_6ch_avx);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
+  2, 16, 8, "AVX", 
ff_conv_s16p_to_flt_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
   6, 16, 4, "AVX", 
ff_conv_fltp_to_flt_6ch_avx);
 }
-- 
1.7.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion

2012-07-24 Thread Ronald S. Bultje
Hi,

On Sat, Jul 14, 2012 at 9:29 PM, Justin Ruggles
 wrote:
> ---
>  libavresample/x86/audio_convert.asm|   49 
> 
>  libavresample/x86/audio_convert_init.c |9 ++
>  2 files changed, 58 insertions(+), 0 deletions(-)

LGTM.

Ronald
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion

2012-07-14 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   49 
 libavresample/x86/audio_convert_init.c |9 ++
 2 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index fdcea3a..52528fa 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -331,6 +331,55 @@ INIT_XMM avx
 CONV_S16P_TO_S16_6CH
 %endif
 
+;--
+; void ff_conv_s16p_to_flt_2ch(float *dst, int16_t *const *src, int len,
+;  int channels);
+;--
+
+%macro CONV_S16P_TO_FLT_2CH 0
+cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1
+    lea       lenq, [2*lend]            ; lenq = 2*len (2 bytes per int16_t sample)
+    mov      src1q, [src0q+gprsize]     ; src1q = src[1]
+    mov      src0q, [src0q]             ; src0q = src[0]
+    lea       dstq, [dstq+4*lenq]       ; dstq += 4*lenq; the stores below index back from here
+    add      src0q, lenq                ; move both source pointers forward by lenq bytes
+    add      src1q, lenq
+    neg       lenq                      ; negative byte offset, counted up towards 0
+    mova        m5, [pf_s32_inv_scale]  ; m5 = factor multiplied into the converted floats
+.loop:
+    mova        m4, [src0q+lenq]        ; m4 = one vector of words from src[0]
+    mova        m1, [src1q+lenq]        ; m1 = one vector of words from src[1]
+    punpcklwd   m2, m4, m1              ; m2 = low halves interleaved: src0 word, src1 word, ...
+    punpckhwd   m4, m1                  ; m4 = high halves interleaved the same way
+    pxor        m3, m3                  ; zero words: placed below each sample => sample << 16
+    punpcklwd   m0, m3, m2              ; m0 = low  half of m2 widened to dwords
+    punpckhwd   m1, m3, m2              ; m1 = high half of m2 widened to dwords
+    punpcklwd   m2, m3, m4              ; m2 = low  half of m4 widened to dwords
+    punpckhwd   m3, m4                  ; m3 = high half of m4 widened to dwords
+    cvtdq2ps    m0, m0                  ; packed int32 -> packed float
+    cvtdq2ps    m1, m1
+    cvtdq2ps    m2, m2
+    cvtdq2ps    m3, m3
+    mulps       m0, m5                  ; scale every converted value by m5
+    mulps       m1, m5
+    mulps       m2, m5
+    mulps       m3, m5
+    mova  [dstq+4*lenq         ], m0    ; 4 output vectors per pair of input vectors
+    mova  [dstq+4*lenq+  mmsize], m1
+    mova  [dstq+4*lenq+2*mmsize], m2
+    mova  [dstq+4*lenq+3*mmsize], m3
+    add       lenq, mmsize              ; mmsize bytes consumed from each source
+    jl .loop                            ; repeat while the offset is still negative
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_S16P_TO_FLT_2CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_S16P_TO_FLT_2CH
+%endif
+
 ;-
 ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
 ;  int channels);
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index fdddf99..56d00bb 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -52,6 +52,11 @@ extern void ff_conv_s16p_to_s16_6ch_sse2(int16_t *dst, int16_t *const *src,
 extern void ff_conv_s16p_to_s16_6ch_avx (int16_t *dst, int16_t *const *src,
  int len, int channels);
 
+extern void ff_conv_s16p_to_flt_2ch_sse2(float *dst, int16_t *const *src,
+ int len, int channels);
+extern void ff_conv_s16p_to_flt_2ch_avx (float *dst, int16_t *const *src,
+ int len, int channels);
+
 extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int 
len,
  int channels);
 extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int 
len,
@@ -89,6 +94,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 16, "SSE2", 
ff_conv_s16p_to_s16_2ch_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
   6, 16, 8, "SSE2", 
ff_conv_s16p_to_s16_6ch_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
+  2, 16, 8, "SSE2", 
ff_conv_s16p_to_flt_2ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
@@ -105,6 +112,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 16, "AVX", 
ff_conv_s16p_to_s16_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
   6, 16, 8, "AVX", 
ff_conv_s16p_to_s16_6ch_avx);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
+  2, 16, 8, "AVX", 
ff_conv_s16p_to_flt_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
   6, 16, 4, "AVX", 
ff_conv_fltp_to_flt_6ch_avx);
 }
-- 
1.7.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel