Re: [libav-devel] [PATCH 07/45] x86: mmx2 ---> mmxext in asm constructs

2012-08-05 Thread Daniel Kang
On Sun, Aug 5, 2012 at 10:20 AM, Ronald S. Bultje  wrote:
> Plus, I didn't say it was a good idea, I said I could live with it if
> others want it. Right now, it seems others (i.e. Loren) don't.

FYI, it makes my life harder. Also I agree with Loren.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 4/5] x86: fix rNmp macros with nasm

2012-08-05 Thread Daniel Kang
On Sun, Aug 5, 2012 at 7:36 PM, Mans Rullgard  wrote:
> For some reason, nasm requires this.  No harm done to yasm.
>
> Signed-off-by: Mans Rullgard 
> ---
>  libavutil/x86/x86inc.asm | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

Has this been synced with x264?
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] libx264: don't set framerate, set vfr_input.

2012-08-05 Thread Anton Khirnov
There's no way for the encoder to know whether the input is CFR, so
assume VFR.
---
 libavcodec/libx264.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index adda881..72fd390 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -381,8 +381,9 @@ static av_cold int X264_init(AVCodecContext *avctx)
 x4->params.i_height = avctx->height;
 x4->params.vui.i_sar_width  = avctx->sample_aspect_ratio.num;
 x4->params.vui.i_sar_height = avctx->sample_aspect_ratio.den;
-x4->params.i_fps_num = x4->params.i_timebase_den = avctx->time_base.den;
-x4->params.i_fps_den = x4->params.i_timebase_num = avctx->time_base.num;
+x4->params.i_timebase_den = avctx->time_base.den;
+x4->params.i_timebase_num = avctx->time_base.num;
+x4->params.b_vfr_input= 1;
 
 x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR;
 
-- 
1.7.10.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 13/15] lavr: x86: optimized 6-channel flt to s16p conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   74 
 libavresample/x86/audio_convert_init.c |   13 ++
 2 files changed, 87 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index 2908cbf..c666da0 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -1091,3 +1091,77 @@ CONV_FLT_TO_S16P_2CH
 INIT_XMM avx
 CONV_FLT_TO_S16P_2CH
 %endif
+
+;------------------------------------------------------------------------------
+; void ff_conv_flt_to_s16p_6ch(int16_t *const *dst, float *src, int len,
+;                              int channels);
+;------------------------------------------------------------------------------
+
+%macro CONV_FLT_TO_S16P_6CH 0 ; interleaved float -> 6 planar int16 channels
+%if ARCH_X86_64
+cglobal conv_flt_to_s16p_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5
+%else ; x86-32: len is not loaded into a register; lend refers to the argument (r2m)
+cglobal conv_flt_to_s16p_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
+%define lend dword r2m
+%endif
+    mov     dst1q, [dstq+  gprsize]  ; dst1..dst5 = dst[1]..dst[5]
+    mov     dst2q, [dstq+2*gprsize]
+    mov     dst3q, [dstq+3*gprsize]
+    mov     dst4q, [dstq+4*gprsize]
+    mov     dst5q, [dstq+5*gprsize]
+    mov      dstq, [dstq          ]  ; dstq = dst[0]
+    sub     dst1q, dstq              ; keep dst1..dst5 as offsets from dst[0] so
+    sub     dst2q, dstq              ; that only dstq has to advance in the loop
+    sub     dst3q, dstq
+    sub     dst4q, dstq
+    sub     dst5q, dstq
+    mova       m6, [pf_s16_scale]
+.loop:
+    mulps      m0, m6, [srcq+0*mmsize]
+    mulps      m3, m6, [srcq+1*mmsize]
+    mulps      m1, m6, [srcq+2*mmsize]
+    mulps      m4, m6, [srcq+3*mmsize]
+    mulps      m2, m6, [srcq+4*mmsize]
+    mulps      m5, m6, [srcq+5*mmsize]
+    cvtps2dq   m0, m0
+    cvtps2dq   m1, m1
+    cvtps2dq   m2, m2
+    cvtps2dq   m3, m3
+    cvtps2dq   m4, m4
+    cvtps2dq   m5, m5
+    packssdw   m0, m3               ; m0 =  0,  1,  2,  3,  4,  5,  6,  7
+    packssdw   m1, m4               ; m1 =  8,  9, 10, 11, 12, 13, 14, 15
+    packssdw   m2, m5               ; m2 = 16, 17, 18, 19, 20, 21, 22, 23
+    PALIGNR    m3, m1, m0, 12, m4   ; m3 =  6,  7,  8,  9, 10, 11,  x,  x
+    shufps     m1, m2, q1032        ; m1 = 12, 13, 14, 15, 16, 17, 18, 19
+    psrldq     m2, 4                ; m2 = 18, 19, 20, 21, 22, 23,  x,  x
+    SBUTTERFLY2 wd, 0, 3, 4         ; m0 =  0,  6,  1,  7,  2,  8,  3,  9
+                                    ; m3 =  4, 10,  5, 11,  x,  x,  x,  x
+    SBUTTERFLY2 wd, 1, 2, 4         ; m1 = 12, 18, 13, 19, 14, 20, 15, 21
+                                    ; m2 = 16, 22, 17, 23,  x,  x,  x,  x
+    SBUTTERFLY2 dq, 0, 1, 4         ; m0 =  0,  6, 12, 18,  1,  7, 13, 19
+                                    ; m1 =  2,  8, 14, 20,  3,  9, 15, 21
+    punpckldq  m3, m2               ; m3 =  4, 10, 16, 22,  5, 11, 17, 23
+    movq    [dstq      ], m0
+    movhps  [dstq+dst1q], m0
+    movq    [dstq+dst2q], m1
+    movhps  [dstq+dst3q], m1
+    movq    [dstq+dst4q], m3
+    movhps  [dstq+dst5q], m3
+    add      srcq, mmsize*6
+    add      dstq, mmsize/2
+    sub      lend, mmsize/4         ; mmsize/4 samples per channel are done per pass
+    jg .loop
+    REP_RET
+%endmacro
+
+%define PALIGNR PALIGNR_MMX
+INIT_XMM sse2
+CONV_FLT_TO_S16P_6CH
+%define PALIGNR PALIGNR_SSSE3
+INIT_XMM ssse3
+CONV_FLT_TO_S16P_6CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_FLT_TO_S16P_6CH
+%endif
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index d623543..944f1cd 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -127,6 +127,13 @@ extern void ff_conv_flt_to_s16p_2ch_sse2(int16_t *const 
*dst, float *src,
 extern void ff_conv_flt_to_s16p_2ch_avx (int16_t *const *dst, float *src,
  int len, int channels);
 
+extern void ff_conv_flt_to_s16p_6ch_sse2 (int16_t *const *dst, float *src,
+  int len, int channels);
+extern void ff_conv_flt_to_s16p_6ch_ssse3(int16_t *const *dst, float *src,
+  int len, int channels);
+extern void ff_conv_flt_to_s16p_6ch_avx  (int16_t *const *dst, float *src,
+  int len, int channels);
+
 av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 {
 #if HAVE_YASM
@@ -184,6 +191,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   6, 16, 4, "SSE2", 
ff_conv_s16_to_fltp_6ch_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
   2, 16, 8, "SSE2", 
ff_conv_flt_to_s16p_2ch_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
+  6, 16, 4, "SSE2", 
ff_conv_flt_to_s16p_6ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
@@ -196,6 +205,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   6, 16, 4, "SSSE3", 
ff_conv_s16_to_s16p_6ch_

[libav-devel] [PATCH 12/15] lavr: x86: optimized 2-channel flt to s16p conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   46 
 libavresample/x86/audio_convert_init.c |9 ++
 2 files changed, 55 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index 0329a79..2908cbf 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -1045,3 +1045,49 @@ CONV_S16_TO_FLTP_6CH
 INIT_XMM avx
 CONV_S16_TO_FLTP_6CH
 %endif
+
+;------------------------------------------------------------------------------
+; void ff_conv_flt_to_s16p_2ch(int16_t *const *dst, float *src, int len,
+;                              int channels);
+;------------------------------------------------------------------------------
+
+%macro CONV_FLT_TO_S16P_2CH 0 ; interleaved float -> 2 planar int16 channels
+cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1
+    lea      lenq, [2*lend]         ; lenq = byte size of one int16 dst plane
+    mov     dst1q, [dst0q+gprsize]
+    mov     dst0q, [dst0q        ]
+    lea      srcq, [srcq+4*lenq]    ; move src/dst0/dst1 to their ends ...
+    add     dst0q, lenq
+    add     dst1q, lenq
+    neg      lenq                   ; ... and run a negative offset up to 0
+    mova       m5, [pf_s16_scale]
+.loop:
+    mova       m0, [srcq+4*lenq         ]
+    mova       m1, [srcq+4*lenq+  mmsize]
+    mova       m2, [srcq+4*lenq+2*mmsize]
+    mova       m3, [srcq+4*lenq+3*mmsize]
+    DEINT2_PS   0, 1, 4
+    DEINT2_PS   2, 3, 4
+    mulps      m0, m0, m5
+    mulps      m1, m1, m5
+    mulps      m2, m2, m5
+    mulps      m3, m3, m5
+    cvtps2dq   m0, m0
+    cvtps2dq   m1, m1
+    cvtps2dq   m2, m2
+    cvtps2dq   m3, m3
+    packssdw   m0, m2
+    packssdw   m1, m3
+    mova  [dst0q+lenq], m0
+    mova  [dst1q+lenq], m1
+    add      lenq, mmsize
+    jl .loop
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_FLT_TO_S16P_2CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_FLT_TO_S16P_2CH
+%endif
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index 165e376..d623543 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -122,6 +122,11 @@ extern void ff_conv_s16_to_fltp_6ch_sse4 (float *const 
*dst, int16_t *src,
 extern void ff_conv_s16_to_fltp_6ch_avx  (float *const *dst, int16_t *src,
   int len, int channels);
 
+extern void ff_conv_flt_to_s16p_2ch_sse2(int16_t *const *dst, float *src,
+ int len, int channels);
+extern void ff_conv_flt_to_s16p_2ch_avx (int16_t *const *dst, float *src,
+ int len, int channels);
+
 av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 {
 #if HAVE_YASM
@@ -177,6 +182,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 8, "SSE2", 
ff_conv_s16_to_fltp_2ch_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
   6, 16, 4, "SSE2", 
ff_conv_s16_to_fltp_6ch_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
+  2, 16, 8, "SSE2", 
ff_conv_flt_to_s16p_2ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
@@ -225,6 +232,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 8, "AVX", 
ff_conv_s16_to_fltp_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
   6, 16, 4, "AVX", 
ff_conv_s16_to_fltp_6ch_avx);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
+  2, 16, 8, "AVX", 
ff_conv_flt_to_s16p_2ch_avx);
 }
 #endif
 }
-- 
1.7.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 10/15] lavr: x86: optimized 2-channel s16 to fltp conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   39 
 libavresample/x86/audio_convert_init.c |   13 ++
 2 files changed, 52 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index 09c4e7f..ea0debf 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -923,3 +923,42 @@ CONV_S16_TO_S16P_6CH
 INIT_XMM avx
 CONV_S16_TO_S16P_6CH
 %endif
+
+;------------------------------------------------------------------------------
+; void ff_conv_s16_to_fltp_2ch(float *const *dst, int16_t *src, int len,
+;                              int channels);
+;------------------------------------------------------------------------------
+
+%macro CONV_S16_TO_FLTP_2CH 0 ; interleaved int16 -> 2 planar float channels
+cglobal conv_s16_to_fltp_2ch, 3,4,4, dst0, src, len, dst1
+    lea      lenq, [4*lend]         ; lenq = byte size of one float dst plane
+    mov     dst1q, [dst0q+gprsize]
+    mov     dst0q, [dst0q        ]
+    add      srcq, lenq             ; move src/dst0/dst1 to their ends ...
+    add     dst0q, lenq
+    add     dst1q, lenq
+    neg      lenq                   ; ... and run a negative offset up to 0
+    mova       m3, [pf_s16_inv_scale]
+.loop:
+    mova       m0, [srcq+lenq]
+    S16_TO_S32_SX 0, 1
+    cvtdq2ps   m0, m0
+    cvtdq2ps   m1, m1
+    mulps      m0, m0, m3
+    mulps      m1, m1, m3
+    DEINT2_PS   0, 1, 2
+    mova  [dst0q+lenq], m0
+    mova  [dst1q+lenq], m1
+    add      lenq, mmsize
+    jl .loop
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_S16_TO_FLTP_2CH
+INIT_XMM sse4
+CONV_S16_TO_FLTP_2CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_S16_TO_FLTP_2CH
+%endif
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index adb63f6..db4d3f3 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -106,6 +106,13 @@ extern void ff_conv_s16_to_s16p_6ch_ssse3(int16_t *const 
*dst, int16_t *src,
 extern void ff_conv_s16_to_s16p_6ch_avx  (int16_t *const *dst, int16_t *src,
   int len, int channels);
 
+extern void ff_conv_s16_to_fltp_2ch_sse2(float *const *dst, int16_t *src,
+ int len, int channels);
+extern void ff_conv_s16_to_fltp_2ch_sse4(float *const *dst, int16_t *src,
+ int len, int channels);
+extern void ff_conv_s16_to_fltp_2ch_avx (float *const *dst, int16_t *src,
+ int len, int channels);
+
 av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 {
 #if HAVE_YASM
@@ -157,6 +164,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 8, "SSE2", 
ff_conv_s16_to_s16p_2ch_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
   6, 16, 4, "SSE2", 
ff_conv_s16_to_s16p_6ch_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
+  2, 16, 8, "SSE2", 
ff_conv_s16_to_fltp_2ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
@@ -173,6 +182,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
   6, 16, 4, "SSE4", 
ff_conv_fltp_to_flt_6ch_sse4);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
+  2, 16, 8, "SSE4", 
ff_conv_s16_to_fltp_2ch_sse4);
 }
 if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
@@ -195,6 +206,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 8, "AVX", 
ff_conv_s16_to_s16p_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
   6, 16, 4, "AVX", 
ff_conv_s16_to_s16p_6ch_avx);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
+  2, 16, 8, "AVX", 
ff_conv_s16_to_fltp_2ch_avx);
 }
 #endif
 }
-- 
1.7.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 11/15] lavr: x86: optimized 6-channel s16 to fltp conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   83 
 libavresample/x86/audio_convert_init.c |   17 +++
 libavutil/x86/x86util.asm  |   12 +
 3 files changed, 112 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index ea0debf..0329a79 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -962,3 +962,86 @@ CONV_S16_TO_FLTP_2CH
 INIT_XMM avx
 CONV_S16_TO_FLTP_2CH
 %endif
+
+;------------------------------------------------------------------------------
+; void ff_conv_s16_to_fltp_6ch(float *const *dst, int16_t *src, int len,
+;                              int channels);
+;------------------------------------------------------------------------------
+
+%macro CONV_S16_TO_FLTP_6CH 0 ; interleaved int16 -> 6 planar float channels
+%if ARCH_X86_64
+cglobal conv_s16_to_fltp_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5
+%else ; x86-32: len is not loaded into a register; lend refers to the argument (r2m)
+cglobal conv_s16_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
+%define lend dword r2m
+%endif
+    mov     dst1q, [dstq+  gprsize]  ; dst1..dst5 = dst[1]..dst[5]
+    mov     dst2q, [dstq+2*gprsize]
+    mov     dst3q, [dstq+3*gprsize]
+    mov     dst4q, [dstq+4*gprsize]
+    mov     dst5q, [dstq+5*gprsize]
+    mov      dstq, [dstq          ]  ; dstq = dst[0]
+    sub     dst1q, dstq              ; keep dst1..dst5 as offsets from dst[0] so
+    sub     dst2q, dstq              ; that only dstq has to advance in the loop
+    sub     dst3q, dstq
+    sub     dst4q, dstq
+    sub     dst5q, dstq
+    mova       m6, [pf_s16_inv_scale]
+.loop:
+    mova       m0, [srcq+0*mmsize]  ; m0 =  0,  1,  2,  3,  4,  5,  6,  7
+    mova       m3, [srcq+1*mmsize]  ; m3 =  8,  9, 10, 11, 12, 13, 14, 15
+    mova       m2, [srcq+2*mmsize]  ; m2 = 16, 17, 18, 19, 20, 21, 22, 23
+    PALIGNR    m1, m3, m0, 12, m4   ; m1 =  6,  7,  8,  9, 10, 11,  x,  x
+    shufps     m3, m2, q1032        ; m3 = 12, 13, 14, 15, 16, 17, 18, 19
+    psrldq     m2, 4                ; m2 = 18, 19, 20, 21, 22, 23,  x,  x
+    SBUTTERFLY2 wd, 0, 1, 4         ; m0 =  0,  6,  1,  7,  2,  8,  3,  9
+                                    ; m1 =  4, 10,  5, 11,  x,  x,  x,  x
+    SBUTTERFLY2 wd, 3, 2, 4         ; m3 = 12, 18, 13, 19, 14, 20, 15, 21
+                                    ; m2 = 16, 22, 17, 23,  x,  x,  x,  x
+    SBUTTERFLY2 dq, 0, 3, 4         ; m0 =  0,  6, 12, 18,  1,  7, 13, 19
+                                    ; m3 =  2,  8, 14, 20,  3,  9, 15, 21
+    punpckldq  m1, m2               ; m1 =  4, 10, 16, 22,  5, 11, 17, 23
+    S16_TO_S32_SX 0, 2              ; m0 =  0,  6, 12, 18
+                                    ; m2 =  1,  7, 13, 19
+    S16_TO_S32_SX 3, 4              ; m3 =  2,  8, 14, 20
+                                    ; m4 =  3,  9, 15, 21
+    S16_TO_S32_SX 1, 5              ; m1 =  4, 10, 16, 22
+                                    ; m5 =  5, 11, 17, 23
+    SWAP 1,2,3,4                    ; rename: m1..m4 = channels 1..4, as the stores below expect
+    cvtdq2ps   m0, m0
+    cvtdq2ps   m1, m1
+    cvtdq2ps   m2, m2
+    cvtdq2ps   m3, m3
+    cvtdq2ps   m4, m4
+    cvtdq2ps   m5, m5
+    mulps      m0, m6
+    mulps      m1, m6
+    mulps      m2, m6
+    mulps      m3, m6
+    mulps      m4, m6
+    mulps      m5, m6
+    mova  [dstq      ], m0
+    mova  [dstq+dst1q], m1
+    mova  [dstq+dst2q], m2
+    mova  [dstq+dst3q], m3
+    mova  [dstq+dst4q], m4
+    mova  [dstq+dst5q], m5
+    add      srcq, mmsize*3
+    add      dstq, mmsize
+    sub      lend, mmsize/4         ; mmsize/4 samples per channel are done per pass
+    jg .loop
+    REP_RET
+%endmacro
+
+%define PALIGNR PALIGNR_MMX
+INIT_XMM sse2
+CONV_S16_TO_FLTP_6CH
+%define PALIGNR PALIGNR_SSSE3
+INIT_XMM ssse3
+CONV_S16_TO_FLTP_6CH
+INIT_XMM sse4
+CONV_S16_TO_FLTP_6CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_S16_TO_FLTP_6CH
+%endif
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index db4d3f3..165e376 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -113,6 +113,15 @@ extern void ff_conv_s16_to_fltp_2ch_sse4(float *const 
*dst, int16_t *src,
 extern void ff_conv_s16_to_fltp_2ch_avx (float *const *dst, int16_t *src,
  int len, int channels);
 
+extern void ff_conv_s16_to_fltp_6ch_sse2 (float *const *dst, int16_t *src,
+  int len, int channels);
+extern void ff_conv_s16_to_fltp_6ch_ssse3(float *const *dst, int16_t *src,
+  int len, int channels);
+extern void ff_conv_s16_to_fltp_6ch_sse4 (float *const *dst, int16_t *src,
+  int len, int channels);
+extern void ff_conv_s16_to_fltp_6ch_avx  (float *const *dst, int16_t *src,
+  int len, int channels);
+
 av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 {
 #if HAVE_YASM
@@ -166,6 +175,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   6, 16, 4, "SSE2", 
ff_conv_s16_to_s16p_6ch_sse2);
 ff_audio_convert_set_f

[libav-devel] [PATCH 09/15] lavr: x86: optimized 6-channel s16 to s16p conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   61 
 libavresample/x86/audio_convert_init.c |   13 +++
 2 files changed, 74 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index db141e2..09c4e7f 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -862,3 +862,64 @@ CONV_S16_TO_S16P_2CH
 INIT_XMM avx
 CONV_S16_TO_S16P_2CH
 %endif
+
+;------------------------------------------------------------------------------
+; void ff_conv_s16_to_s16p_6ch(int16_t *const *dst, int16_t *src, int len,
+;                              int channels);
+;------------------------------------------------------------------------------
+
+%macro CONV_S16_TO_S16P_6CH 0 ; interleaved int16 -> 6 planar int16 channels
+%if ARCH_X86_64
+cglobal conv_s16_to_s16p_6ch, 3,8,5, dst, src, len, dst1, dst2, dst3, dst4, dst5
+%else ; x86-32: len is not loaded into a register; lend refers to the argument (r2m)
+cglobal conv_s16_to_s16p_6ch, 2,7,5, dst, src, dst1, dst2, dst3, dst4, dst5
+%define lend dword r2m
+%endif
+    mov     dst1q, [dstq+  gprsize]  ; dst1..dst5 = dst[1]..dst[5]
+    mov     dst2q, [dstq+2*gprsize]
+    mov     dst3q, [dstq+3*gprsize]
+    mov     dst4q, [dstq+4*gprsize]
+    mov     dst5q, [dstq+5*gprsize]
+    mov      dstq, [dstq          ]  ; dstq = dst[0]
+    sub     dst1q, dstq              ; keep dst1..dst5 as offsets from dst[0] so
+    sub     dst2q, dstq              ; that only dstq has to advance in the loop
+    sub     dst3q, dstq
+    sub     dst4q, dstq
+    sub     dst5q, dstq
+.loop:
+    mova       m0, [srcq+0*mmsize]  ; m0 =  0,  1,  2,  3,  4,  5,  6,  7
+    mova       m3, [srcq+1*mmsize]  ; m3 =  8,  9, 10, 11, 12, 13, 14, 15
+    mova       m2, [srcq+2*mmsize]  ; m2 = 16, 17, 18, 19, 20, 21, 22, 23
+    PALIGNR    m1, m3, m0, 12, m4   ; m1 =  6,  7,  8,  9, 10, 11,  x,  x
+    shufps     m3, m2, q1032        ; m3 = 12, 13, 14, 15, 16, 17, 18, 19
+    psrldq     m2, 4                ; m2 = 18, 19, 20, 21, 22, 23,  x,  x
+    SBUTTERFLY2 wd, 0, 1, 4         ; m0 =  0,  6,  1,  7,  2,  8,  3,  9
+                                    ; m1 =  4, 10,  5, 11,  x,  x,  x,  x
+    SBUTTERFLY2 wd, 3, 2, 4         ; m3 = 12, 18, 13, 19, 14, 20, 15, 21
+                                    ; m2 = 16, 22, 17, 23,  x,  x,  x,  x
+    SBUTTERFLY2 dq, 0, 3, 4         ; m0 =  0,  6, 12, 18,  1,  7, 13, 19
+                                    ; m3 =  2,  8, 14, 20,  3,  9, 15, 21
+    punpckldq  m1, m2               ; m1 =  4, 10, 16, 22,  5, 11, 17, 23
+    movq    [dstq      ], m0
+    movhps  [dstq+dst1q], m0
+    movq    [dstq+dst2q], m3
+    movhps  [dstq+dst3q], m3
+    movq    [dstq+dst4q], m1
+    movhps  [dstq+dst5q], m1
+    add      srcq, mmsize*3
+    add      dstq, mmsize/2
+    sub      lend, mmsize/4         ; mmsize/4 samples per channel are done per pass
+    jg .loop
+    REP_RET
+%endmacro
+
+%define PALIGNR PALIGNR_MMX
+INIT_XMM sse2
+CONV_S16_TO_S16P_6CH
+%define PALIGNR PALIGNR_SSSE3
+INIT_XMM ssse3
+CONV_S16_TO_S16P_6CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_S16_TO_S16P_6CH
+%endif
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index 79d7f4d..adb63f6 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -99,6 +99,13 @@ extern void ff_conv_s16_to_s16p_2ch_ssse3(int16_t *const 
*dst, int16_t *src,
 extern void ff_conv_s16_to_s16p_2ch_avx (int16_t *const *dst, int16_t *src,
  int len, int channels);
 
+extern void ff_conv_s16_to_s16p_6ch_sse2 (int16_t *const *dst, int16_t *src,
+  int len, int channels);
+extern void ff_conv_s16_to_s16p_6ch_ssse3(int16_t *const *dst, int16_t *src,
+  int len, int channels);
+extern void ff_conv_s16_to_s16p_6ch_avx  (int16_t *const *dst, int16_t *src,
+  int len, int channels);
+
 av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 {
 #if HAVE_YASM
@@ -148,6 +155,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 4, "SSE2", 
ff_conv_fltp_to_s16_2ch_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
   2, 16, 8, "SSE2", 
ff_conv_s16_to_s16p_2ch_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
+  6, 16, 4, "SSE2", 
ff_conv_s16_to_s16p_6ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
@@ -156,6 +165,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 4, "SSSE3", 
ff_conv_fltp_to_s16_2ch_ssse3);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
   2, 16, 8, "SSSE3", 
ff_conv_s16_to_s16p_2ch_ssse3);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
+  6, 16, 4, "SSSE3", 
ff_conv_s16_to_s16p_6ch_ssse3);
 }
 i

[libav-devel] [PATCH 08/15] lavr: x86: optimized 2-channel s16 to s16p conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   50 
 libavresample/x86/audio_convert_init.c |   15 +
 libavresample/x86/util.asm |6 
 3 files changed, 71 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index ead4a5c..db141e2 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -33,6 +33,7 @@ pf_s16_scale: times 4 dd 0x4700
 pb_shuf_unpack_even:  db -1, -1,  0,  1, -1, -1,  2,  3, -1, -1,  8,  9, 
-1, -1, 10, 11
 pb_shuf_unpack_odd:   db -1, -1,  4,  5, -1, -1,  6,  7, -1, -1, 12, 13, 
-1, -1, 14, 15
 pb_interleave_words: SHUFFLE_MASK_W  0,  4,  1,  5,  2,  6,  3,  7
+pb_deinterleave_words: SHUFFLE_MASK_W  0,  2,  4,  6,  1,  3,  5,  7
 
 SECTION_TEXT
 
@@ -812,3 +813,52 @@ CONV_FLTP_TO_FLT_6CH
 INIT_XMM avx
 CONV_FLTP_TO_FLT_6CH
 %endif
+
+;------------------------------------------------------------------------------
+; void ff_conv_s16_to_s16p_2ch(int16_t *const *dst, int16_t *src, int len,
+;                              int channels);
+;------------------------------------------------------------------------------
+
+%macro CONV_S16_TO_S16P_2CH 0 ; interleaved int16 -> 2 planar int16 channels
+cglobal conv_s16_to_s16p_2ch, 3,4,4, dst0, src, len, dst1
+    lea       lenq, [2*lend]        ; lenq = byte size of one int16 dst plane
+    mov      dst1q, [dst0q+gprsize]
+    mov      dst0q, [dst0q        ]
+    lea       srcq, [srcq+2*lenq]   ; move src/dst0/dst1 to their ends ...
+    add      dst0q, lenq
+    add      dst1q, lenq
+    neg       lenq                  ; ... and run a negative offset up to 0
+%if cpuflag(ssse3)
+    mova        m3, [pb_deinterleave_words]
+%endif
+.loop:
+    mova        m0, [srcq+2*lenq       ]  ; m0 =  0,  1,  2,  3,  4,  5,  6,  7
+    mova        m1, [srcq+2*lenq+mmsize]  ; m1 =  8,  9, 10, 11, 12, 13, 14, 15
+%if cpuflag(ssse3)
+    pshufb      m0, m3                    ; m0 =  0,  2,  4,  6,  1,  3,  5,  7
+    pshufb      m1, m3                    ; m1 =  8, 10, 12, 14,  9, 11, 13, 15
+    SBUTTERFLY2 qdq, 0, 1, 2              ; m0 =  0,  2,  4,  6,  8, 10, 12, 14
+                                          ; m1 =  1,  3,  5,  7,  9, 11, 13, 15
+%else ; sse2
+    pshuflw     m0, m0, q3120             ; m0 =  0,  2,  1,  3,  4,  5,  6,  7
+    pshufhw     m0, m0, q3120             ; m0 =  0,  2,  1,  3,  4,  6,  5,  7
+    pshuflw     m1, m1, q3120             ; m1 =  8, 10,  9, 11, 12, 13, 14, 15
+    pshufhw     m1, m1, q3120             ; m1 =  8, 10,  9, 11, 12, 14, 13, 15
+    DEINT2_PS    0, 1, 2                  ; m0 =  0,  2,  4,  6,  8, 10, 12, 14
+                                          ; m1 =  1,  3,  5,  7,  9, 11, 13, 15
+%endif
+    mova  [dst0q+lenq], m0
+    mova  [dst1q+lenq], m1
+    add       lenq, mmsize
+    jl .loop
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_S16_TO_S16P_2CH
+INIT_XMM ssse3
+CONV_S16_TO_S16P_2CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_S16_TO_S16P_2CH
+%endif
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index 6e78338..79d7f4d 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -90,6 +90,15 @@ extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float 
*const *src, int len,
 extern void ff_conv_fltp_to_flt_6ch_avx (float *dst, float *const *src, int 
len,
  int channels);
 
+/* deinterleave conversions */
+
+extern void ff_conv_s16_to_s16p_2ch_sse2(int16_t *const *dst, int16_t *src,
+ int len, int channels);
+extern void ff_conv_s16_to_s16p_2ch_ssse3(int16_t *const *dst, int16_t *src,
+  int len, int channels);
+extern void ff_conv_s16_to_s16p_2ch_avx (int16_t *const *dst, int16_t *src,
+ int len, int channels);
+
 av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 {
 #if HAVE_YASM
@@ -137,12 +146,16 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   6, 16, 4, "SSE2", 
ff_conv_s16p_to_flt_6ch_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
   2, 16, 4, "SSE2", 
ff_conv_fltp_to_s16_2ch_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
+  2, 16, 8, "SSE2", 
ff_conv_s16_to_s16p_2ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
   6, 16, 4, "SSSE3", 
ff_conv_s16p_to_flt_6ch_ssse3);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
   2, 16, 4, "SSSE3", 
ff_conv_fltp_to_s16_2ch_ssse3);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
+  2, 16, 8, "SSSE3", 
ff_conv_s16_to_s16p_2ch_ssse3);
 }
 if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
 ff_audio_

[libav-devel] [PATCH 06/15] lavr: x86: optimized 6-channel fltp to s16 conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|  114 
 libavresample/x86/audio_convert_init.c |   15 
 2 files changed, 129 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index 15aaa6a..8240a32 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -581,6 +581,120 @@ CONV_FLTP_TO_S16_2CH
 INIT_XMM ssse3
 CONV_FLTP_TO_S16_2CH
 
+;------------------------------------------------------------------------------
+; void ff_conv_fltp_to_s16_6ch(int16_t *dst, float *const *src, int len,
+;                              int channels);
+;------------------------------------------------------------------------------
+
+%macro CONV_FLTP_TO_S16_6CH 0 ; 6 planar float channels -> interleaved int16
+%if ARCH_X86_64
+cglobal conv_fltp_to_s16_6ch, 3,8,7, dst, src, len, src1, src2, src3, src4, src5
+%else ; x86-32: len is not loaded into a register; lend refers to the argument (r2m)
+cglobal conv_fltp_to_s16_6ch, 2,7,7, dst, src, src1, src2, src3, src4, src5
+%define lend dword r2m
+%endif
+    mov        src1q, [srcq+1*gprsize]  ; src1..src5 = src[1]..src[5]
+    mov        src2q, [srcq+2*gprsize]
+    mov        src3q, [srcq+3*gprsize]
+    mov        src4q, [srcq+4*gprsize]
+    mov        src5q, [srcq+5*gprsize]
+    mov         srcq, [srcq]            ; srcq = src[0]
+    sub        src1q, srcq              ; keep src1..src5 as offsets from src[0] so
+    sub        src2q, srcq              ; that only srcq has to advance in the loop
+    sub        src3q, srcq
+    sub        src4q, srcq
+    sub        src5q, srcq
+    movaps      xmm6, [pf_s16_scale]    ; explicit xmm6: the mmx-sized build also scales in xmm regs
+.loop:
+%if cpuflag(sse2)
+    mulps         m0, m6, [srcq      ]
+    mulps         m1, m6, [srcq+src1q]
+    mulps         m2, m6, [srcq+src2q]
+    mulps         m3, m6, [srcq+src3q]
+    mulps         m4, m6, [srcq+src4q]
+    mulps         m5, m6, [srcq+src5q]
+    cvtps2dq      m0, m0
+    cvtps2dq      m1, m1
+    cvtps2dq      m2, m2
+    cvtps2dq      m3, m3
+    cvtps2dq      m4, m4
+    cvtps2dq      m5, m5
+    packssdw      m0, m3                ; m0 =  0,  6, 12, 18,  3,  9, 15, 21
+    packssdw      m1, m4                ; m1 =  1,  7, 13, 19,  4, 10, 16, 22
+    packssdw      m2, m5                ; m2 =  2,  8, 14, 20,  5, 11, 17, 23
+    ; unpack words:
+    movhlps       m3, m0                ; m3 =  3,  9, 15, 21,  x,  x,  x,  x
+    punpcklwd     m0, m1                ; m0 =  0,  1,  6,  7, 12, 13, 18, 19
+    punpckhwd     m1, m2                ; m1 =  4,  5, 10, 11, 16, 17, 22, 23
+    punpcklwd     m2, m3                ; m2 =  2,  3,  8,  9, 14, 15, 20, 21
+    ; blend dwords:
+    shufps        m3, m0, m2, q2020     ; m3 =  0,  1, 12, 13,  2,  3, 14, 15
+    shufps        m0, m1, q2031         ; m0 =  6,  7, 18, 19,  4,  5, 16, 17
+    shufps        m2, m1, q3131         ; m2 =  8,  9, 20, 21, 10, 11, 22, 23
+    ; shuffle dwords:
+    shufps        m1, m2, m3, q3120     ; m1 =  8,  9, 10, 11, 12, 13, 14, 15
+    shufps        m3, m0, q0220         ; m3 =  0,  1,  2,  3,  4,  5,  6,  7
+    shufps        m0, m2, q3113         ; m0 = 16, 17, 18, 19, 20, 21, 22, 23
+    mova  [dstq+0*mmsize], m3
+    mova  [dstq+1*mmsize], m1
+    mova  [dstq+2*mmsize], m0
+%else ; sse
+    mova        xmm0, [srcq      ]      ; NOTE(review): under INIT_MMX mova presumably expands to movq, and movq to an
+    mova        xmm1, [srcq+src1q]      ; xmm register is an SSE2 encoding - confirm this is OK for the SSE-only build
+    mova        xmm2, [srcq+src2q]
+    mova        xmm3, [srcq+src3q]
+    mova        xmm4, [srcq+src4q]
+    mova        xmm5, [srcq+src5q]
+    mulps       xmm0, xmm6
+    mulps       xmm1, xmm6
+    mulps       xmm2, xmm6
+    mulps       xmm3, xmm6
+    mulps       xmm4, xmm6
+    mulps       xmm5, xmm6
+    cvtps2pi     mm0, xmm0
+    cvtps2pi     mm1, xmm1
+    cvtps2pi     mm2, xmm2
+    cvtps2pi     mm3, xmm3
+    cvtps2pi     mm4, xmm4
+    cvtps2pi     mm5, xmm5
+    packssdw     mm0, mm3               ; m0 =  0,  6,  3,  9
+    packssdw     mm1, mm4               ; m1 =  1,  7,  4, 10
+    packssdw     mm2, mm5               ; m2 =  2,  8,  5, 11
+    ; unpack words
+    pshufw       mm3, mm0, q1032        ; m3 =  3,  9,  0,  6
+    punpcklwd    mm0, mm1               ; m0 =  0,  1,  6,  7
+    punpckhwd    mm1, mm2               ; m1 =  4,  5, 10, 11
+    punpcklwd    mm2, mm3               ; m2 =  2,  3,  8,  9
+    ; unpack dwords
+    pshufw       mm3, mm0, q1032        ; m3 =  6,  7,  0,  1
+    punpckldq    mm0, mm2               ; m0 =  0,  1,  2,  3 (final)
+    punpckhdq    mm2, mm1               ; m2 =  8,  9, 10, 11 (final)
+    punpckldq    mm1, mm3               ; m1 =  4,  5,  6,  7 (final)
+    mova  [dstq+0*mmsize], mm0
+    mova  [dstq+1*mmsize], mm1
+    mova  [dstq+2*mmsize], mm2
+%endif
+    add         srcq, mmsize
+    add         dstq, mmsize*3
+    sub         lend, mmsize/4          ; mmsize/4 samples per channel are done per pass
+    jg .loop
+%if mmsize == 8
+    emms                                ; the mmx-sized build used mm registers above
+    RET
+%else
+    REP_RET
+%endif
+%endmacro
+
+INIT_MMX sse
+CONV_FLTP_TO_S16_6CH
+INIT_XMM sse2
+CONV_FLTP_TO_S16_6CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_FLTP_TO_S16_6CH
+%endif
+
 ;-
 ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
 ;

[libav-devel] [PATCH 07/15] lavr: x86: optimized 2-channel fltp to flt conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   37 
 libavresample/x86/audio_convert_init.c |7 ++
 2 files changed, 44 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index 8240a32..ead4a5c 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -695,6 +695,43 @@ INIT_XMM avx
 CONV_FLTP_TO_S16_6CH
 %endif
 
+;------------------------------------------------------------------------------
+; void ff_conv_fltp_to_flt_2ch(float *dst, float *const *src, int len,
+;                              int channels);
+;------------------------------------------------------------------------------
+
+%macro CONV_FLTP_TO_FLT_2CH 0 ; 2 planar float channels -> interleaved float
+cglobal conv_fltp_to_flt_2ch, 3,4,5, dst, src0, len, src1
+    mov  src1q, [src0q+gprsize]
+    mov  src0q, [src0q        ]
+    lea   lenq, [4*lend]            ; lenq = byte size of one float src plane
+    add  src0q, lenq                ; move src0/src1/dst to their ends ...
+    add  src1q, lenq
+    lea   dstq, [dstq+2*lenq]       ; dst holds both channels: twice the bytes
+    neg   lenq                      ; ... and run a negative offset up to 0
+.loop:
+    mova    m0, [src0q+lenq       ]
+    mova    m1, [src1q+lenq       ]
+    mova    m2, [src0q+lenq+mmsize]
+    mova    m3, [src1q+lenq+mmsize]
+    SBUTTERFLYPS 0, 1, 4
+    SBUTTERFLYPS 2, 3, 4
+    mova  [dstq+2*lenq+0*mmsize], m0
+    mova  [dstq+2*lenq+1*mmsize], m1
+    mova  [dstq+2*lenq+2*mmsize], m2
+    mova  [dstq+2*lenq+3*mmsize], m3
+    add   lenq, 2*mmsize
+    jl .loop
+    REP_RET
+%endmacro
+
+INIT_XMM sse
+CONV_FLTP_TO_FLT_2CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_FLTP_TO_FLT_2CH
+%endif
+
 ;-
 ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
 ;  int channels);
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index 51e51f5..6e78338 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -78,6 +78,11 @@ extern void ff_conv_fltp_to_s16_6ch_sse2(int16_t *dst, float 
*const *src,
 extern void ff_conv_fltp_to_s16_6ch_avx (int16_t *dst, float *const *src,
  int len, int channels);
 
+extern void ff_conv_fltp_to_flt_2ch_sse(float *dst, float *const *src, int len,
+int channels);
+extern void ff_conv_fltp_to_flt_2ch_avx(float *dst, float *const *src, int len,
+int channels);
+
 extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int 
len,
  int channels);
 extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int 
len,
@@ -99,6 +104,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
   6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
+  2, 16, 8, "SSE", 
ff_conv_fltp_to_flt_2ch_sse);
 }
 if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
 if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
-- 
1.7.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 04/15] lavr: x86: optimized 6-channel s16p to flt conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|  106 
 libavresample/x86/audio_convert_init.c |   15 +
 2 files changed, 121 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index c3cc76f..622a84c 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -30,6 +30,8 @@ pf_s32_inv_scale: times 8 dd 0x30000000
 pf_s32_scale: times 8 dd 0x4f000000
 pf_s16_inv_scale: times 4 dd 0x38000000
 pf_s16_scale: times 4 dd 0x47000000
+pb_shuf_unpack_even:  db -1, -1,  0,  1, -1, -1,  2,  3, -1, -1,  8,  9, -1, -1, 10, 11
+pb_shuf_unpack_odd:   db -1, -1,  4,  5, -1, -1,  6,  7, -1, -1, 12, 13, -1, -1, 14, 15
 
 SECTION_TEXT
 
@@ -432,6 +434,110 @@ INIT_XMM avx
 CONV_S16P_TO_FLT_2CH
 %endif
 
+;--
+; void ff_conv_s16p_to_flt_6ch(float *dst, int16_t *const *src, int len,
+;  int channels);
+;--
+
+%macro CONV_S16P_TO_FLT_6CH 0
+%if ARCH_X86_64
+cglobal conv_s16p_to_flt_6ch, 3,8,8, dst, src, len, src1, src2, src3, src4, 
src5
+%else
+cglobal conv_s16p_to_flt_6ch, 2,7,8, dst, src, src1, src2, src3, src4, src5
+%define lend dword r2m
+%endif
+mov src1q, [srcq+1*gprsize]
+mov src2q, [srcq+2*gprsize]
+mov src3q, [srcq+3*gprsize]
+mov src4q, [srcq+4*gprsize]
+mov src5q, [srcq+5*gprsize]
+mov  srcq, [srcq]
+sub src1q, srcq
+sub src2q, srcq
+sub src3q, srcq
+sub src4q, srcq
+sub src5q, srcq
+mova   m7, [pf_s32_inv_scale]
+%if cpuflag(ssse3)
+%define unpack_even m6
+mova   m6, [pb_shuf_unpack_even]
+%if ARCH_X86_64
+%define unpack_odd m8
+mova   m8, [pb_shuf_unpack_odd]
+%else
+%define unpack_odd [pb_shuf_unpack_odd]
+%endif
+%endif
+.loop:
+movq   m0, [srcq  ]  ; m0 =  0,  6, 12, 18,  x,  x,  x,  x
+movq   m1, [srcq+src1q]  ; m1 =  1,  7, 13, 19,  x,  x,  x,  x
+movq   m2, [srcq+src2q]  ; m2 =  2,  8, 14, 20,  x,  x,  x,  x
+movq   m3, [srcq+src3q]  ; m3 =  3,  9, 15, 21,  x,  x,  x,  x
+movq   m4, [srcq+src4q]  ; m4 =  4, 10, 16, 22,  x,  x,  x,  x
+movq   m5, [srcq+src5q]  ; m5 =  5, 11, 17, 23,  x,  x,  x,  x
+ ; unpack words:
+punpcklwd  m0, m1; m0 =  0,  1,  6,  7, 12, 13, 18, 19
+punpcklwd  m2, m3; m2 =  2,  3,  8,  9, 14, 15, 20, 21
+punpcklwd  m4, m5; m4 =  4,  5, 10, 11, 16, 17, 22, 23
+ ; blend dwords
+shufps m1, m4, m0, q3120 ; m1 =  4,  5, 16, 17,  6,  7, 18, 19
+shufps m0, m2, q2020 ; m0 =  0,  1, 12, 13,  2,  3, 14, 15
+shufps m2, m4, q3131 ; m2 =  8,  9, 20, 21, 10, 11, 22, 23
+%if cpuflag(ssse3)
+pshufb m3, m0, unpack_odd   ; m3 =  12, 13, 14, 15
+pshufb m0, unpack_even  ; m0 =   0,  1,  2,  3
+pshufb m4, m1, unpack_odd   ; m4 =  16, 17, 18, 19
+pshufb m1, unpack_even  ; m1 =   4,  5,  6,  7
+pshufb m5, m2, unpack_odd   ; m5 =  20, 21, 22, 23
+pshufb m2, unpack_even  ; m2 =   8,  9, 10, 11
+%else
+ ; shuffle dwords
+pshufd m0, m0, q3120 ; m0 =  0,  1,  2,  3, 12, 13, 14, 15
+pshufd m1, m1, q3120 ; m1 =  4,  5,  6,  7, 16, 17, 18, 19
+pshufd m2, m2, q3120 ; m2 =  8,  9, 10, 11, 20, 21, 22, 23
+pxor   m6, m6; convert s16 in m0-m2 to s32 in m0-m5
+punpcklwd  m3, m6, m0; m3 =  0,  1,  2,  3
+punpckhwd  m4, m6, m0; m4 = 12, 13, 14, 15
+punpcklwd  m0, m6, m1; m0 =  4,  5,  6,  7
+punpckhwd  m5, m6, m1; m5 = 16, 17, 18, 19
+punpcklwd  m1, m6, m2; m1 =  8,  9, 10, 11
+punpckhwd  m6, m2; m6 = 20, 21, 22, 23
+SWAP 6,2,1,0,3,4,5   ; swap registers 3,0,1,4,5,6 to 0,1,2,3,4,5
+%endif
+cvtdq2ps   m0, m0; convert s32 to float
+cvtdq2ps   m1, m1
+cvtdq2ps   m2, m2
+cvtdq2ps   m3, m3
+cvtdq2ps   m4, m4
+cvtdq2ps   m5, m5
+mulps  m0, m7; scale float from s32 range to [-1.0,1.0]
+mulps  m1, m7
+mulps  m2, m7
+mulps  m3, m7
+mulps  m4, m7
+mulps  m5, m7
+mova  [dstq ], m0
+mova  [dstq+  mmsize], m1
+mova  [dstq+2*mmsize], m2
+mova  [dstq+3*mmsize], m3
+mova  [dstq+4*mmsize], m4
+mova  [dstq+5*mmsize], m5
+add  srcq, mmsize/2
+add  dstq, mmsize*6
+sub  lend, mmsize/4
+jg .loop
+REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_S16P_TO_FLT_6CH

[libav-devel] [PATCH 05/15] lavr: x86: optimized 2-channel fltp to s16 conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   43 
 libavresample/x86/audio_convert_init.c |9 ++
 2 files changed, 52 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm 
b/libavresample/x86/audio_convert.asm
index 622a84c..15aaa6a 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -32,6 +32,7 @@ pf_s16_inv_scale: times 4 dd 0x38000000
 pf_s16_scale: times 4 dd 0x47000000
 pb_shuf_unpack_even:  db -1, -1,  0,  1, -1, -1,  2,  3, -1, -1,  8,  9, -1, -1, 10, 11
 pb_shuf_unpack_odd:   db -1, -1,  4,  5, -1, -1,  6,  7, -1, -1, 12, 13, -1, -1, 14, 15
+pb_interleave_words: SHUFFLE_MASK_W  0,  4,  1,  5,  2,  6,  3,  7
 
 SECTION_TEXT
 
@@ -538,6 +539,48 @@ INIT_XMM avx
 CONV_S16P_TO_FLT_6CH
 %endif
 
+;--
+; void ff_conv_fltp_to_s16_2ch(int16_t *dst, float *const *src, int len,
+;  int channels);
+;--
+
+%macro CONV_FLTP_TO_S16_2CH 0
+cglobal conv_fltp_to_s16_2ch, 3,4,3, dst, src0, len, src1
+lea  lenq, [4*lend]
+mov src1q, [src0q+gprsize]
+mov src0q, [src0q]
+add  dstq, lenq
+add src0q, lenq
+add src1q, lenq
+neg  lenq
+mova   m2, [pf_s16_scale]
+%if cpuflag(ssse3)
+mova   m3, [pb_interleave_words]
+%endif
+.loop:
+mulps  m0, m2, [src0q+lenq] ; m0 =0,2,4,6
+mulps  m1, m2, [src1q+lenq] ; m1 =1,3,5,7
+cvtps2dq   m0, m0
+cvtps2dq   m1, m1
+%if cpuflag(ssse3)
+packssdw   m0, m1   ; m0 = 0, 2, 4, 6, 1, 3, 5, 7
+pshufb m0, m3   ; m0 = 0, 1, 2, 3, 4, 5, 6, 7
+%else
+packssdw   m0, m0   ; m0 = 0, 2, 4, 6, x, x, x, x
+packssdw   m1, m1   ; m1 = 1, 3, 5, 7, x, x, x, x
+punpcklwd  m0, m1   ; m0 = 0, 1, 2, 3, 4, 5, 6, 7
+%endif
+mova  [dstq+lenq], m0
+add  lenq, mmsize
+jl .loop
+REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_FLTP_TO_S16_2CH
+INIT_XMM ssse3
+CONV_FLTP_TO_S16_2CH
+
 ;-
 ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
 ;  int channels);
diff --git a/libavresample/x86/audio_convert_init.c 
b/libavresample/x86/audio_convert_init.c
index 6bcf093..3098658 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -66,6 +66,11 @@ extern void ff_conv_s16p_to_flt_6ch_ssse3(float *dst, 
int16_t *const *src,
 extern void ff_conv_s16p_to_flt_6ch_avx  (float *dst, int16_t *const *src,
   int len, int channels);
 
+extern void ff_conv_fltp_to_s16_2ch_sse2 (int16_t *dst, float *const *src,
+  int len, int channels);
+extern void ff_conv_fltp_to_s16_2ch_ssse3(int16_t *dst, float *const *src,
+  int len, int channels);
+
 extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int 
len,
  int channels);
 extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int 
len,
@@ -110,10 +115,14 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 8, "SSE2", 
ff_conv_s16p_to_flt_2ch_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
   6, 16, 4, "SSE2", 
ff_conv_s16p_to_flt_6ch_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
+  2, 16, 4, "SSE2", 
ff_conv_fltp_to_s16_2ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
   6, 16, 4, "SSSE3", 
ff_conv_s16p_to_flt_6ch_ssse3);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
+  2, 16, 4, "SSSE3", 
ff_conv_fltp_to_s16_2ch_ssse3);
 }
 if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
-- 
1.7.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 02/15] lavr: x86: optimized 6-channel s16p to s16 conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|  123 
 libavresample/x86/audio_convert_init.c |   14 
 2 files changed, 137 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm 
b/libavresample/x86/audio_convert.asm
index 4a92952..ee05efc 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -260,6 +260,129 @@ INIT_XMM avx
 CONV_S16P_TO_S16_2CH
 %endif
 
+;--
+; void ff_conv_s16p_to_s16_6ch(int16_t *dst, int16_t *const *src, int len,
+;  int channels);
+;--
+
+;--
+; NOTE: In the 6-channel functions, len could be used as an index on x86-64
+;   instead of just a counter, which would avoid incrementing the
+;   pointers, but the extra complexity and amount of code is not worth
+;   the small gain. On x86-32 there are not enough registers to use len
+;   as an index without keeping two of the pointers on the stack and
+;   loading them in each iteration.
+;--
+
+%macro CONV_S16P_TO_S16_6CH 0
+%if ARCH_X86_64
+cglobal conv_s16p_to_s16_6ch, 3,8,7, dst, src0, len, src1, src2, src3, src4, 
src5
+%else
+cglobal conv_s16p_to_s16_6ch, 2,7,7, dst, src0, src1, src2, src3, src4, src5
+%define lend dword r2m
+%endif
+mov  src1q, [src0q+1*gprsize]
+mov  src2q, [src0q+2*gprsize]
+mov  src3q, [src0q+3*gprsize]
+mov  src4q, [src0q+4*gprsize]
+mov  src5q, [src0q+5*gprsize]
+mov  src0q, [src0q]
+sub  src1q, src0q
+sub  src2q, src0q
+sub  src3q, src0q
+sub  src4q, src0q
+sub  src5q, src0q
+.loop:
+%if cpuflag(sse2slow)
+movqm0, [src0q  ]   ; m0 =  0,  6, 12, 18,  x,  x,  x,  x
+movqm1, [src0q+src1q]   ; m1 =  1,  7, 13, 19,  x,  x,  x,  x
+movqm2, [src0q+src2q]   ; m2 =  2,  8, 14, 20,  x,  x,  x,  x
+movqm3, [src0q+src3q]   ; m3 =  3,  9, 15, 21,  x,  x,  x,  x
+movqm4, [src0q+src4q]   ; m4 =  4, 10, 16, 22,  x,  x,  x,  x
+movqm5, [src0q+src5q]   ; m5 =  5, 11, 17, 23,  x,  x,  x,  x
+; unpack words:
+punpcklwd   m0, m1  ; m0 =  0,  1,  6,  7, 12, 13, 18, 19
+punpcklwd   m2, m3  ; m2 =  2,  3,  8,  9, 14, 15, 20, 21
+punpcklwd   m4, m5  ; m4 =  4,  5, 10, 11, 16, 17, 22, 23
+; blend dwords
+shufps  m1, m0, m2, q2020   ; m1 =  0,  1, 12, 13,  2,  3, 14, 15
+shufps  m0, m4, q2031   ; m0 =  6,  7, 18, 19,  4,  5, 16, 17
+shufps  m2, m4, q3131   ; m2 =  8,  9, 20, 21, 10, 11, 22, 23
+; shuffle dwords
+pshufd  m0, m0, q1302   ; m0 =  4,  5,  6,  7, 16, 17, 18, 19
+pshufd  m1, m1, q3120   ; m1 =  0,  1,  2,  3, 12, 13, 14, 15
+pshufd  m2, m2, q3120   ; m2 =  8,  9, 10, 11, 20, 21, 22, 23
+movq   [dstq+0*mmsize/2], m1
+movq   [dstq+1*mmsize/2], m0
+movq   [dstq+2*mmsize/2], m2
+movhps [dstq+3*mmsize/2], m1
+movhps [dstq+4*mmsize/2], m0
+movhps [dstq+5*mmsize/2], m2
+add  src0q, mmsize/2
+add   dstq, mmsize*3
+sub   lend, mmsize/4
+%else
+movam0, [src0q  ]   ; m0 =  0,  6, 12, 18, 24, 30, 36, 42
+movam1, [src0q+src1q]   ; m1 =  1,  7, 13, 19, 25, 31, 37, 43
+movam2, [src0q+src2q]   ; m2 =  2,  8, 14, 20, 26, 32, 38, 44
+movam3, [src0q+src3q]   ; m3 =  3,  9, 15, 21, 27, 33, 39, 45
+movam4, [src0q+src4q]   ; m4 =  4, 10, 16, 22, 28, 34, 40, 46
+movam5, [src0q+src5q]   ; m5 =  5, 11, 17, 23, 29, 35, 41, 47
+; unpack words:
+SBUTTERFLY2 wd, 0, 1, 6 ; m0 =  0,  1,  6,  7, 12, 13, 18, 19
+; m1 = 24, 25, 30, 31, 36, 37, 42, 43
+SBUTTERFLY2 wd, 2, 3, 6 ; m2 =  2,  3,  8,  9, 14, 15, 20, 21
+; m3 = 26, 27, 32, 33, 38, 39, 44, 45
+SBUTTERFLY2 wd, 4, 5, 6 ; m4 =  4,  5, 10, 11, 16, 17, 22, 23
+; m5 = 28, 29, 34, 35, 40, 41, 46, 47
+; blend dwords
+shufps  m6, m0, m2, q2020   ; m6 =  0,  1, 12, 13,  2,  3, 14, 15
+shufps  m0, m4, q2031   ; m0 =  6,  7, 18, 19,  4,  5, 16, 17
+shufps  m2, m4, q3131   ; m2 =  8,  9, 20, 21, 10, 11, 22, 23
+SWAP 4,6; m4 =  0,  1, 12, 13,  2,  3, 14, 15
+shufps  m6, m1, m3, q2020   ; m6 = 24, 25, 36, 37, 26, 27, 38, 39
+shufps  m1, m5, q2031   ; m1 = 30, 31, 42, 43, 28, 29, 40

[libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   49 
 libavresample/x86/audio_convert_init.c |9 ++
 2 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm 
b/libavresample/x86/audio_convert.asm
index ee05efc..c3cc76f 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -383,6 +383,55 @@ INIT_XMM avx
 CONV_S16P_TO_S16_6CH
 %endif
 
+;--
+; void ff_conv_s16p_to_flt_2ch(float *dst, int16_t *const *src, int len,
+;  int channels);
+;--
+
+%macro CONV_S16P_TO_FLT_2CH 0
+cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1
+lea   lenq, [2*lend]
+mov  src1q, [src0q+gprsize]
+mov  src0q, [src0q]
+lea   dstq, [dstq+4*lenq]
+add  src0q, lenq
+add  src1q, lenq
+neg   lenq
+movam5, [pf_s32_inv_scale]
+.loop:
+movam2, [src0q+lenq]; m2 =  0,  2,  4,  6,  8, 10, 12, 14
+movam4, [src1q+lenq]; m4 =  1,  3,  5,  7,  9, 11, 13, 15
+SBUTTERFLY2 wd, 2, 4, 3 ; m2 =  0,  1,  2,  3,  4,  5,  6,  7
+; m4 =  8,  9, 10, 11, 12, 13, 14, 15
+pxorm3, m3
+punpcklwd   m0, m3, m2  ; m0 =  0,  1,  2,  3
+punpckhwd   m1, m3, m2  ; m1 =  4,  5,  6,  7
+punpcklwd   m2, m3, m4  ; m2 =  8,  9, 10, 11
+punpckhwd   m3, m4  ; m3 = 12, 13, 14, 15
+cvtdq2psm0, m0
+cvtdq2psm1, m1
+cvtdq2psm2, m2
+cvtdq2psm3, m3
+mulps   m0, m5
+mulps   m1, m5
+mulps   m2, m5
+mulps   m3, m5
+mova  [dstq+4*lenq ], m0
+mova  [dstq+4*lenq+  mmsize], m1
+mova  [dstq+4*lenq+2*mmsize], m2
+mova  [dstq+4*lenq+3*mmsize], m3
+add   lenq, mmsize
+jl .loop
+REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_S16P_TO_FLT_2CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_S16P_TO_FLT_2CH
+%endif
+
 ;-
 ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
 ;  int channels);
diff --git a/libavresample/x86/audio_convert_init.c 
b/libavresample/x86/audio_convert_init.c
index d9d4714..9706c71 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -54,6 +54,11 @@ extern void ff_conv_s16p_to_s16_6ch_sse2slow(int16_t *dst, 
int16_t *const *src,
 extern void ff_conv_s16p_to_s16_6ch_avx (int16_t *dst, int16_t *const *src,
  int len, int channels);
 
+extern void ff_conv_s16p_to_flt_2ch_sse2(float *dst, int16_t *const *src,
+ int len, int channels);
+extern void ff_conv_s16p_to_flt_2ch_avx (float *dst, int16_t *const *src,
+ int len, int channels);
+
 extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int 
len,
  int channels);
 extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int 
len,
@@ -94,6 +99,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
   2, 16, 16, "SSE2", 
ff_conv_s16p_to_s16_2ch_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
+  2, 16, 8, "SSE2", 
ff_conv_s16p_to_flt_2ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
@@ -110,6 +117,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   2, 16, 16, "AVX", 
ff_conv_s16p_to_s16_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
   6, 16, 8, "AVX", 
ff_conv_s16p_to_s16_6ch_avx);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
+  2, 16, 8, "AVX", 
ff_conv_s16p_to_flt_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
   6, 16, 4, "AVX", 
ff_conv_fltp_to_flt_6ch_avx);
 }
-- 
1.7.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 01/15] lavr: x86: optimized 2-channel s16p to s16 conversion

2012-08-05 Thread Justin Ruggles
---
 libavresample/x86/audio_convert.asm|   37 
 libavresample/x86/audio_convert_init.c |   13 +++
 2 files changed, 50 insertions(+), 0 deletions(-)

diff --git a/libavresample/x86/audio_convert.asm 
b/libavresample/x86/audio_convert.asm
index 244c4d1..4a92952 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -223,6 +223,43 @@ INIT_YMM avx
 CONV_FLT_TO_S32
 %endif
 
+;--
+; void ff_conv_s16p_to_s16_2ch(int16_t *dst, int16_t *const *src, int len,
+;  int channels);
+;--
+
+%macro CONV_S16P_TO_S16_2CH 0
+cglobal conv_s16p_to_s16_2ch, 3,4,5, dst, src0, len, src1
+mov   src1q, [src0q+gprsize]
+mov   src0q, [src0q]
+lealenq, [2*lend]
+add   src0q, lenq
+add   src1q, lenq
+leadstq, [dstq+2*lenq]
+neglenq
+.loop
+mova m0, [src0q+lenq   ]
+mova m1, [src1q+lenq   ]
+mova m2, [src0q+lenq+mmsize]
+mova m3, [src1q+lenq+mmsize]
+SBUTTERFLY2  wd, 0, 1, 4
+SBUTTERFLY2  wd, 2, 3, 4
+mova  [dstq+2*lenq+0*mmsize], m0
+mova  [dstq+2*lenq+1*mmsize], m1
+mova  [dstq+2*lenq+2*mmsize], m2
+mova  [dstq+2*lenq+3*mmsize], m3
+addlenq, 2*mmsize
+jl .loop
+REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_S16P_TO_S16_2CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_S16P_TO_S16_2CH
+%endif
+
 ;-
 ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
 ;  int channels);
diff --git a/libavresample/x86/audio_convert_init.c 
b/libavresample/x86/audio_convert_init.c
index 2de4970..9b7bcb1 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -22,6 +22,8 @@
 #include "libavutil/cpu.h"
 #include "libavresample/audio_convert.h"
 
+/* flat conversions */
+
 extern void ff_conv_s16_to_s32_sse2(int16_t *dst, const int32_t *src, int len);
 
 extern void ff_conv_s16_to_flt_sse2(float *dst, const int16_t *src, int len);
@@ -38,6 +40,13 @@ extern void ff_conv_flt_to_s16_sse2(int16_t *dst, const 
float *src, int len);
 extern void ff_conv_flt_to_s32_sse2(int32_t *dst, const float *src, int len);
 extern void ff_conv_flt_to_s32_avx (int32_t *dst, const float *src, int len);
 
+/* interleave conversions */
+
+extern void ff_conv_s16p_to_s16_2ch_sse2(int16_t *dst, int16_t *const *src,
+ int len, int channels);
+extern void ff_conv_s16p_to_s16_2ch_avx (int16_t *dst, int16_t *const *src,
+ int len, int channels);
+
 extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int 
len,
  int channels);
 extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int 
len,
@@ -71,6 +80,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   0, 16, 16, "SSE2", ff_conv_flt_to_s16_sse2);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
   0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
+  2, 16, 16, "SSE2", 
ff_conv_s16p_to_s16_2ch_sse2);
 }
 if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
@@ -83,6 +94,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
   0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
   0, 32, 32, "AVX", ff_conv_flt_to_s32_avx);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
+  2, 16, 16, "AVX", 
ff_conv_s16p_to_s16_2ch_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
   6, 16, 4, "AVX", 
ff_conv_fltp_to_flt_6ch_avx);
 }
-- 
1.7.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 00/15] lavr: x86: 2 and 6 channel (de)interleaving (ver 3)

2012-08-05 Thread Justin Ruggles
New round of patches for libavresample x86 asm conversions.

Justin Ruggles (15):
  lavr: x86: optimized 2-channel s16p to s16 conversion
  lavr: x86: optimized 6-channel s16p to s16 conversion
  lavr: x86: optimized 2-channel s16p to flt conversion
  lavr: x86: optimized 6-channel s16p to flt conversion
  lavr: x86: optimized 2-channel fltp to s16 conversion
  lavr: x86: optimized 6-channel fltp to s16 conversion
  lavr: x86: optimized 2-channel fltp to flt conversion
  lavr: x86: optimized 2-channel s16 to s16p conversion
  lavr: x86: optimized 6-channel s16 to s16p conversion
  lavr: x86: optimized 2-channel s16 to fltp conversion
  lavr: x86: optimized 6-channel s16 to fltp conversion
  lavr: x86: optimized 2-channel flt to s16p conversion
  lavr: x86: optimized 6-channel flt to s16p conversion
  lavr: x86: optimized 2-channel flt to fltp conversion
  lavr: x86: optimized 6-channel flt to fltp conversion

 libavresample/x86/audio_convert.asm|  957 
 libavresample/x86/audio_convert_init.c |  180 ++
 libavresample/x86/util.asm |6 +
 libavutil/x86/x86util.asm  |   12 +
 4 files changed, 1155 insertions(+), 0 deletions(-)

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 1/2] [HACK] x86: fix build with nasm 2.08

2012-08-05 Thread Mans Rullgard
It appears that something goes wrong in old nasm versions when the
%+ operator is used in the last argument of a macro invocation and
this argument is tested with %ifdef within the macro.  Adding a
dummy argument somehow fixes this.
---
 libavutil/x86/x86inc.asm | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index e729924..3a640d1 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -507,11 +507,14 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 ; Appends cpuflags to the function name if cpuflags has been specified.
 %macro cglobal 1-2+ ; name, [PROLOGUE args]
 %if %0 == 1
-cglobal_internal %1 %+ SUFFIX
+cglobal_internal2 %1 %+ SUFFIX, nasm_is_stupid
 %else
 cglobal_internal %1 %+ SUFFIX, %2
 %endif
 %endmacro
+%macro cglobal_internal2 2
+cglobal_internal %1
+%endmacro
 %macro cglobal_internal 1-2+
 %ifndef cglobaled_%1
 %xdefine %1 mangle(program_name %+ _ %+ %1)
@@ -795,9 +798,9 @@ INIT_XMM
 
 ; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
 %macro call 1
-call_internal %1, %1 %+ SUFFIX
+call_internal %1, %1 %+ SUFFIX, nasm_is_stupid
 %endmacro
-%macro call_internal 2
+%macro call_internal 3
 %xdefine %%i %1
 %ifndef cglobaled_%1
 %ifdef cglobaled_%2
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 2/2] x86: build avx functions only if HAVE_AVX is set

2012-08-05 Thread Mans Rullgard
This is required for the older nasm versions still shipped with some
systems.  Without this patch the code still builds, but due to a nasm
bug[1] it crashes at runtime even if AVX is not used.

[1] http://repo.or.cz/w/nasm.git/commitdiff/3cb0e8c052a672424eaf59a021f0dbfb6ef205b8

Signed-off-by: Mans Rullgard 
---
 libavcodec/x86/h264_deblock.asm   | 10 ++
 libavcodec/x86/h264_deblock_10bit.asm | 14 ++
 libavcodec/x86/h264dsp_mmx.c  |  2 +-
 libavcodec/x86/imdct36_sse.asm|  4 
 4 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 940a8f7..cea02da 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -390,8 +390,11 @@ cglobal deblock_h_luma_8, 5,9
 
 INIT_XMM sse2
 DEBLOCK_LUMA
+
+%if HAVE_AVX
 INIT_XMM avx
 DEBLOCK_LUMA
+%endif
 
 %else
 
@@ -509,8 +512,11 @@ INIT_MMX mmx2
 DEBLOCK_LUMA v8, 8
 INIT_XMM sse2
 DEBLOCK_LUMA v, 16
+
+%if HAVE_AVX
 INIT_XMM avx
 DEBLOCK_LUMA v, 16
+%endif
 
 %endif ; ARCH
 
@@ -781,8 +787,12 @@ cglobal deblock_h_luma_intra_8, 2,4
 
 INIT_XMM sse2
 DEBLOCK_LUMA_INTRA v
+
+%if HAVE_AVX
 INIT_XMM avx
 DEBLOCK_LUMA_INTRA v
+%endif
+
 %if ARCH_X86_64 == 0
 INIT_MMX mmx2
 DEBLOCK_LUMA_INTRA v8
diff --git a/libavcodec/x86/h264_deblock_10bit.asm 
b/libavcodec/x86/h264_deblock_10bit.asm
index 7b9316d..56f46e0 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -418,10 +418,14 @@ cglobal deblock_h_luma_10, 5,7,15
 
 INIT_XMM sse2
 DEBLOCK_LUMA_64
+
+%if HAVE_AVX
 INIT_XMM avx
 DEBLOCK_LUMA_64
 %endif
 
+%endif
+
 %macro SWAPMOVA 2
 %ifid %1
 SWAP %1, %2
@@ -713,8 +717,11 @@ cglobal deblock_h_luma_intra_10, 4,7,16
 
 INIT_XMM sse2
 DEBLOCK_LUMA_INTRA_64
+
+%if HAVE_AVX
 INIT_XMM avx
 DEBLOCK_LUMA_INTRA_64
+%endif
 
 %endif
 
@@ -798,11 +805,15 @@ DEBLOCK_LUMA_INTRA
 INIT_XMM sse2
 DEBLOCK_LUMA
 DEBLOCK_LUMA_INTRA
+
+%if HAVE_AVX
 INIT_XMM avx
 DEBLOCK_LUMA
 DEBLOCK_LUMA_INTRA
 %endif
 
+%endif
+
 ; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
 ; out: %1=p0', %2=q0'
 %macro CHROMA_DEBLOCK_P0_Q0_INTRA 7
@@ -912,5 +923,8 @@ DEBLOCK_CHROMA
 %endif
 INIT_XMM sse2
 DEBLOCK_CHROMA
+
+%if HAVE_AVX
 INIT_XMM avx
 DEBLOCK_CHROMA
+%endif
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 0612ffb..130308d 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -292,7 +292,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int 
bit_depth,
 c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
 c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
 }
-if (mm_flags & AV_CPU_FLAG_AVX) {
+if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
 #if HAVE_ALIGNED_STACK
 c->h264_v_loop_filter_luma   = ff_deblock_v_luma_8_avx;
 c->h264_h_loop_filter_luma   = ff_deblock_h_luma_8_avx;
diff --git a/libavcodec/x86/imdct36_sse.asm b/libavcodec/x86/imdct36_sse.asm
index 937a2cc..336e9f0 100644
--- a/libavcodec/x86/imdct36_sse.asm
+++ b/libavcodec/x86/imdct36_sse.asm
@@ -371,8 +371,10 @@ DEFINE_IMDCT
 INIT_XMM ssse3
 DEFINE_IMDCT
 
+%if HAVE_AVX
 INIT_XMM avx
 DEFINE_IMDCT
+%endif
 
 INIT_XMM sse
 
@@ -717,5 +719,7 @@ cglobal four_imdct36_float, 5,5,16, out, buf, in, win, tmp
 INIT_XMM sse
 DEFINE_FOUR_IMDCT
 
+%if HAVE_AVX
 INIT_XMM avx
 DEFINE_FOUR_IMDCT
+%endif
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH v2] x86: use 32-bit source registers with movd instruction

2012-08-05 Thread Mans Rullgard
yasm tolerates mismatch between movd/movq and source register size,
adjusting the instruction according to the register.  nasm is more
strict.

Signed-off-by: Mans Rullgard 
---
Missed a couple that somehow worked with nasm 2.10 but fail with 2.08.
---
 libavcodec/x86/h264_deblock_10bit.asm | 12 ++--
 libavcodec/x86/rv34dsp.asm|  6 +++---
 libavcodec/x86/rv40dsp.asm|  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/libavcodec/x86/h264_deblock_10bit.asm 
b/libavcodec/x86/h264_deblock_10bit.asm
index b18f7bc..7b9316d 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -165,7 +165,7 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
 SUBrsp, pad
 shlr2d, 2
 shlr3d, 2
-LOAD_AB m4, m5, r2, r3
+LOAD_AB m4, m5, r2d, r3d
 mov r3, 32/mmsize
 mov r2, r0
 sub r0, r1
@@ -222,7 +222,7 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
 SUBrsp, pad
 shlr2d, 2
 shlr3d, 2
-LOAD_AB m4, m5, r2, r3
+LOAD_AB m4, m5, r2d, r3d
 mov r3, r1
 movaam, m4
 add r3, r1
@@ -351,7 +351,7 @@ cglobal deblock_v_luma_10, 5,5,15
 %define mask2 m11
 shlr2d, 2
 shlr3d, 2
-LOAD_ABm12, m13, r2, r3
+LOAD_ABm12, m13, r2d, r3d
 mov r2, r0
 sub r0, r1
 sub r0, r1
@@ -379,7 +379,7 @@ cglobal deblock_v_luma_10, 5,5,15
 cglobal deblock_h_luma_10, 5,7,15
 shlr2d, 2
 shlr3d, 2
-LOAD_ABm12, m13, r2, r3
+LOAD_ABm12, m13, r2d, r3d
 mov r2, r1
 add r2, r1
 add r2, r1
@@ -857,7 +857,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
 .loop:
 %endif
 CHROMA_V_LOAD r5
-LOAD_AB m4, m5, r2, r3
+LOAD_AB m4, m5, r2d, r3d
 LOAD_MASK   m0, m1, m2, m3, m4, m5, m7, m6, m4
 pxorm4, m4
 CHROMA_V_LOAD_TC m6, r4
@@ -891,7 +891,7 @@ cglobal deblock_v_chroma_intra_10, 
4,6-(mmsize/16),8*(mmsize/16)
 .loop:
 %endif
 CHROMA_V_LOAD r4
-LOAD_AB m4, m5, r2, r3
+LOAD_AB m4, m5, r2d, r3d
 LOAD_MASK   m0, m1, m2, m3, m4, m5, m7, m6, m4
 CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6
 CHROMA_V_STORE
diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index c43b77a..78d8c92 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -49,7 +49,7 @@ SECTION .text
 cglobal rv34_idct_%1, 1, 2, 0
 movsx   r1, word [r0]
 IDCT_DC r1
-movdm0, r1
+movdm0, r1d
 pshufw  m0, m0, 0
 movq[r0+ 0], m0
 movq[r0+ 8], m0
@@ -70,7 +70,7 @@ cglobal rv34_idct_dc_add, 3, 3
 ; calculate DC
 IDCT_DC_ROUND r2
 pxor   m1, m1
-movd   m0, r2
+movd   m0, r2d
 psubw  m1, m0
 packuswb   m0, m0
 packuswb   m1, m1
@@ -175,7 +175,7 @@ cglobal rv34_idct_dc_add, 3, 3, 6
 pxor   m1, m1
 
 ; calculate DC
-movd   m0, r2
+movd   m0, r2d
 lear2, [r0+r1*2]
 movd   m2, [r0]
 movd   m3, [r0+r1]
diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index ae740c2..70c0c04 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm
@@ -466,8 +466,8 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8
 addr2, r6
 negr6
 
-movd   m2, r3
-movd   m3, r4
+movd   m2, r3d
+movd   m3, r4d
 %ifidn %1,rnd
 %define  RND   0
 SPLATW m2, m2
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 0/5] Restore nasm support

2012-08-05 Thread Benjamin Larsson

All OK

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 1/5] x86: add colons after labels

2012-08-05 Thread Mans Rullgard
nasm prints a warning if the colon is missing.

Signed-off-by: Mans Rullgard 
---
 libavcodec/x86/deinterlace.asm |  2 +-
 libavcodec/x86/dsputil_yasm.asm|  6 ++--
 libavcodec/x86/dsputilenc_yasm.asm |  4 +--
 libavcodec/x86/fft_mmx.asm |  2 +-
 libavcodec/x86/fmtconvert.asm  |  2 +-
 libavcodec/x86/h264_chromamc.asm   | 30 +-
 libavcodec/x86/h264_chromamc_10bit.asm | 16 +-
 libavcodec/x86/h264_deblock_10bit.asm  |  4 +--
 libavcodec/x86/h264_idct.asm   | 58 +-
 libavcodec/x86/h264_idct_10bit.asm |  2 +-
 libavcodec/x86/h264_intrapred.asm  | 16 +-
 libavcodec/x86/h264_qpel_10bit.asm | 16 +-
 libavcodec/x86/h264_weight.asm | 16 +-
 libavcodec/x86/h264_weight_10bit.asm   | 16 +-
 libavcodec/x86/vp56dsp.asm |  4 +--
 libavcodec/x86/vp8dsp.asm  | 30 +-
 libavresample/x86/audio_mix.asm|  2 +-
 libavutil/x86/float_dsp.asm|  4 +--
 18 files changed, 115 insertions(+), 115 deletions(-)

diff --git a/libavcodec/x86/deinterlace.asm b/libavcodec/x86/deinterlace.asm
index 8613485..8681181 100644
--- a/libavcodec/x86/deinterlace.asm
+++ b/libavcodec/x86/deinterlace.asm
@@ -39,7 +39,7 @@ cglobal deinterlace_line_mmx, 7,7,7, dst, lum_m4, 
lum_m3, lum_m2, lum_m1
 %endif
 pxor  mm7, mm7
 movq  mm6, [pw_4]
-.nextrow
+.nextrow:
 movd  mm0, [lum_m4q]
 movd  mm1, [lum_m3q]
 movd  mm2, [lum_m2q]
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index af2de15..d6cf824 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -1143,7 +1143,7 @@ VECTOR_CLIP_INT32 6, 1, 0, 0
 cglobal vector_fmul_reverse, 4,4,2, dst, src0, src1, len
 lea   lenq, [lend*4 - 2*mmsize]
 ALIGN 16
-.loop
+.loop:
 %if cpuflag(avx)
 vmovaps xmm0, [src1q + 16]
 vinsertf128 m0, m0, [src1q], 1
@@ -1182,7 +1182,7 @@ VECTOR_FMUL_REVERSE
 cglobal vector_fmul_add, 5,5,2, dst, src0, src1, src2, len
 lea   lenq, [lend*4 - 2*mmsize]
 ALIGN 16
-.loop
+.loop:
 movam0,   [src0q + lenq]
 movam1,   [src0q + lenq + mmsize]
 mulps   m0, m0, [src1q + lenq]
@@ -1313,7 +1313,7 @@ cglobal bswap32_buf, 3,4,5
 add  r0, 4
 dec  r2
 jnz  .loop2
-.end
+.end:
 RET
 
 ; %1 = aligned/unaligned
diff --git a/libavcodec/x86/dsputilenc_yasm.asm 
b/libavcodec/x86/dsputilenc_yasm.asm
index cfd4e6d..b7078f1 100644
--- a/libavcodec/x86/dsputilenc_yasm.asm
+++ b/libavcodec/x86/dsputilenc_yasm.asm
@@ -184,7 +184,7 @@ cglobal hadamard8_diff16_%1, 5, 6, %2
 call hadamard8x8_diff_%1
 addr5d, eax
 
-.done
+.done:
 moveax, r5d
 %ifndef m8
 ADDrsp, pad
@@ -288,7 +288,7 @@ cglobal sse16_sse2, 5, 5, 8
 pxor  m0, m0 ; mm0 = 0
 pxor  m7, m7 ; mm7 holds the sum
 
-.next2lines ; FIXME why are these unaligned movs? pix1[] is aligned
+.next2lines: ; FIXME why are these unaligned movs? pix1[] is aligned
 movu  m1, [r1   ]; mm1 = pix1[0][0-15]
 movu  m2, [r2   ]; mm2 = pix2[0][0-15]
 movu  m3, [r1+r3]; mm3 = pix1[1][0-15]
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index 6082d9e..60d6669 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -607,7 +607,7 @@ cglobal fft_calc, 2,5,8
 add rcx, 3
 shl r2, cl
 sub r4, r2
-.loop
+.loop:
 %if mmsize == 8
 PSWAPD  m0, [r4 + r2 + 4]
 mova [r4 + r2 + 4], m0
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 0fd14fe..46b7e85 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -404,7 +404,7 @@ cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1
 mov src1q, [srcq+gprsize]
 mov  srcq, [srcq]
 sub src1q, srcq
-.loop
+.loop:
 MOVPS  m0, [srcq ]
 MOVPS  m1, [srcq+src1q   ]
 MOVPS  m3, [srcq  +mmsize]
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index 64a4efe..56b8e56 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -69,7 +69,7 @@ SECTION .text
 
 %macro mv0_pixels_mc8 0
 lea   r4, [r2*2 ]
-.next4rows
+.next4rows:
 movq mm0, [r1   ]
 movq mm1, [r1+r2]
 add   r1, r4
@@ -117,7 +117,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
 mv0_pixels_mc8
 REP_RET
 
-.at_least_one_non_zero
+.at_least_one_non_zero:
 %ifidn %2, rv40
 %if ARCH_X86_64
 mov   r7, r5
@@ -145,7 +145,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
 test r4d, r4d
 mov   r6, r2; dxy = x ? 1 : stride
 jne .both_non_zero
-.my_is_zero
+.my_is_zero:
 ; mx == 0 XOR my == 0 - 1 dimensional filter only
 or   r4d

[libav-devel] [PATCH 2/5] x86: use 32-bit source registers with movd instruction

2012-08-05 Thread Mans Rullgard
yasm tolerates mismatch between movd/movq and source register size,
adjusting the instruction according to the register.  nasm is more
strict.

Signed-off-by: Mans Rullgard 
---
 libavcodec/x86/h264_deblock_10bit.asm | 12 ++--
 libavcodec/x86/rv34dsp.asm|  2 +-
 libavcodec/x86/rv40dsp.asm|  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/libavcodec/x86/h264_deblock_10bit.asm 
b/libavcodec/x86/h264_deblock_10bit.asm
index b18f7bc..7b9316d 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -165,7 +165,7 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
 SUBrsp, pad
 shlr2d, 2
 shlr3d, 2
-LOAD_AB m4, m5, r2, r3
+LOAD_AB m4, m5, r2d, r3d
 mov r3, 32/mmsize
 mov r2, r0
 sub r0, r1
@@ -222,7 +222,7 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
 SUBrsp, pad
 shlr2d, 2
 shlr3d, 2
-LOAD_AB m4, m5, r2, r3
+LOAD_AB m4, m5, r2d, r3d
 mov r3, r1
 movaam, m4
 add r3, r1
@@ -351,7 +351,7 @@ cglobal deblock_v_luma_10, 5,5,15
 %define mask2 m11
 shlr2d, 2
 shlr3d, 2
-LOAD_ABm12, m13, r2, r3
+LOAD_ABm12, m13, r2d, r3d
 mov r2, r0
 sub r0, r1
 sub r0, r1
@@ -379,7 +379,7 @@ cglobal deblock_v_luma_10, 5,5,15
 cglobal deblock_h_luma_10, 5,7,15
 shlr2d, 2
 shlr3d, 2
-LOAD_ABm12, m13, r2, r3
+LOAD_ABm12, m13, r2d, r3d
 mov r2, r1
 add r2, r1
 add r2, r1
@@ -857,7 +857,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
 .loop:
 %endif
 CHROMA_V_LOAD r5
-LOAD_AB m4, m5, r2, r3
+LOAD_AB m4, m5, r2d, r3d
 LOAD_MASK   m0, m1, m2, m3, m4, m5, m7, m6, m4
 pxorm4, m4
 CHROMA_V_LOAD_TC m6, r4
@@ -891,7 +891,7 @@ cglobal deblock_v_chroma_intra_10, 
4,6-(mmsize/16),8*(mmsize/16)
 .loop:
 %endif
 CHROMA_V_LOAD r4
-LOAD_AB m4, m5, r2, r3
+LOAD_AB m4, m5, r2d, r3d
 LOAD_MASK   m0, m1, m2, m3, m4, m5, m7, m6, m4
 CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6
 CHROMA_V_STORE
diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index c43b77a..9cd2a35 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -175,7 +175,7 @@ cglobal rv34_idct_dc_add, 3, 3, 6
 pxor   m1, m1
 
 ; calculate DC
-movd   m0, r2
+movd   m0, r2d
 lear2, [r0+r1*2]
 movd   m2, [r0]
 movd   m3, [r0+r1]
diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index ae740c2..70c0c04 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm
@@ -466,8 +466,8 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8
 addr2, r6
 negr6
 
-movd   m2, r3
-movd   m3, r4
+movd   m2, r3d
+movd   m3, r4d
 %ifidn %1,rnd
 %define  RND   0
 SPLATW m2, m2
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 3/5] build: add trailing / to yasm/nasm -I flags

2012-08-05 Thread Mans Rullgard
nasm requires a trailing / on paths specified with -I.
It does no harm with yasm.

Signed-off-by: Mans Rullgard 
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 644aff4..20e6a38 100644
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,7 @@ CPPFLAGS   := $(IFLAGS) $(CPPFLAGS)
 CFLAGS += $(ECFLAGS)
 CCFLAGS = $(CPPFLAGS) $(CFLAGS)
 ASFLAGS:= $(CPPFLAGS) $(ASFLAGS)
-YASMFLAGS  += $(IFLAGS) -I$(SRC_PATH)/libavutil/x86/ -Pconfig.asm
+YASMFLAGS  += $(IFLAGS:%=%/) -I$(SRC_PATH)/libavutil/x86/ -Pconfig.asm
 HOSTCCFLAGS = $(IFLAGS) $(HOSTCFLAGS)
 LDFLAGS:= $(ALLFFLIBS:%=-Llib%) $(LDFLAGS)
 
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 4/5] x86: fix rNmp macros with nasm

2012-08-05 Thread Mans Rullgard
For some reason, nasm requires this.  No harm done to yasm.

Signed-off-by: Mans Rullgard 
---
 libavutil/x86/x86inc.asm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 03e6c07..86c406f 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -139,10 +139,10 @@ CPU amdnop
 %define r%1mp %2
 %elif ARCH_X86_64 ; memory
 %define r%1m [rsp + stack_offset + %3]
-%define r%1mp qword r %+ %1m
+%define r%1mp qword r %+ %1 %+ m
 %else
 %define r%1m [esp + stack_offset + %3]
-%define r%1mp dword r %+ %1m
+%define r%1mp dword r %+ %1 %+ m
 %endif
 %define r%1  %2
 %endmacro
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 5/5] x86: use nop cpu directives only if supported

2012-08-05 Thread Mans Rullgard
nasm does not support 'CPU foonop' directives.  This adds a configure
test for the directive and uses it only if supported.

Signed-off-by: Mans Rullgard 
---
 configure|  2 ++
 libavutil/x86/x86inc.asm | 12 +---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index 84a1b6b..dff4bee 100755
--- a/configure
+++ b/configure
@@ -1084,6 +1084,7 @@ HAVE_LIST="
 closesocket
 cmov
 cpuid
+cpunop
 dcbzl
 dev_bktr_ioctl_bt848_h
 dev_bktr_ioctl_meteor_h
@@ -2897,6 +2898,7 @@ EOF
 die "yasm not found, use --disable-yasm for a crippled build"
 check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
 check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
+check_yasm "CPU amdnop" && enable cpunop
 fi
 
 case "$cpu" in
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 86c406f..e729924 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -91,8 +91,14 @@
 default rel
 %endif
 
+%macro CPUNOP 1
+%if HAVE_CPUNOP
+CPU %1
+%endif
+%endmacro
+
 ; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
-CPU amdnop
+CPUNOP amdnop
 
 ; Macros to eliminate most code duplication between x86_32 and x86_64:
 ; Currently this works only for leaf functions which load all their arguments
@@ -589,7 +595,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 ; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the 
specified cpu.
 ; You shouldn't need to invoke this macro directly, it's a subroutine for 
INIT_MMX &co.
 %macro INIT_CPUFLAGS 0-2
-CPU amdnop
+CPUNOP amdnop
 %if %0 >= 1
 %xdefine cpuname %1
 %assign cpuflags cpuflags_%1
@@ -612,7 +618,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 %define movu lddqu
 %endif
 %if notcpuflag(mmx2)
-CPU basicnop
+CPUNOP basicnop
 %endif
 %else
 %xdefine SUFFIX
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 0/5] Restore nasm support

2012-08-05 Thread Mans Rullgard
These patches restore support for building x86 asm with nasm.

Mans Rullgard (5):
  x86: add colons after labels
  x86: use 32-bit source registers with movd instruction
  build: add trailing / to yasm/nasm -I flags
  x86: fix rNmp macros with nasm
  x86: use nop cpu directives only if supported

 Makefile   |  2 +-
 configure  |  2 ++
 libavcodec/x86/deinterlace.asm |  2 +-
 libavcodec/x86/dsputil_yasm.asm|  6 ++--
 libavcodec/x86/dsputilenc_yasm.asm |  4 +--
 libavcodec/x86/fft_mmx.asm |  2 +-
 libavcodec/x86/fmtconvert.asm  |  2 +-
 libavcodec/x86/h264_chromamc.asm   | 30 +-
 libavcodec/x86/h264_chromamc_10bit.asm | 16 +-
 libavcodec/x86/h264_deblock_10bit.asm  | 16 +-
 libavcodec/x86/h264_idct.asm   | 58 +-
 libavcodec/x86/h264_idct_10bit.asm |  2 +-
 libavcodec/x86/h264_intrapred.asm  | 16 +-
 libavcodec/x86/h264_qpel_10bit.asm | 16 +-
 libavcodec/x86/h264_weight.asm | 16 +-
 libavcodec/x86/h264_weight_10bit.asm   | 16 +-
 libavcodec/x86/rv34dsp.asm |  2 +-
 libavcodec/x86/rv40dsp.asm |  4 +--
 libavcodec/x86/vp56dsp.asm |  4 +--
 libavcodec/x86/vp8dsp.asm  | 30 +-
 libavresample/x86/audio_mix.asm|  2 +-
 libavutil/x86/float_dsp.asm|  4 +--
 libavutil/x86/x86inc.asm   | 16 +++---
 23 files changed, 138 insertions(+), 130 deletions(-)

-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 2/2] imc: remove unused field IMCContext.one_div_log2

2012-08-05 Thread Benjamin Larsson

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 1/2] imc: fix size of a memset()

2012-08-05 Thread Benjamin Larsson

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] Use log2(x) instead of log(x) / log(2)

2012-08-05 Thread Benjamin Larsson

OK

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.

2012-08-05 Thread Måns Rullgård
Diego Biurrun  writes:

> No, there is another issue that i cannot quite put my finger on, which
> causes errors of the type:
>
>   error: (call_internal:3) `%ifdef' expects macro identifiers

I'm not seeing any such messages.  I'm getting a bunch of other errors
and warnings, mostly easy to fix.

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.

2012-08-05 Thread Ronald S. Bultje
Hi,

On Sun, Aug 5, 2012 at 2:20 PM, Diego Biurrun  wrote:
> On Sun, Aug 05, 2012 at 09:35:27PM +0100, Måns Rullgård wrote:
>> Diego Biurrun  writes:
>> > On Tue, May 22, 2012 at 09:09:39PM +0200, Diego Biurrun wrote:
>> >> On Sun, May 20, 2012 at 06:18:10PM +0200, Diego Biurrun wrote:
>> >> > On Sat, May 19, 2012 at 04:54:18PM +0100, Måns Rullgård wrote:
>> >> > > Diego Biurrun  writes:
>> >> > > > On Sat, May 19, 2012 at 04:21:01PM +0100, Måns Rullgård wrote:
>> >> > > >> Diego Biurrun  writes:
>> >> > > >> > This avoids nasm 2.08 being detected as a compatible Assembler.
>> >> > > >> > ---
>> >> > > >> > Probably at least the log message needs finetuning.  That said, 
>> >> > > >> > this works
>> >> > > >> > for me and avoids nasm being detected as a working Assembler and 
>> >> > > >> > later
>> >> > > >> > failing with
>> >> > > >> > --- a/configure
>> >> > > >> > +++ b/configure
>> >> > > >> > @@ -2815,7 +2815,7 @@ EOF
>> >> > > >> >  elf*) enabled debug && append YASMFLAGS $yasm_debug 
>> >> > > >> > ;;
>> >> > > >> >  esac
>> >> > > >> >
>> >> > > >> > -check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
>> >> > > >> > +check_yasm "CPU amdnop" && enable yasm ||
>> >> > > >> >  die "yasm not found, use --disable-yasm for a 
>> >> > > >> > crippled build"
>> >> > > >> >  check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
>> >> > > >>
>> >> > > >> Is that directive the only one causing trouble?
>> >> > > >
>> >> > > > No, there are a few others, like certain AVX instructions, that also
>> >> > > > cause trouble,
>> >> > >
>> >> > > Aren't those disabled by the next test?
>> >> >
>> >> > Quite possibly that is the intention, but if I read our asm files
>> >> > correctly, then not all appearances of AVX instructions are "ifdeffed".
>> >> > Given my lack of familiarity with yasm syntax, I might well be wrong.
>> >>
>> >> Justin's patch that I just pushed fixes that issue, but the following
>> >> remains:
>> >>
>> >> libavcodec/x86/h264_idct.asm:613: error: (call_internal:3) `%ifdef'
>> >> expects macro identifiers
>> >>
>> >> A few dozen identical ones with different line numbers follow.
>> >> Can somebody shine a light on this one?
>> >>
>> >> The original error message that my patch addresses is
>> >>
>> >> x86inc.asm:100: error: unknown 'cpu' type
>> >
>> > .. ping ..
>> >
>> > The problem persists and none of the people able to fix this have shown
>> > interest in fixing nasm support.
>> >
>> > My patch is enough to detect failing nasm versions in practice, so I'd
>> > like to push it.  configure should error out when we know that compilation
>> > will fail.
>>
>> Is this the only thing it fails on?
>
> No, there is another issue that i cannot quite put my finger on, which
> causes errors of the type:
>
>   error: (call_internal:3) `%ifdef' expects macro identifiers

I'm guessing this is because of the macro abstraction around "call".
The purpose of these abstractions is so we can call something for
cpuflags-enabled macro-functions, and call a cpuflag-specific
implementation for each, e.g. something_mmx for the mmx function, but
something_sse for the sse function. Nasm appears to bail if the macro
is not actually defined (in which case with yasm, it defaults back to
calling "just" something without a suffix).

Look for all uses of call and it should be easy to comment 1-2 out and
see when it disappears. Explicitly stating that the call is non-cpu'y
may be a workaround. Maybe Loren has better ideas.

Ronald
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.

2012-08-05 Thread Diego Biurrun
On Sun, Aug 05, 2012 at 10:45:19PM +0100, Måns Rullgård wrote:
> Diego Biurrun  writes:
> > No, there is another issue that i cannot quite put my finger on, which
> > causes errors of the type:
> >
> >   error: (call_internal:3) `%ifdef' expects macro identifiers
> 
> Does this happen with all files or just some?

libavcodec/x86/dsputilenc_yasm.asm
libavcodec/x86/h264_idct.asm
libavcodec/x86/h264_qpel_10bit.asm
libavcodec/x86/h264_weight_10bit.asm
libavcodec/x86/vc1dsp_yasm.asm

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.

2012-08-05 Thread Måns Rullgård
Diego Biurrun  writes:

> No, there is another issue that i cannot quite put my finger on, which
> causes errors of the type:
>
>   error: (call_internal:3) `%ifdef' expects macro identifiers

Does this happen with all files or just some?

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 2/2] imc: remove unused field IMCContext.one_div_log2

2012-08-05 Thread Mans Rullgard
Signed-off-by: Mans Rullgard 
---
 libavcodec/imc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 726ca67..0d41d5f 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -92,7 +92,6 @@ typedef struct {
 
 float sqrt_tab[30];
 GetBitContext gb;
-float one_div_log2;
 
 DSPContext dsp;
 FFTContext fft;
@@ -227,7 +226,6 @@ static av_cold int imc_decode_init(AVCodecContext *avctx)
  imc_huffman_bits[i][j], 2, 2, INIT_VLC_USE_NEW_STATIC);
 }
 }
-q->one_div_log2 = 1 / log(2);
 
 if (avctx->codec_id == CODEC_ID_IAC) {
 iac_generate_tabs(q, avctx->sample_rate);
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 1/2] imc: fix size of a memset()

2012-08-05 Thread Mans Rullgard
IMCContext was changed from an array to a pointer in 66b84e4,
but this memset() was not updated.

Signed-off-by: Mans Rullgard 
---
 libavcodec/imc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 899572a..726ca67 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -789,7 +789,7 @@ static int imc_decode_block(AVCodecContext *avctx, 
IMCContext *q, int ch)
 chctx->decoder_reset = 1;
 
 if (chctx->decoder_reset) {
-memset(q->out_samples, 0, sizeof(q->out_samples));
+memset(q->out_samples, 0, COEFFS * sizeof(*q->out_samples));
 for (i = 0; i < BANDS; i++)
 chctx->old_floor[i] = 1.0;
 for (i = 0; i < COEFFS; i++)
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] Use log2(x) instead of log(x) / log(2)

2012-08-05 Thread Mans Rullgard
Signed-off-by: Mans Rullgard 
---
 avconv.c | 2 +-
 avprobe.c| 2 +-
 libavcodec/imc.c | 4 ++--
 libavcodec/snowenc.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/avconv.c b/avconv.c
index b20dbec..8429a72 100644
--- a/avconv.c
+++ b/avconv.c
@@ -1661,7 +1661,7 @@ static void print_report(int is_last_report, int64_t 
timer_start)
 if (qp >= 0 && qp < FF_ARRAY_ELEMS(qp_histogram))
 qp_histogram[qp]++;
 for (j = 0; j < 32; j++)
-snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), 
"%X", (int)lrintf(log(qp_histogram[j] + 1) / log(2)));
+snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), 
"%X", (int)lrintf(log2(qp_histogram[j] + 1)));
 }
 if (enc->flags&CODEC_FLAG_PSNR) {
 int j;
diff --git a/avprobe.c b/avprobe.c
index 5fe5b89..5e03433 100644
--- a/avprobe.c
+++ b/avprobe.c
@@ -468,7 +468,7 @@ static char *value_string(char *buf, int buf_size, double 
val, const char *unit)
 int index;
 
 if (unit == unit_byte_str && use_byte_value_binary_prefix) {
-index = (int) (log(val)/log(2)) / 10;
+index = (int) log2(val) / 10;
 index = av_clip(index, 0, FF_ARRAY_ELEMS(binary_unit_prefixes) - 
1);
 val  /= pow(2, index * 10);
 prefix_string = binary_unit_prefixes[index];
diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 297efbb..899572a 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -344,7 +344,7 @@ static void imc_decode_level_coefficients(IMCContext *q, 
int *levlCoeffBuf,
 // maybe some frequency division thingy
 
 flcoeffs1[0] = 2.0 / pow (2, levlCoeffBuf[0] * 0.18945); // 0.18945 = 
log2(10) * 0.05703125
-flcoeffs2[0] = log(flcoeffs1[0]) / log(2);
+flcoeffs2[0] = log2f(flcoeffs1[0]);
 tmp  = flcoeffs1[0];
 tmp2 = flcoeffs2[0];
 
@@ -416,7 +416,7 @@ static int bit_allocation(IMCContext *q, IMCChannel *chctx,
 highest = FFMAX(highest, chctx->flcoeffs1[i]);
 
 for (i = 0; i < BANDS - 1; i++)
-chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log(chctx->flcoeffs5[i]) / 
log(2);
+chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log2f(chctx->flcoeffs5[i]);
 chctx->flcoeffs4[BANDS - 1] = limit;
 
 highest = highest * 0.25;
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index ebfeff6..6e57f82 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@@ -1529,7 +1529,7 @@ static void update_last_header_values(SnowContext *s){
 }
 
 static int qscale2qlog(int qscale){
-return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
+return rint(QROOT*log2(qscale / (float)FF_QP2LAMBDA))
+ 61*QROOT/8; ///< 64 > 60
 }
 
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] imc: use log2(x) instead of log(x) / log(2)

2012-08-05 Thread Clément Bœsch
On Sun, Aug 05, 2012 at 10:22:14PM +0100, Mans Rullgard wrote:
> Signed-off-by: Mans Rullgard 
> ---
>  libavcodec/imc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/libavcodec/imc.c b/libavcodec/imc.c
> index 297efbb..92e9c8c 100644
> --- a/libavcodec/imc.c
> +++ b/libavcodec/imc.c
> @@ -416,7 +416,7 @@ static int bit_allocation(IMCContext *q, IMCChannel 
> *chctx,
>  highest = FFMAX(highest, chctx->flcoeffs1[i]);
>  
>  for (i = 0; i < BANDS - 1; i++)
> -chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log(chctx->flcoeffs5[i]) 
> / log(2);
> +chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - 
> log2(chctx->flcoeffs5[i]);
>  chctx->flcoeffs4[BANDS - 1] = limit;
>  
>  highest = highest * 0.25;

Can't any of these be changed as well, or at least the one in the same file?

  % git grep 'log(.*log(2)'
  avconv.c:snprintf(buf + strlen(buf), sizeof(buf) - 
strlen(buf), "%X", (int)lrintf(log(qp_histogram[j] + 1) / log(2)));
  avprobe.c:index = (int) (log(val)/log(2)) / 10;
  libavcodec/imc.c:flcoeffs2[0] = log(flcoeffs1[0]) / log(2);
  libavcodec/imc.c:chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - 
log(chctx->flcoeffs5[i]) / log(2);
  libavcodec/snowenc.c:return rint(QROOT*log(qscale / 
(float)FF_QP2LAMBDA)/log(2))

[...]

-- 
Clément B.


pgpKaL65kiZwd.pgp
Description: PGP signature
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] imc: use log2(x) instead of log(x) / log(2)

2012-08-05 Thread Benjamin Larsson

OK
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] imc: use log2(x) instead of log(x) / log(2)

2012-08-05 Thread Mans Rullgard
Signed-off-by: Mans Rullgard 
---
 libavcodec/imc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 297efbb..92e9c8c 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -416,7 +416,7 @@ static int bit_allocation(IMCContext *q, IMCChannel *chctx,
 highest = FFMAX(highest, chctx->flcoeffs1[i]);
 
 for (i = 0; i < BANDS - 1; i++)
-chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log(chctx->flcoeffs5[i]) / 
log(2);
+chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log2(chctx->flcoeffs5[i]);
 chctx->flcoeffs4[BANDS - 1] = limit;
 
 highest = highest * 0.25;
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.

2012-08-05 Thread Diego Biurrun
On Sun, Aug 05, 2012 at 09:35:27PM +0100, Måns Rullgård wrote:
> Diego Biurrun  writes:
> > On Tue, May 22, 2012 at 09:09:39PM +0200, Diego Biurrun wrote:
> >> On Sun, May 20, 2012 at 06:18:10PM +0200, Diego Biurrun wrote:
> >> > On Sat, May 19, 2012 at 04:54:18PM +0100, Måns Rullgård wrote:
> >> > > Diego Biurrun  writes:
> >> > > > On Sat, May 19, 2012 at 04:21:01PM +0100, Måns Rullgård wrote:
> >> > > >> Diego Biurrun  writes:
> >> > > >> > This avoids nasm 2.08 being detected as a compatible Assembler.
> >> > > >> > ---
> >> > > >> > Probably at least the log message needs finetuning.  That said, 
> >> > > >> > this works
> >> > > >> > for me and avoids nasm being detected as a working Assembler and 
> >> > > >> > later
> >> > > >> > failing with
> >> > > >> > --- a/configure
> >> > > >> > +++ b/configure
> >> > > >> > @@ -2815,7 +2815,7 @@ EOF
> >> > > >> >  elf*) enabled debug && append YASMFLAGS $yasm_debug 
> >> > > >> > ;;
> >> > > >> >  esac
> >> > > >> >
> >> > > >> > -check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
> >> > > >> > +check_yasm "CPU amdnop" && enable yasm ||
> >> > > >> >  die "yasm not found, use --disable-yasm for a 
> >> > > >> > crippled build"
> >> > > >> >  check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
> >> > > >> 
> >> > > >> Is that directive the only one causing trouble?
> >> > > >
> >> > > > No, there are a few others, like certain AVX instructions, that also
> >> > > > cause trouble,
> >> > > 
> >> > > Aren't those disabled by the next test?
> >> > 
> >> > Quite possibly that is the intention, but if I read our asm files
> >> > correctly, then not all appearances of AVX instructions are "ifdeffed".
> >> > Given my lack of familiarity with yasm syntax, I might well be wrong.
> >> 
> >> Justin's patch that I just pushed fixes that issue, but the following
> >> remains:
> >> 
> >> libavcodec/x86/h264_idct.asm:613: error: (call_internal:3) `%ifdef'
> >> expects macro identifiers
> >> 
> >> A few dozen identical ones with different line numbers follow.
> >> Can somebody shine a light on this one?
> >> 
> >> The original error message that my patch addresses is
> >> 
> >> x86inc.asm:100: error: unknown 'cpu' type
> >
> > .. ping ..
> >
> > The problem persists and none of the people able to fix this have shown
> > interest in fixing nasm support.
> >
> > My patch is enough to detect failing nasm versions in practice, so I'd
> > like to push it.  configure should error out when we know that compilation
> > will fail.
> 
> Is this the only thing it fails on?

No, there is another issue that I cannot quite put my finger on, which
causes errors of the type:

  error: (call_internal:3) `%ifdef' expects macro identifiers

while the CPU directive causes errors of the type:

  x86inc.asm:100: error: unknown 'cpu' type

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] x86/cpu: Include system headers before local headers

2012-08-05 Thread Martin Storsjö

On Sun, 5 Aug 2012, Måns Rullgård wrote:


Martin Storsjö  writes:


An inline function in MSVC system headers included by these
headers use free().


Which function?


It's in _freea in the MSVC malloc.h (which is included implicitly). It's 
not used by us, but we enable the equivalent of 
-Werror=implicit-function-declaration, so it fails just by parsing the 
inline function.



The local headers (after 239fdf1b) include internal.h that redirect
free to please_use_av_free_instead_of_free.


That is because avutil.h foolishly includes common.h.  It really ought
not do that.  Can we please fix that instead?


I'll give it a shot.

// Martin
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] lavf: Declare an AVRational struct without a struct literal

2012-08-05 Thread Martin Storsjö

On Sun, 5 Aug 2012, Måns Rullgård wrote:


Martin Storsjö  writes:


At this place, the normal way of initializing a struct works
fine, there's no need for a struct literal.
---
 libavformat/utils.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index 3630c6f..5b26c59 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2499,7 +2499,7 @@ int avformat_find_stream_info(AVFormatContext *ic, 
AVDictionary **options)
 /* round guessed framerate to a "standard" framerate if it's
  * within 1% of the original estimate*/
 for (j = 1; j < MAX_STD_TIMEBASES; j++) {
-AVRational std_fps = (AVRational){get_std_framerate(j), 
12*1001};
+AVRational std_fps = { get_std_framerate(j), 12*1001 };
 double error = fabs(av_q2d(st->avg_frame_rate) / 
av_q2d(std_fps) - 1);

 if (error < best_error) {
--


OK

This is the second one of these I've seen.  Is it the last one?


I think so - the current MSVC preprocessor didn't handle this case 
properly, and it's the only such case in the preprocessor helper patchset 
right now as far as I can see.


// Martin
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] lavf: Declare an AVRational struct without a struct literal

2012-08-05 Thread Måns Rullgård
Martin Storsjö  writes:

> At this place, the normal way of initializing a struct works
> fine, there's no need for a struct literal.
> ---
>  libavformat/utils.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/libavformat/utils.c b/libavformat/utils.c
> index 3630c6f..5b26c59 100644
> --- a/libavformat/utils.c
> +++ b/libavformat/utils.c
> @@ -2499,7 +2499,7 @@ int avformat_find_stream_info(AVFormatContext *ic, 
> AVDictionary **options)
>  /* round guessed framerate to a "standard" framerate if it's
>   * within 1% of the original estimate*/
>  for (j = 1; j < MAX_STD_TIMEBASES; j++) {
> -AVRational std_fps = (AVRational){get_std_framerate(j), 
> 12*1001};
> +AVRational std_fps = { get_std_framerate(j), 12*1001 };
>  double error = fabs(av_q2d(st->avg_frame_rate) / 
> av_q2d(std_fps) - 1);
>  
>  if (error < best_error) {
> -- 

OK

This is the second one of these I've seen.  Is it the last one?

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] x86/cpu: Include system headers before local headers

2012-08-05 Thread Måns Rullgård
Martin Storsjö  writes:

> An inline function in MSVC system headers included by these
> headers use free(). 

Which function?

> The local headers (after 239fdf1b) include internal.h that redirect
> free to please_use_av_free_instead_of_free.

That is because avutil.h foolishly includes common.h.  It really ought
not do that.  Can we please fix that instead?

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] lavf: Declare an AVRational struct without a struct literal

2012-08-05 Thread Martin Storsjö
At this place, the normal way of initializing a struct works
fine, there's no need for a struct literal.
---
 libavformat/utils.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index 3630c6f..5b26c59 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2499,7 +2499,7 @@ int avformat_find_stream_info(AVFormatContext *ic, 
AVDictionary **options)
 /* round guessed framerate to a "standard" framerate if it's
  * within 1% of the original estimate*/
 for (j = 1; j < MAX_STD_TIMEBASES; j++) {
-AVRational std_fps = (AVRational){get_std_framerate(j), 
12*1001};
+AVRational std_fps = { get_std_framerate(j), 12*1001 };
 double error = fabs(av_q2d(st->avg_frame_rate) / 
av_q2d(std_fps) - 1);
 
 if (error < best_error) {
-- 
1.7.9.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] x86/cpu: Include system headers before local headers

2012-08-05 Thread Martin Storsjö
An inline function in MSVC system headers included by these
headers uses free(). The local headers (after 239fdf1b) include
internal.h, which redirects free to please_use_av_free_instead_of_free.
---
 libavutil/x86/cpu.c |   14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index a63b564..27f51ca 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -22,6 +22,16 @@
 
 #include 
 #include 
+#if HAVE_CPUID
+#include 
+#endif
+#if HAVE_XGETBV
+#include 
+#endif
+#if HAVE_RWEFLAGS
+#include 
+#endif
+
 #include "libavutil/x86_cpu.h"
 #include "libavutil/cpu.h"
 
@@ -35,7 +45,6 @@
 : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)\
 : "0" (index))
 #elif HAVE_CPUID
-#include 
 
 #define cpuid(index, eax, ebx, ecx, edx)\
 do {\
@@ -52,7 +61,6 @@
 #define xgetbv(index, eax, edx) \
 __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
 #elif HAVE_XGETBV
-#include 
 
 #define xgetbv(index, eax, edx) \
 do {\
@@ -76,8 +84,6 @@
 
 #elif HAVE_RWEFLAGS
 
-#include 
-
 #define get_eflags(x)   \
 x = __readeflags()
 
-- 
1.7.9.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.

2012-08-05 Thread Måns Rullgård
Diego Biurrun  writes:

> On Tue, May 22, 2012 at 09:09:39PM +0200, Diego Biurrun wrote:
>> On Sun, May 20, 2012 at 06:18:10PM +0200, Diego Biurrun wrote:
>> > On Sat, May 19, 2012 at 04:54:18PM +0100, Måns Rullgård wrote:
>> > > Diego Biurrun  writes:
>> > > > On Sat, May 19, 2012 at 04:21:01PM +0100, Måns Rullgård wrote:
>> > > >> Diego Biurrun  writes:
>> > > >> > This avoids nasm 2.08 being detected as a compatible Assembler.
>> > > >> > ---
>> > > >> > Probably at least the log message needs finetuning.  That said, 
>> > > >> > this works
>> > > >> > for me and avoids nasm being detected as a working Assembler and 
>> > > >> > later
>> > > >> > failing with
>> > > >> > --- a/configure
>> > > >> > +++ b/configure
>> > > >> > @@ -2815,7 +2815,7 @@ EOF
>> > > >> >  elf*) enabled debug && append YASMFLAGS $yasm_debug ;;
>> > > >> >  esac
>> > > >> >
>> > > >> > -check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
>> > > >> > +check_yasm "CPU amdnop" && enable yasm ||
>> > > >> >  die "yasm not found, use --disable-yasm for a crippled 
>> > > >> > build"
>> > > >> >  check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
>> > > >> 
>> > > >> Is that directive the only one causing trouble?
>> > > >
>> > > > No, there are a few others, like certain AVX instructions, that also
>> > > > cause trouble,
>> > > 
>> > > Aren't those disabled by the next test?
>> > 
>> > Quite possibly that is the intention, but if I read our asm files
>> > correctly, then not all appearances of AVX instructions are "ifdeffed".
>> > Given my lack of familiarity with yasm syntax, I might well be wrong.
>> 
>> Justin's patch that I just pushed fixes that issue, but the following
>> remains:
>> 
>> libavcodec/x86/h264_idct.asm:613: error: (call_internal:3) `%ifdef'
>> expects macro identifiers
>> 
>> A few dozen identical ones with different line numbers follow.
>> Can somebody shine a light on this one?
>> 
>> The original error message that my patch addresses is
>> 
>> x86inc.asm:100: error: unknown 'cpu' type
>
> .. ping ..
>
> The problem persists and none of the people able to fix this have shown
> interest in fixing nasm support.
>
> My patch is enough to detect failing nasm versions in practice, so I'd
> like to push it.  configure should error out when we know that compilation
> will fail.

Is this the only thing it fails on?

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.

2012-08-05 Thread Diego Biurrun
On Tue, May 22, 2012 at 09:09:39PM +0200, Diego Biurrun wrote:
> On Sun, May 20, 2012 at 06:18:10PM +0200, Diego Biurrun wrote:
> > On Sat, May 19, 2012 at 04:54:18PM +0100, Måns Rullgård wrote:
> > > Diego Biurrun  writes:
> > > > On Sat, May 19, 2012 at 04:21:01PM +0100, Måns Rullgård wrote:
> > > >> Diego Biurrun  writes:
> > > >> > This avoids nasm 2.08 being detected as a compatible Assembler.
> > > >> > ---
> > > >> > Probably at least the log message needs finetuning.  That said, this 
> > > >> > works
> > > >> > for me and avoids nasm being detected as a working Assembler and 
> > > >> > later
> > > >> > failing with
> > > >> > --- a/configure
> > > >> > +++ b/configure
> > > >> > @@ -2815,7 +2815,7 @@ EOF
> > > >> >  elf*) enabled debug && append YASMFLAGS $yasm_debug ;;
> > > >> >  esac
> > > >> >
> > > >> > -check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
> > > >> > +check_yasm "CPU amdnop" && enable yasm ||
> > > >> >  die "yasm not found, use --disable-yasm for a crippled 
> > > >> > build"
> > > >> >  check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
> > > >> 
> > > >> Is that directive the only one causing trouble?
> > > >
> > > > No, there are a few others, like certain AVX instructions, that also
> > > > cause trouble,
> > > 
> > > Aren't those disabled by the next test?
> > 
> > Quite possibly that is the intention, but if I read our asm files
> > correctly, then not all appearances of AVX instructions are "ifdeffed".
> > Given my lack of familiarity with yasm syntax, I might well be wrong.
> 
> Justin's patch that I just pushed fixes that issue, but the following
> remains:
> 
> libavcodec/x86/h264_idct.asm:613: error: (call_internal:3) `%ifdef' expects 
> macro identifiers
> 
> A few dozen identical ones with different line numbers follow.
> Can somebody shine a light on this one?
> 
> The original error message that my patch addresses is
> 
> x86inc.asm:100: error: unknown 'cpu' type

.. ping ..

The problem persists and none of the people able to fix this have shown
interest in fixing nasm support.

My patch is enough to detect failing nasm versions in practice, so I'd
like to push it.  configure should error out when we know that compilation
will fail.

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 3/3] doc: Clarify licensing issues arising from external libraries

2012-08-05 Thread Diego Biurrun
On Thu, Jul 26, 2012 at 11:56:05PM +0200, Luca Barbato wrote:
> On 07/26/2012 11:40 PM, Diego Biurrun wrote:
> > On Thu, Jul 26, 2012 at 02:28:29PM +0200, Luca Barbato wrote:
> >> On 07/22/2012 12:17 AM, Diego Biurrun wrote:
> >>> ---
> >>>  LICENSE |   37 -
> >>>  1 files changed, 24 insertions(+), 13 deletions(-)
> >>
> >> So far ok but
> >>
> >>> +The Fraunhofer AAC library, FAAC and OpenSSL are under licenses 
> >>> incompatible
> >>> +with all (L)GPL versions. Thus, unfortunately, since both licenses 
> >>> cannot be
> >>> +satisfied simultaneously, binaries resulting from the combination of 
> >>> Libav
> >>> +with these libraries are nonfree and unredistributable. If you wish to 
> >>> enable
> >>> +any of these libraries nonetheless, pass --enable-nonfree to configure.
> >>
> >> OpenSSL, being a system library, is NOT to be marked nonfree on quite a
> >> number of platforms...
> > 
> > Oh, that opens a big can of worms...
> > 
> > What platforms do you have in mind?  Also, we currently do mark it as
> > incompatible in configure by requiring the nonfree flag.  So I'm just
> > describing the status quo.
> 
> Let it as is, bsd people could survive till we fix that part.

Sorry, I'm not following - what are you trying to say?

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] imc: remove empty if() block

2012-08-05 Thread Kostya Shishkov
On Sun, Aug 05, 2012 at 06:34:26PM +0100, Mans Rullgard wrote:
> Signed-off-by: Mans Rullgard 
> ---
>  libavcodec/imc.c | 3 ---
>  1 file changed, 3 deletions(-)
> 
> diff --git a/libavcodec/imc.c b/libavcodec/imc.c
> index 6df3e58..297efbb 100644
> --- a/libavcodec/imc.c
> +++ b/libavcodec/imc.c
> @@ -230,9 +230,6 @@ static av_cold int imc_decode_init(AVCodecContext *avctx)
>  q->one_div_log2 = 1 / log(2);
>  
>  if (avctx->codec_id == CODEC_ID_IAC) {
> -}
> -
> -if (avctx->codec_id == CODEC_ID_IAC) {
>  iac_generate_tabs(q, avctx->sample_rate);
>  } else {
>  memcpy(q->cyclTab,  cyclTab,  sizeof(cyclTab));
> -- 

sorry
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] imc: remove empty if() block

2012-08-05 Thread Martin Storsjö

On Sun, 5 Aug 2012, Mans Rullgard wrote:


Signed-off-by: Mans Rullgard 
---
libavcodec/imc.c | 3 ---
1 file changed, 3 deletions(-)

diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 6df3e58..297efbb 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -230,9 +230,6 @@ static av_cold int imc_decode_init(AVCodecContext *avctx)
q->one_div_log2 = 1 / log(2);

if (avctx->codec_id == CODEC_ID_IAC) {
-}
-
-if (avctx->codec_id == CODEC_ID_IAC) {
iac_generate_tabs(q, avctx->sample_rate);
} else {
memcpy(q->cyclTab,  cyclTab,  sizeof(cyclTab));
--
1.7.11.1


Ok

// Martin
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] imc: remove empty if() block

2012-08-05 Thread Mans Rullgard
Signed-off-by: Mans Rullgard 
---
 libavcodec/imc.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 6df3e58..297efbb 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -230,9 +230,6 @@ static av_cold int imc_decode_init(AVCodecContext *avctx)
 q->one_div_log2 = 1 / log(2);
 
 if (avctx->codec_id == CODEC_ID_IAC) {
-}
-
-if (avctx->codec_id == CODEC_ID_IAC) {
 iac_generate_tabs(q, avctx->sample_rate);
 } else {
 memcpy(q->cyclTab,  cyclTab,  sizeof(cyclTab));
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] dpx: Make start offset unsigned

2012-08-05 Thread Martin Storsjö

On Sat, 4 Aug 2012, Derek Buitenhuis wrote:


Some corrupted files would end up with a negative offset,
and segfault.

Fixes bug #177.

Signed-off-by: Derek Buitenhuis 
---
libavcodec/dpx.c |3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c
index fadd5c3..9bce648 100644
--- a/libavcodec/dpx.c
+++ b/libavcodec/dpx.c
@@ -62,7 +62,8 @@ static int decode_frame(AVCodecContext *avctx,
AVFrame *const p = &s->picture;
uint8_t *ptr;

-int magic_num, offset, endian;
+unsigned int offset;
+int magic_num, endian;
int x, y;
int w, h, stride, bits_per_color, descriptor, elements, target_packet_size, 
source_packet_size;

--
1.7.10.4


Seems ok to me.

// Martin
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational

2012-08-05 Thread Diego Biurrun
On Sun, Aug 05, 2012 at 08:19:25AM -0700, Ronald S. Bultje wrote:
> On Sun, Aug 5, 2012 at 8:05 AM, Måns Rullgård  wrote:
> > "Ronald S. Bultje"  writes:
> >> On Sun, Aug 5, 2012 at 5:07 AM, Måns Rullgård  wrote:
> >>> Mans Rullgard  writes:
> >>>
> >>>> This allows simplifying a few expressions.
> >>>>
> >>>> Signed-off-by: Mans Rullgard 
> >>>> ---
> >>>
> >>> Ping.
> >>>
> >>> Does this solve the msvc problem with those expressions?
> >>
> >> I'm currently working on the converter to handle such code; we might
> >> want to keep the old code, even though msvc has issues with it, so I
> >> have an incentive to fix it.
> >
> > This patch is an improvement regardless.  Can you now please answer the
> > question?
> 
> It does.

Great, let's push it then.  Ronald can still improve the converter.

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational

2012-08-05 Thread Ronald S. Bultje
Hi,

On Sun, Aug 5, 2012 at 8:05 AM, Måns Rullgård  wrote:
> "Ronald S. Bultje"  writes:
>> On Sun, Aug 5, 2012 at 5:07 AM, Måns Rullgård  wrote:
>>> Mans Rullgard  writes:
>>>
>>>> This allows simplifying a few expressions.
>>>>
>>>> Signed-off-by: Mans Rullgard 
>>>> ---
>>>
>>> Ping.
>>>
>>> Does this solve the msvc problem with those expressions?
>>
>> I'm currently working on the converter to handle such code; we might
>> want to keep the old code, even though msvc has issues with it, so I
>> have an incentive to fix it.
>
> This patch is an improvement regardless.  Can you now please answer the
> question?

It does.

Ronald
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational

2012-08-05 Thread Måns Rullgård
"Ronald S. Bultje"  writes:

> Hi,
>
> On Sun, Aug 5, 2012 at 5:07 AM, Måns Rullgård  wrote:
>> Mans Rullgard  writes:
>>
>>> This allows simplifying a few expressions.
>>>
>>> Signed-off-by: Mans Rullgard 
>>> ---
>>
>> Ping.
>>
>> Does this solve the msvc problem with those expressions?
>
> I'm currently working on the converter to handle such code; we might
> want to keep the old code, even though msvc has issues with it, so I
> have an incentive to fix it.

This patch is an improvement regardless.  Can you now please answer the
question?

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational

2012-08-05 Thread Ronald S. Bultje
Hi,

On Sun, Aug 5, 2012 at 5:07 AM, Måns Rullgård  wrote:
> Mans Rullgard  writes:
>
>> This allows simplifying a few expressions.
>>
>> Signed-off-by: Mans Rullgard 
>> ---
>
> Ping.
>
> Does this solve the msvc problem with those expressions?

I'm currently working on the converter to handle such code; we might
want to keep the old code, even though msvc has issues with it, so I
have an incentive to fix it.

Ronald
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 07/45] x86: mmx2 ---> mmxext in asm constructs

2012-08-05 Thread Ronald S. Bultje
Hi,

On Sun, Aug 5, 2012 at 2:44 AM, Diego Biurrun  wrote:
> On Sat, Aug 04, 2012 at 06:19:38PM -0700, Ronald S. Bultje wrote:
>> On Sat, Aug 4, 2012 at 2:28 PM, Loren Merritt  
>> wrote:
>> > On Sat, 4 Aug 2012, Diego Biurrun wrote:
>> >> On Sat, Aug 04, 2012 at 03:11:50PM -0400, Justin Ruggles wrote:
>> >>> On 07/31/2012 06:17 PM, Diego Biurrun wrote:
>> >>>> ---
>> >>>>  30 files changed, 215 insertions(+), 211 deletions(-)
>> >>>
>> >>> Looks ok, but probably should get other opinions on this as well. I know
>> >>> Ronald was trying to keep x86inc.asm synchronized with x264, and trying
>> >>> to do so after this change would likely require similar extensive
>> >>> cpuflag modifications in x264.
>> >>
>> >> I volunteer to patch x264 if such a change would be accepted on their
>> >> side.
>> >
>> > Rejected. I like "mmx2" better.
>> > However, I wouldn't be opposed to dropping mmx1 entirely and using
>> > the name "mmx" to refer to mmx2. (x264 doesn't actually support mmx1
>> > anyway; we use mmx2 inline asm that's actually inlined in places where
>> > runtime cpu detection is impossible.) But that wouldn't help
>> > synchronization if libav doesn't do so.
>>
>> Right - from my PoV, keeping us in sync with x264 is more important
>> than having a slightly more accurate cpuflag for something that's
>> never exposed to end users in any way.
>
> It is exposed through configure.
>
>> Diego, have ideas on how to fix this,
>
> Yes, why don't you convince Loren that mmxext is the more sensible name?
>
>> or can you live with changing everything to MMX2 instead?
>
> I believe mmxext is the better name and everybody except Loren seems
> to agree.  There are very few uses of mmx2/mmxext in x86inc.asm and
> they are unlikely to ever conflict with future changes to this file.
> Besides, we only sync a few times per year.
>
> So I'm unconvinced that using mmx2 would be a net benefit.  I still
> hope that Loren can change his mind, as I said, I volunteer to do
> all the work.

x86inc.asm is an x264 file that we sync, I'd like to keep it that way,
especially given that I'm the guy who historically had to deal with
the fallout every time a sync doesn't work right (remember INIT_MMX
not backing up XMM regs anymore on Win64? Remember the register
reordering on Win64 recently?).

Plus, I didn't say it was a good idea, I said I could live with it if
others want it. Right now, it seems others (i.e. Loren) don't.

Ronald
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 2/3] avconv: split options parsing stuff into a separate file.

2012-08-05 Thread Diego Biurrun
On Sat, Aug 04, 2012 at 11:49:15AM +0200, Anton Khirnov wrote:
> ---
>  Makefile |2 +
>  avconv.c | 2227 
> ++
>  avconv.h |  360 ++
>  avconv_opt.c | 1916 ++
>  4 files changed, 2326 insertions(+), 2179 deletions(-)
>  create mode 100644 avconv.h
>  create mode 100644 avconv_opt.c

avconv: split option parsing into a separate file

> --- /dev/null
> +++ b/avconv.h
> @@ -0,0 +1,360 @@
> +
> +#endif // AVCONV_H

Use /* */ like everywhere else please.

> --- /dev/null
> +++ b/avconv_opt.c
> @@ -0,0 +1,1916 @@
> +/*
> + * avconv options parsing

option

> + * Copyright (c) 2000-2012 The libav developers.

I don't think this denotes a proper legal entity, so just drop it.

I'll do a more thorough review once I can apply this cleanly to
some tree of mine.

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 3/3] avconv: split configuring filters to a separate file.

2012-08-05 Thread Diego Biurrun
On Sat, Aug 04, 2012 at 11:49:16AM +0200, Anton Khirnov wrote:
> ---
>  Makefile|2 +-
>  avconv.c|  530 ---
>  avconv.h|2 +
>  avconv_filter.c |  562 
> +++
>  4 files changed, 565 insertions(+), 531 deletions(-)
>  create mode 100644 avconv_filter.c

avconv: split filter configuration to a separate file

> --- /dev/null
> +++ b/avconv_filter.c
> @@ -0,0 +1,562 @@
> +/*
> + * avconv filters configuration

filter

> + * Copyright (c) 2000-2012 The libav developers.

I don't think this denotes a proper legal entity, so just drop it.

I'll do a more thorough review once I can apply this cleanly to
some tree of mine.

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 19/45] x86: h264_idct: Rename x264_add8x4_idct_sse2 --> h264_add8x4_idct_sse2

2012-08-05 Thread Diego Biurrun
On Wed, Aug 01, 2012 at 12:17:43AM +0200, Diego Biurrun wrote:
> ---
>  libavcodec/x86/h264_idct.asm |8 
>  1 files changed, 4 insertions(+), 4 deletions(-)

OKed by Benjamin on IRC.

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] build: generalise rules and variable settings for av* programs

2012-08-05 Thread Diego Biurrun
On Sat, Aug 04, 2012 at 07:06:04PM +0100, Mans Rullgard wrote:
> This simplifies adding extra flags for individual programs
> and also allows more than one object file per program.
> 
> Signed-off-by: Mans Rullgard 
> ---
>  Makefile  | 17 -
>  configure |  6 +++---
>  2 files changed, 15 insertions(+), 8 deletions(-)

LGTM

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] fate: simplify variable setting filter.mak

2012-08-05 Thread Diego Biurrun
On Sun, Aug 05, 2012 at 12:06:23PM +0100, Mans Rullgard wrote:
> This removes some needless indirection and duplication.
> 
> Signed-off-by: Mans Rullgard 
> ---
>  tests/fate/filter.mak | 9 ++---
>  1 file changed, 2 insertions(+), 7 deletions(-)

OK

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational

2012-08-05 Thread Diego Biurrun
On Sun, Jul 29, 2012 at 11:01:34PM +0200, Luca Barbato wrote:
> On 07/29/2012 08:32 PM, Måns Rullgård wrote:
> > "Ronald S. Bultje"  writes:
> >> On Jul 29, 2012 9:01 AM, "Luca Barbato"  wrote:
> >>> On 07/29/2012 04:00 PM, Mans Rullgard wrote:
> >>>> This allows simplifying a few expressions.
> >>>>
> >>>> ---
> >>>>  avconv.c |  6 ++
> >>>>  libavutil/rational.h | 11 +++
> >>>>  2 files changed, 13 insertions(+), 4 deletions(-)
> >>>
> >>> Ok.
> >>
> >> Wrong namespace? av_rational_inv()?
> > 
> > It's consistent with the other functions in that header.  I thought you
> > liked consistency.
> 
> I like the name short as is.

Any name is fine with me.  Let's settle on the short one and not bikeshed
this further.  If Ronald has strong feelings about it, just switch the
name, but get this over with please.

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational

2012-08-05 Thread Måns Rullgård
Mans Rullgard  writes:

> This allows simplifying a few expressions.
>
> Signed-off-by: Mans Rullgard 
> ---

Ping.

Does this solve the msvc problem with those expressions?

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] fate: simplify variable setting filter.mak

2012-08-05 Thread Mans Rullgard
This removes some needless indirection and duplication.

Signed-off-by: Mans Rullgard 
---
 tests/fate/filter.mak | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tests/fate/filter.mak b/tests/fate/filter.mak
index 35b6558..e42f837 100644
--- a/tests/fate/filter.mak
+++ b/tests/fate/filter.mak
@@ -19,21 +19,16 @@ $(FATE_AMIX): CMP  = oneoff
 $(FATE_AMIX): CMP_UNIT = f32
 
 FATE_FILTER += $(FATE_AMIX)
-FATE_SAMPLES_AVCONV += $(FATE_AMIX)
 
-FATE_ASYNCTS += fate-filter-asyncts
+FATE_FILTER += fate-filter-asyncts
 fate-filter-asyncts: SRC = $(SAMPLES)/nellymoser/nellymoser-discont.flv
 fate-filter-asyncts: CMD = pcm -analyzeduration 1000 -i $(SRC) -af asyncts
 fate-filter-asyncts: CMP = oneoff
 fate-filter-asyncts: REF = $(SAMPLES)/nellymoser/nellymoser-discont.pcm
 
-FATE_FILTER += $(FATE_ASYNCTS)
-FATE_SAMPLES_AVCONV += $(FATE_ASYNCTS)
-
 fate-filter-delogo: CMD = framecrc -i $(SAMPLES)/real/rv30.rm -vf 
delogo=show=0:x=290:y=25:w=26:h=16 -an
 
 FATE_FILTER += fate-filter-delogo
-FATE_SAMPLES_AVCONV += fate-filter-delogo
 
 FATE_YADIF += fate-filter-yadif-mode0
 fate-filter-yadif-mode0: CMD = framecrc -flags bitexact -idct simple -i 
$(SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vf yadif=0
@@ -42,6 +37,6 @@ FATE_YADIF += fate-filter-yadif-mode1
 fate-filter-yadif-mode1: CMD = framecrc -flags bitexact -idct simple -i 
$(SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vf yadif=1
 
 FATE_FILTER += $(FATE_YADIF)
-FATE_SAMPLES_AVCONV += $(FATE_YADIF)
 
+FATE_SAMPLES_AVCONV += $(FATE_FILTER)
 fate-filter: $(FATE_FILTER)
-- 
1.7.11.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] lavu: add snprintf(), vsnprint() and strtod() replacements for MS runtime.

2012-08-05 Thread Måns Rullgård
"Ronald S. Bultje"  writes:

> +#ifndef AVUTIL_OS_SUPPORT_H
> +#define AVUTIL_OS_SUPPORT_H
> +
> +/**
> + * @file
> + * OSSupport
> + */
> +
> +#include 
> +#include 
> +
> +/*
> + * snprintf() on MSVC returns -1 (instead of required buffer length)
> + * if the input buffer isn't big enough. Also, if the required buffer
> + * length is exactly identical to the input buffer size, or if the
> + * return value is -1 because the input buffer isn't big enough, MSVC
> + * will fail to NULL-terminate the output buffer. vsnprintf() has the
> + * same issue.
> + *
> + * Thus, provide our own fallback wrappers with correct behaviour.
> + */
> +#undef snprintf
> +#define snprintf avpriv_snprintf
> +int snprintf(char *restrict s, size_t n, const char *restrict format, ...);
> +
> +#undef vsnprintf
> +#define vsnprintf avpriv_vsnprintf
> +int vsnprintf(char *restrict s, size_t n, const char *restrict format, 
> va_list ap);
> +
> +/*
> + * strtod() on MSVC doesn't handle strings like 'inf' or 'nan'. Also,
> + * it doesn't handle "0x" prefixes for hexadecimal input.
> + *
> + * Thus, provide our own fallback wrapper with correct behaviour.
> + */
> +#undef strtod
> +#define strtod avpriv_strtod
> +double strtod(char *restrict nptr, char **restrict endptr);
> +
> +#endif /* AVUTIL_OS_SUPPORT_H */
> -- 

These replacements should be in separate files so that they can be
enabled independently.

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] lavu: add snprintf(), vsnprint() and strtod() replacements for MS runtime.

2012-08-05 Thread Måns Rullgård
"Ronald S. Bultje"  writes:

> From: "Ronald S. Bultje" 
>
> The idea is to compile in os_support.c when compiling Libav against
> the MS runtime (e.g. with the MSVC compiler) and thereby provide
> replacements for some functions that are not standards-compliant. We
> can force-include the header using cl.exe -Fi, so we don't have to
> contaminate source files outside the compat/ directory.
> ---
>  configure |2 +
>  libavutil/Makefile|1 +
>  libavutil/compat/os_support.c |  130 
> +
>  libavutil/compat/os_support.h |   61 +++
>  4 files changed, 194 insertions(+)
>  create mode 100644 libavutil/compat/os_support.c
>  create mode 100644 libavutil/compat/os_support.h

When I said these things belong in compat/, I meant the one that already
exists at the top level.

> +#undef vsnprintf
> +int avpriv_vsnprintf(char *restrict s, size_t n, const char *restrict fmt, 
> va_list ap)
> +{
> +int ret;
> +
> +if (n == 0 || n > INT_MAX)
> +return 0;

I don't think zero is the proper value to return here.  It is not
consistent with any existing snprintf variant.

> +/* we use n - 1 here because if the buffer is not big enough, the MS 
> runtime
> + * libraries don't add a terminating zero at the end. MSDN recommends to 
> provide
> + * _snprintf/_vsnprintf() a buffer size that is one less than the actual 
> buffer,
> + * and zero it before calling _snprintf/_vsnprintf() to workaround this 
> problem.
> + * See http://msdn.microsoft.com/en-us/library/1kt27hek(v=vs.80).aspx */

Watch that line length.

> +memset(s, 0, n);
> +ret = vsnprintf(s, n - 1, fmt, ap);
> +if (ret == -1)
> +ret = n;
> +
> +return ret;
> +}
> +
> +static char *check_nan_suffix(char *s)
> +{
> +char *start = s;
> +
> +if (*s++ != '(')
> +return start;
> +
> +while ((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
> +   (*s >= '0' && *s <= '9') ||  *s == '_')
> +s++;
> +
> +return *s++ == ')' ? s : start;

This has to be "*s == ')' ? s + 1 : start" to avoid incrementing past a
null terminator.

> +}
> +
> +#undef strtod
> +double avpriv_strtod(char *restrict nptr, char **restrict endptr)
> +{
> +char *end;
> +double res;
> +
> +/* Skip leading spaces */
> +while (isspace(*nptr))
> +nptr++;
> +
> +if (!av_strncasecmp(nptr, "infinity", 8)) {
> +end = nptr + 8;
> +res = INFINITY;
> +} else if (!av_strncasecmp(nptr, "inf", 3)) {
> +end = nptr + 3;
> +res = INFINITY;
> +} else if (!av_strncasecmp(nptr, "+infinity", 9)) {
> +end = nptr + 9;
> +res = INFINITY;
> +} else if (!av_strncasecmp(nptr, "+inf", 4)) {
> +end = nptr + 4;
> +res = INFINITY;
> +} else if (!av_strncasecmp(nptr, "-infinity", 9)) {
> +end = nptr + 9;
> +res = -INFINITY;
> +} else if (!av_strncasecmp(nptr, "-inf", 4)) {
> +end = nptr + 4;
> +res = -INFINITY;
> +} else if (!av_strncasecmp(nptr, "nan", 3)) {
> +end = check_nan_suffix(nptr + 3);
> +res = NAN;
> +} else if (!av_strncasecmp(nptr, "+nan", 4) ||
> +   !av_strncasecmp(nptr, "-nan", 4)) {
> +end = check_nan_suffix(nptr + 4);
> +res = NAN;
> +} else if (!av_strncasecmp(nptr, "0x", 2) ||
> +   !av_strncasecmp(nptr, "-0x", 3) ||
> +   !av_strncasecmp(nptr, "+0x", 3)) {
> +/* FIXME this doesn't handle exponents or non-integers 
> (float/double) */
> +res = strtoll(nptr, &end, 16);

It also doesn't handle integers too large for long long.

> +} else {
> +res = strtod(nptr, &end);
> +}
> +
> +if (endptr)
> +*endptr = end;
> +
> +return res;
> +}
> diff --git a/libavutil/compat/os_support.h b/libavutil/compat/os_support.h
> new file mode 100644
> index 000..b813fee
> --- /dev/null
> +++ b/libavutil/compat/os_support.h
> @@ -0,0 +1,61 @@
> +/*
> + * Support functions for OSes lacking basic libc functionality
> + * Copyright (c) 2012 Ronald S. Bultje 
> + *
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + */
> +
> +#ifndef AVUTIL_O

Re: [libav-devel] [PATCH] lavu: add snprintf(), vsnprint() and strtod() replacements on MSVC.

2012-08-05 Thread Måns Rullgård
Diego Biurrun  writes:

> On Fri, Aug 03, 2012 at 09:38:29PM +0100, Måns Rullgård wrote:
>> "Ronald S. Bultje"  writes:
>> >> Other than that,
>> >> this feels like it belongs in compat/ rather than libavutil.  Not really
>> >> sure how best to build it though.
>> >
>> > I was actually thinking of that for the header, yes. I suppose we can
>> > do it for the source also, but then again, we'd need to link lavu with
>> > that, which seems kind of complex for the rather simple thing we're
>> > trying to accomplish here?
>> 
>> I'm trying to not contaminate libavutil with things that don't belong
>> there.  That is worth a little complexity.
>
> While we're discussing the subject (sorry if I hijack the thread), this
> applies to all those math functions that we fall back on for obsolete
> systems that lack them.  Having a better place than libavutil for them
> sounds like a good idea.

Yes, although those are different in that they don't result in any code,
let alone exported symbols.

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 07/45] x86: mmx2 ---> mmxext in asm constructs

2012-08-05 Thread Diego Biurrun
On Sat, Aug 04, 2012 at 06:19:38PM -0700, Ronald S. Bultje wrote:
> On Sat, Aug 4, 2012 at 2:28 PM, Loren Merritt  wrote:
> > On Sat, 4 Aug 2012, Diego Biurrun wrote:
> >> On Sat, Aug 04, 2012 at 03:11:50PM -0400, Justin Ruggles wrote:
> >>> On 07/31/2012 06:17 PM, Diego Biurrun wrote:
> >>>> ---
> >>>>  30 files changed, 215 insertions(+), 211 deletions(-)
> >>>
> >>> Looks ok, but probably should get other opinions on this as well. I know
> >>> Ronald was trying to keep x86inc.asm synchronized with x264, and trying
> >>> to do so after this change would likely require similar extensive
> >>> cpuflag modifications in x264.
> >>
> >> I volunteer to patch x264 if such a change would be accepted on their
> >> side.
> >
> > Rejected. I like "mmx2" better.
> > However, I wouldn't be opposed to dropping mmx1 entirely and using
> > the name "mmx" to refer to mmx2. (x264 doesn't actually support mmx1
> > anyway; we use mmx2 inline asm that's actually inlined in places where
> > runtime cpu detection is impossible.) But that wouldn't help
> > synchronization if libav doesn't do so.
> 
> Right - from my PoV, keeping us in sync with x264 is more important
> than having a slightly more accurate cpuflag for something that's
> never exposed to end users in any way.

It is exposed through configure.

> Diego, have ideas on how to fix this,

Yes, why don't you convince Loren that mmxext is the more sensible name?

> or can you live with changing everything to MMX2 instead?

I believe mmxext is the better name and everybody except Loren seems
to agree.  There are very few uses of mmx2/mmxext in x86inc.asm and
they are unlikely to ever conflict with future changes to this file.
Besides, we only sync a few times per year.

So I'm unconvinced that using mmx2 would be a net benefit.  I still
hope that Loren can change his mind, as I said, I volunteer to do
all the work.

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] lavu: add snprintf(), vsnprint() and strtod() replacements on MSVC.

2012-08-05 Thread Diego Biurrun
On Fri, Aug 03, 2012 at 09:38:29PM +0100, Måns Rullgård wrote:
> "Ronald S. Bultje"  writes:
> >> Other than that,
> >> this feels like it belongs in compat/ rather than libavutil.  Not really
> >> sure how best to build it though.
> >
> > I was actually thinking of that for the header, yes. I suppose we can
> > do it for the source also, but then again, we'd need to link lavu with
> > that, which seems kind of complex for the rather simple thing we're
> > trying to accomplish here?
> 
> I'm trying to not contaminate libavutil with things that don't belong
> there.  That is worth a little complexity.

While we're discussing the subject (sorry if I hijack the thread), this
applies to all those math functions that we fall back on for obsolete
systems that lack them.  Having a better place than libavutil for them
sounds like a good idea.

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 1/2] mpegaudiodec: don't print an error on > 1 frame in a packet.

2012-08-05 Thread Anton Khirnov
It's a perfectly normal situation, nothing to spam about.
---
 libavcodec/mpegaudiodec.c |1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 6c1e8af..f388d8b 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -1663,7 +1663,6 @@ static int decode_frame(AVCodecContext * avctx, void 
*data, int *got_frame_ptr,
 av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
 return AVERROR_INVALIDDATA;
 } else if (s->frame_size < buf_size) {
-av_log(avctx, AV_LOG_ERROR, "incorrect frame size\n");
 buf_size= s->frame_size;
 }
 
-- 
1.7.10.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 2/2] mpegaudioenc: list supported channel layouts.

2012-08-05 Thread Anton Khirnov
---
 libavcodec/mpegaudioenc.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/mpegaudioenc.c b/libavcodec/mpegaudioenc.c
index 6e79a61..a0ae5a7 100644
--- a/libavcodec/mpegaudioenc.c
+++ b/libavcodec/mpegaudioenc.c
@@ -24,6 +24,8 @@
  * The simplest mpeg audio layer 2 encoder.
  */
 
+#include "libavutil/audioconvert.h"
+
 #include "avcodec.h"
 #include "internal.h"
 #include "put_bits.h"
@@ -794,6 +796,9 @@ AVCodec ff_mp2_encoder = {
 .supported_samplerates = (const int[]){
 44100, 48000,  32000, 22050, 24000, 16000, 0
 },
+.channel_layouts   = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
+ AV_CH_LAYOUT_STEREO,
+ 0 },
 .long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
 .defaults  = mp2_defaults,
 };
-- 
1.7.10.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel