Re: [libav-devel] [PATCH 07/45] x86: mmx2 ---> mmxext in asm constructs
On Sun, Aug 5, 2012 at 10:20 AM, Ronald S. Bultje wrote:
> Plus, I didn't say it was a good idea, I said I could live with it if
> others want it. Right now, it seems others (i.e. Loren) don't.

FYI, it makes my life harder. Also I agree with Loren.
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 4/5] x86: fix rNmp macros with nasm
On Sun, Aug 5, 2012 at 7:36 PM, Mans Rullgard wrote:
> For some reason, nasm requires this. No harm done to yasm.
>
> Signed-off-by: Mans Rullgard
> ---
> libavutil/x86/x86inc.asm | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)

Has this been synced with x264?
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] libx264: don't set framerate, set vfr_input.
There's no way for the encoder to know whether the input is CFR, so assume VFR.
---
 libavcodec/libx264.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index adda881..72fd390 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -381,8 +381,9 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.i_height = avctx->height;
     x4->params.vui.i_sar_width = avctx->sample_aspect_ratio.num;
     x4->params.vui.i_sar_height = avctx->sample_aspect_ratio.den;
-    x4->params.i_fps_num = x4->params.i_timebase_den = avctx->time_base.den;
-    x4->params.i_fps_den = x4->params.i_timebase_num = avctx->time_base.num;
+    x4->params.i_timebase_den = avctx->time_base.den;
+    x4->params.i_timebase_num = avctx->time_base.num;
+    x4->params.b_vfr_input = 1;
 
     x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR;
 
--
1.7.10.4
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 13/15] lavr: x86: optimized 6-channel flt to s16p conversion
--- libavresample/x86/audio_convert.asm| 74 libavresample/x86/audio_convert_init.c | 13 ++ 2 files changed, 87 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 2908cbf..c666da0 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -1091,3 +1091,77 @@ CONV_FLT_TO_S16P_2CH INIT_XMM avx CONV_FLT_TO_S16P_2CH %endif + +;-- +; void ff_conv_flt_to_s16p_6ch(int16_t *const *dst, float *src, int len, +; int channels); +;-- + +%macro CONV_FLT_TO_S16P_6CH 0 +%if ARCH_X86_64 +cglobal conv_flt_to_s16p_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5 +%else +cglobal conv_flt_to_s16p_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5 +%define lend dword r2m +%endif +mov dst1q, [dstq+ gprsize] +mov dst2q, [dstq+2*gprsize] +mov dst3q, [dstq+3*gprsize] +mov dst4q, [dstq+4*gprsize] +mov dst5q, [dstq+5*gprsize] +mov dstq, [dstq ] +sub dst1q, dstq +sub dst2q, dstq +sub dst3q, dstq +sub dst4q, dstq +sub dst5q, dstq +mova m6, [pf_s16_scale] +.loop: +mulps m0, m6, [srcq+0*mmsize] +mulps m3, m6, [srcq+1*mmsize] +mulps m1, m6, [srcq+2*mmsize] +mulps m4, m6, [srcq+3*mmsize] +mulps m2, m6, [srcq+4*mmsize] +mulps m5, m6, [srcq+5*mmsize] +cvtps2dq m0, m0 +cvtps2dq m1, m1 +cvtps2dq m2, m2 +cvtps2dq m3, m3 +cvtps2dq m4, m4 +cvtps2dq m5, m5 +packssdw m0, m3 ; m0 = 0, 1, 2, 3, 4, 5, 6, 7 +packssdw m1, m4 ; m1 = 8, 9, 10, 11, 12, 13, 14, 15 +packssdw m2, m5 ; m2 = 16, 17, 18, 19, 20, 21, 22, 23 +PALIGNRm3, m1, m0, 12, m4 ; m3 = 6, 7, 8, 9, 10, 11, x, x +shufps m1, m2, q1032; m1 = 12, 13, 14, 15, 16, 17, 18, 19 +psrldq m2, 4; m2 = 18, 19, 20, 21, 22, 23, x, x +SBUTTERFLY2 wd, 0, 3, 4 ; m0 = 0, 6, 1, 7, 2, 8, 3, 9 +; m3 = 4, 10, 5, 11, x, x, x, x +SBUTTERFLY2 wd, 1, 2, 4 ; m1 = 12, 18, 13, 19, 14, 20, 15, 21 +; m2 = 16, 22, 17, 23, x, x, x, x +SBUTTERFLY2 dq, 0, 1, 4 ; m0 = 0, 6, 12, 18, 1, 7, 13, 19 +; m1 = 2, 8, 14, 20, 3, 9, 15, 21 +punpckldq m3, m2 ; m3 = 4, 10, 16, 22, 5, 11, 
17, 23 +movq[dstq ], m0 +movhps [dstq+dst1q], m0 +movq[dstq+dst2q], m1 +movhps [dstq+dst3q], m1 +movq[dstq+dst4q], m3 +movhps [dstq+dst5q], m3 +add srcq, mmsize*6 +add dstq, mmsize/2 +sub lend, mmsize/4 +jg .loop +REP_RET +%endmacro + +%define PALIGNR PALIGNR_MMX +INIT_XMM sse2 +CONV_FLT_TO_S16P_6CH +%define PALIGNR PALIGNR_SSSE3 +INIT_XMM ssse3 +CONV_FLT_TO_S16P_6CH +%if HAVE_AVX +INIT_XMM avx +CONV_FLT_TO_S16P_6CH +%endif diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index d623543..944f1cd 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -127,6 +127,13 @@ extern void ff_conv_flt_to_s16p_2ch_sse2(int16_t *const *dst, float *src, extern void ff_conv_flt_to_s16p_2ch_avx (int16_t *const *dst, float *src, int len, int channels); +extern void ff_conv_flt_to_s16p_6ch_sse2 (int16_t *const *dst, float *src, + int len, int channels); +extern void ff_conv_flt_to_s16p_6ch_ssse3(int16_t *const *dst, float *src, + int len, int channels); +extern void ff_conv_flt_to_s16p_6ch_avx (int16_t *const *dst, float *src, + int len, int channels); + av_cold void ff_audio_convert_init_x86(AudioConvert *ac) { #if HAVE_YASM @@ -184,6 +191,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT, 2, 16, 8, "SSE2", ff_conv_flt_to_s16p_2ch_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT, + 6, 16, 4, "SSE2", ff_conv_flt_to_s16p_6ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, @@ -196,6 +205,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 6, 16, 4, "SSSE3", ff_conv_s16_to_s16p_6ch_
[libav-devel] [PATCH 12/15] lavr: x86: optimized 2-channel flt to s16p conversion
--- libavresample/x86/audio_convert.asm| 46 libavresample/x86/audio_convert_init.c |9 ++ 2 files changed, 55 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 0329a79..2908cbf 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -1045,3 +1045,49 @@ CONV_S16_TO_FLTP_6CH INIT_XMM avx CONV_S16_TO_FLTP_6CH %endif + +;-- +; void ff_conv_flt_to_s16p_2ch(int16_t *const *dst, float *src, int len, +; int channels); +;-- + +%macro CONV_FLT_TO_S16P_2CH 0 +cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1 +lea lenq, [2*lend] +mov dst1q, [dst0q+gprsize] +mov dst0q, [dst0q] +lea srcq, [srcq+4*lenq] +add dst0q, lenq +add dst1q, lenq +neg lenq +movam5, [pf_s16_scale] +.loop: +mova m0, [srcq+4*lenq ] +mova m1, [srcq+4*lenq+ mmsize] +mova m2, [srcq+4*lenq+2*mmsize] +mova m3, [srcq+4*lenq+3*mmsize] +DEINT2_PS 0, 1, 4 +DEINT2_PS 2, 3, 4 +mulps m0, m0, m5 +mulps m1, m1, m5 +mulps m2, m2, m5 +mulps m3, m3, m5 +cvtps2dq m0, m0 +cvtps2dq m1, m1 +cvtps2dq m2, m2 +cvtps2dq m3, m3 +packssdw m0, m2 +packssdw m1, m3 +mova [dst0q+lenq], m0 +mova [dst1q+lenq], m1 +add lenq, mmsize +jl .loop +REP_RET +%endmacro + +INIT_XMM sse2 +CONV_FLT_TO_S16P_2CH +%if HAVE_AVX +INIT_XMM avx +CONV_FLT_TO_S16P_2CH +%endif diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index 165e376..d623543 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -122,6 +122,11 @@ extern void ff_conv_s16_to_fltp_6ch_sse4 (float *const *dst, int16_t *src, extern void ff_conv_s16_to_fltp_6ch_avx (float *const *dst, int16_t *src, int len, int channels); +extern void ff_conv_flt_to_s16p_2ch_sse2(int16_t *const *dst, float *src, + int len, int channels); +extern void ff_conv_flt_to_s16p_2ch_avx (int16_t *const *dst, float *src, + int len, int channels); + av_cold void ff_audio_convert_init_x86(AudioConvert *ac) { #if HAVE_YASM 
@@ -177,6 +182,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16, 6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT, + 2, 16, 8, "SSE2", ff_conv_flt_to_s16p_2ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, @@ -225,6 +232,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16, 6, 16, 4, "AVX", ff_conv_s16_to_fltp_6ch_avx); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT, + 2, 16, 8, "AVX", ff_conv_flt_to_s16p_2ch_avx); } #endif } -- 1.7.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 10/15] lavr: x86: optimized 2-channel s16 to fltp conversion
--- libavresample/x86/audio_convert.asm| 39 libavresample/x86/audio_convert_init.c | 13 ++ 2 files changed, 52 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 09c4e7f..ea0debf 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -923,3 +923,42 @@ CONV_S16_TO_S16P_6CH INIT_XMM avx CONV_S16_TO_S16P_6CH %endif + +;-- +; void ff_conv_s16_to_fltp_2ch(float *const *dst, int16_t *src, int len, +; int channels); +;-- + +%macro CONV_S16_TO_FLTP_2CH 0 +cglobal conv_s16_to_fltp_2ch, 3,4,4, dst0, src, len, dst1 +lea lenq, [4*lend] +mov dst1q, [dst0q+gprsize] +mov dst0q, [dst0q] +add srcq, lenq +add dst0q, lenq +add dst1q, lenq +neg lenq +movam3, [pf_s16_inv_scale] +.loop: +mova m0, [srcq+lenq] +S16_TO_S32_SX 0, 1 +cvtdq2ps m0, m0 +cvtdq2ps m1, m1 +mulps m0, m0, m3 +mulps m1, m1, m3 +DEINT2_PS 0, 1, 2 +mova [dst0q+lenq], m0 +mova [dst1q+lenq], m1 +add lenq, mmsize +jl .loop +REP_RET +%endmacro + +INIT_XMM sse2 +CONV_S16_TO_FLTP_2CH +INIT_XMM sse4 +CONV_S16_TO_FLTP_2CH +%if HAVE_AVX +INIT_XMM avx +CONV_S16_TO_FLTP_2CH +%endif diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index adb63f6..db4d3f3 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -106,6 +106,13 @@ extern void ff_conv_s16_to_s16p_6ch_ssse3(int16_t *const *dst, int16_t *src, extern void ff_conv_s16_to_s16p_6ch_avx (int16_t *const *dst, int16_t *src, int len, int channels); +extern void ff_conv_s16_to_fltp_2ch_sse2(float *const *dst, int16_t *src, + int len, int channels); +extern void ff_conv_s16_to_fltp_2ch_sse4(float *const *dst, int16_t *src, + int len, int channels); +extern void ff_conv_s16_to_fltp_2ch_avx (float *const *dst, int16_t *src, + int len, int channels); + av_cold void ff_audio_convert_init_x86(AudioConvert *ac) { #if HAVE_YASM @@ -157,6 +164,8 @@ av_cold void 
ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 8, "SSE2", ff_conv_s16_to_s16p_2ch_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16, 6, 16, 4, "SSE2", ff_conv_s16_to_s16p_6ch_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16, + 2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, @@ -173,6 +182,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16, + 2, 16, 8, "SSE4", ff_conv_s16_to_fltp_2ch_sse4); } if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32, @@ -195,6 +206,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 8, "AVX", ff_conv_s16_to_s16p_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16, 6, 16, 4, "AVX", ff_conv_s16_to_s16p_6ch_avx); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16, + 2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx); } #endif } -- 1.7.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 11/15] lavr: x86: optimized 6-channel s16 to fltp conversion
--- libavresample/x86/audio_convert.asm| 83 libavresample/x86/audio_convert_init.c | 17 +++ libavutil/x86/x86util.asm | 12 + 3 files changed, 112 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index ea0debf..0329a79 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -962,3 +962,86 @@ CONV_S16_TO_FLTP_2CH INIT_XMM avx CONV_S16_TO_FLTP_2CH %endif + +;-- +; void ff_conv_s16_to_fltp_6ch(float *const *dst, int16_t *src, int len, +; int channels); +;-- + +%macro CONV_S16_TO_FLTP_6CH 0 +%if ARCH_X86_64 +cglobal conv_s16_to_fltp_6ch, 3,8,7, dst, src, len, dst1, dst2, dst3, dst4, dst5 +%else +cglobal conv_s16_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5 +%define lend dword r2m +%endif +mov dst1q, [dstq+ gprsize] +mov dst2q, [dstq+2*gprsize] +mov dst3q, [dstq+3*gprsize] +mov dst4q, [dstq+4*gprsize] +mov dst5q, [dstq+5*gprsize] +mov dstq, [dstq ] +sub dst1q, dstq +sub dst2q, dstq +sub dst3q, dstq +sub dst4q, dstq +sub dst5q, dstq +mova m6, [pf_s16_inv_scale] +.loop: +mova m0, [srcq+0*mmsize] ; m0 = 0, 1, 2, 3, 4, 5, 6, 7 +mova m3, [srcq+1*mmsize] ; m3 = 8, 9, 10, 11, 12, 13, 14, 15 +mova m2, [srcq+2*mmsize] ; m2 = 16, 17, 18, 19, 20, 21, 22, 23 +PALIGNRm1, m3, m0, 12, m4 ; m1 = 6, 7, 8, 9, 10, 11, x, x +shufps m3, m2, q1032; m3 = 12, 13, 14, 15, 16, 17, 18, 19 +psrldq m2, 4; m2 = 18, 19, 20, 21, 22, 23, x, x +SBUTTERFLY2 wd, 0, 1, 4 ; m0 = 0, 6, 1, 7, 2, 8, 3, 9 +; m1 = 4, 10, 5, 11, x, x, x, x +SBUTTERFLY2 wd, 3, 2, 4 ; m3 = 12, 18, 13, 19, 14, 20, 15, 21 +; m2 = 16, 22, 17, 23, x, x, x, x +SBUTTERFLY2 dq, 0, 3, 4 ; m0 = 0, 6, 12, 18, 1, 7, 13, 19 +; m3 = 2, 8, 14, 20, 3, 9, 15, 21 +punpckldq m1, m2 ; m1 = 4, 10, 16, 22, 5, 11, 17, 23 +S16_TO_S32_SX 0, 2 ; m0 = 0, 6, 12, 18 +; m2 = 1, 7, 13, 19 +S16_TO_S32_SX 3, 4 ; m3 = 2, 8, 14, 20 +; m4 = 3, 9, 15, 21 +S16_TO_S32_SX 1, 5 ; m1 = 4, 10, 16, 22 +; m5 = 5, 11, 17, 23 +SWAP 1,2,3,4 +cvtdq2ps m0, m0 
+cvtdq2ps m1, m1 +cvtdq2ps m2, m2 +cvtdq2ps m3, m3 +cvtdq2ps m4, m4 +cvtdq2ps m5, m5 +mulps m0, m6 +mulps m1, m6 +mulps m2, m6 +mulps m3, m6 +mulps m4, m6 +mulps m5, m6 +mova [dstq ], m0 +mova [dstq+dst1q], m1 +mova [dstq+dst2q], m2 +mova [dstq+dst3q], m3 +mova [dstq+dst4q], m4 +mova [dstq+dst5q], m5 +add srcq, mmsize*3 +add dstq, mmsize +sub lend, mmsize/4 +jg .loop +REP_RET +%endmacro + +%define PALIGNR PALIGNR_MMX +INIT_XMM sse2 +CONV_S16_TO_FLTP_6CH +%define PALIGNR PALIGNR_SSSE3 +INIT_XMM ssse3 +CONV_S16_TO_FLTP_6CH +INIT_XMM sse4 +CONV_S16_TO_FLTP_6CH +%if HAVE_AVX +INIT_XMM avx +CONV_S16_TO_FLTP_6CH +%endif diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index db4d3f3..165e376 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -113,6 +113,15 @@ extern void ff_conv_s16_to_fltp_2ch_sse4(float *const *dst, int16_t *src, extern void ff_conv_s16_to_fltp_2ch_avx (float *const *dst, int16_t *src, int len, int channels); +extern void ff_conv_s16_to_fltp_6ch_sse2 (float *const *dst, int16_t *src, + int len, int channels); +extern void ff_conv_s16_to_fltp_6ch_ssse3(float *const *dst, int16_t *src, + int len, int channels); +extern void ff_conv_s16_to_fltp_6ch_sse4 (float *const *dst, int16_t *src, + int len, int channels); +extern void ff_conv_s16_to_fltp_6ch_avx (float *const *dst, int16_t *src, + int len, int channels); + av_cold void ff_audio_convert_init_x86(AudioConvert *ac) { #if HAVE_YASM @@ -166,6 +175,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 6, 16, 4, "SSE2", ff_conv_s16_to_s16p_6ch_sse2); ff_audio_convert_set_f
[libav-devel] [PATCH 09/15] lavr: x86: optimized 6-channel s16 to s16p conversion
--- libavresample/x86/audio_convert.asm| 61 libavresample/x86/audio_convert_init.c | 13 +++ 2 files changed, 74 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index db141e2..09c4e7f 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -862,3 +862,64 @@ CONV_S16_TO_S16P_2CH INIT_XMM avx CONV_S16_TO_S16P_2CH %endif + +;-- +; void ff_conv_s16_to_s16p_6ch(int16_t *const *dst, int16_t *src, int len, +; int channels); +;-- + +%macro CONV_S16_TO_S16P_6CH 0 +%if ARCH_X86_64 +cglobal conv_s16_to_s16p_6ch, 3,8,5, dst, src, len, dst1, dst2, dst3, dst4, dst5 +%else +cglobal conv_s16_to_s16p_6ch, 2,7,5, dst, src, dst1, dst2, dst3, dst4, dst5 +%define lend dword r2m +%endif +mov dst1q, [dstq+ gprsize] +mov dst2q, [dstq+2*gprsize] +mov dst3q, [dstq+3*gprsize] +mov dst4q, [dstq+4*gprsize] +mov dst5q, [dstq+5*gprsize] +mov dstq, [dstq ] +sub dst1q, dstq +sub dst2q, dstq +sub dst3q, dstq +sub dst4q, dstq +sub dst5q, dstq +.loop: +mova m0, [srcq+0*mmsize] ; m0 = 0, 1, 2, 3, 4, 5, 6, 7 +mova m3, [srcq+1*mmsize] ; m3 = 8, 9, 10, 11, 12, 13, 14, 15 +mova m2, [srcq+2*mmsize] ; m2 = 16, 17, 18, 19, 20, 21, 22, 23 +PALIGNRm1, m3, m0, 12, m4 ; m1 = 6, 7, 8, 9, 10, 11, x, x +shufps m3, m2, q1032; m3 = 12, 13, 14, 15, 16, 17, 18, 19 +psrldq m2, 4; m2 = 18, 19, 20, 21, 22, 23, x, x +SBUTTERFLY2 wd, 0, 1, 4 ; m0 = 0, 6, 1, 7, 2, 8, 3, 9 +; m1 = 4, 10, 5, 11, x, x, x, x +SBUTTERFLY2 wd, 3, 2, 4 ; m3 = 12, 18, 13, 19, 14, 20, 15, 21 +; m2 = 16, 22, 17, 23, x, x, x, x +SBUTTERFLY2 dq, 0, 3, 4 ; m0 = 0, 6, 12, 18, 1, 7, 13, 19 +; m3 = 2, 8, 14, 20, 3, 9, 15, 21 +punpckldq m1, m2 ; m1 = 4, 10, 16, 22, 5, 11, 17, 23 +movq[dstq ], m0 +movhps [dstq+dst1q], m0 +movq[dstq+dst2q], m3 +movhps [dstq+dst3q], m3 +movq[dstq+dst4q], m1 +movhps [dstq+dst5q], m1 +add srcq, mmsize*3 +add dstq, mmsize/2 +sub lend, mmsize/4 +jg .loop +REP_RET +%endmacro + +%define PALIGNR PALIGNR_MMX +INIT_XMM sse2 
+CONV_S16_TO_S16P_6CH +%define PALIGNR PALIGNR_SSSE3 +INIT_XMM ssse3 +CONV_S16_TO_S16P_6CH +%if HAVE_AVX +INIT_XMM avx +CONV_S16_TO_S16P_6CH +%endif diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index 79d7f4d..adb63f6 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -99,6 +99,13 @@ extern void ff_conv_s16_to_s16p_2ch_ssse3(int16_t *const *dst, int16_t *src, extern void ff_conv_s16_to_s16p_2ch_avx (int16_t *const *dst, int16_t *src, int len, int channels); +extern void ff_conv_s16_to_s16p_6ch_sse2 (int16_t *const *dst, int16_t *src, + int len, int channels); +extern void ff_conv_s16_to_s16p_6ch_ssse3(int16_t *const *dst, int16_t *src, + int len, int channels); +extern void ff_conv_s16_to_s16p_6ch_avx (int16_t *const *dst, int16_t *src, + int len, int channels); + av_cold void ff_audio_convert_init_x86(AudioConvert *ac) { #if HAVE_YASM @@ -148,6 +155,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 4, "SSE2", ff_conv_fltp_to_s16_2ch_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16, 2, 16, 8, "SSE2", ff_conv_s16_to_s16p_2ch_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16, + 6, 16, 4, "SSE2", ff_conv_s16_to_s16p_6ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, @@ -156,6 +165,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 4, "SSSE3", ff_conv_fltp_to_s16_2ch_ssse3); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16, 2, 16, 8, "SSSE3", ff_conv_s16_to_s16p_2ch_ssse3); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16, + 6, 16, 4, "SSSE3", ff_conv_s16_to_s16p_6ch_ssse3); } i
[libav-devel] [PATCH 08/15] lavr: x86: optimized 2-channel s16 to s16p conversion
--- libavresample/x86/audio_convert.asm| 50 libavresample/x86/audio_convert_init.c | 15 + libavresample/x86/util.asm |6 3 files changed, 71 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index ead4a5c..db141e2 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -33,6 +33,7 @@ pf_s16_scale: times 4 dd 0x4700 pb_shuf_unpack_even: db -1, -1, 0, 1, -1, -1, 2, 3, -1, -1, 8, 9, -1, -1, 10, 11 pb_shuf_unpack_odd: db -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, 12, 13, -1, -1, 14, 15 pb_interleave_words: SHUFFLE_MASK_W 0, 4, 1, 5, 2, 6, 3, 7 +pb_deinterleave_words: SHUFFLE_MASK_W 0, 2, 4, 6, 1, 3, 5, 7 SECTION_TEXT @@ -812,3 +813,52 @@ CONV_FLTP_TO_FLT_6CH INIT_XMM avx CONV_FLTP_TO_FLT_6CH %endif + +;-- +; void ff_conv_s16_to_s16p_2ch(int16_t *const *dst, int16_t *src, int len, +; int channels); +;-- + +%macro CONV_S16_TO_S16P_2CH 0 +cglobal conv_s16_to_s16p_2ch, 3,4,4, dst0, src, len, dst1 +lea lenq, [2*lend] +mov dst1q, [dst0q+gprsize] +mov dst0q, [dst0q] +lea srcq, [srcq+2*lenq] +add dst0q, lenq +add dst1q, lenq +neg lenq +%if cpuflag(ssse3) +movam3, [pb_deinterleave_words] +%endif +.loop: +movam0, [srcq+2*lenq ] ; m0 = 0, 1, 2, 3, 4, 5, 6, 7 +movam1, [srcq+2*lenq+mmsize] ; m1 = 8, 9, 10, 11, 12, 13, 14, 15 +%if cpuflag(ssse3) +pshufb m0, m3; m0 = 0, 2, 4, 6, 1, 3, 5, 7 +pshufb m1, m3; m1 = 8, 10, 12, 14, 9, 11, 13, 15 +SBUTTERFLY2 qdq, 0, 1, 2 ; m0 = 0, 2, 4, 6, 8, 10, 12, 14 + ; m1 = 1, 3, 5, 7, 9, 11, 13, 15 +%else ; sse2 +pshuflw m0, m0, q3120 ; m0 = 0, 2, 1, 3, 4, 5, 6, 7 +pshufhw m0, m0, q3120 ; m0 = 0, 2, 1, 3, 4, 6, 5, 7 +pshuflw m1, m1, q3120 ; m1 = 8, 10, 9, 11, 12, 13, 14, 15 +pshufhw m1, m1, q3120 ; m1 = 8, 10, 9, 11, 12, 14, 13, 15 +DEINT2_PS0, 1, 2 ; m0 = 0, 2, 4, 6, 8, 10, 12, 14 + ; m1 = 1, 3, 5, 7, 9, 11, 13, 15 +%endif +mova [dst0q+lenq], m0 +mova [dst1q+lenq], m1 +add lenq, mmsize +jl .loop +REP_RET +%endmacro + +INIT_XMM sse2 +CONV_S16_TO_S16P_2CH 
+INIT_XMM ssse3 +CONV_S16_TO_S16P_2CH +%if HAVE_AVX +INIT_XMM avx +CONV_S16_TO_S16P_2CH +%endif diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index 6e78338..79d7f4d 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -90,6 +90,15 @@ extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len, extern void ff_conv_fltp_to_flt_6ch_avx (float *dst, float *const *src, int len, int channels); +/* deinterleave conversions */ + +extern void ff_conv_s16_to_s16p_2ch_sse2(int16_t *const *dst, int16_t *src, + int len, int channels); +extern void ff_conv_s16_to_s16p_2ch_ssse3(int16_t *const *dst, int16_t *src, + int len, int channels); +extern void ff_conv_s16_to_s16p_2ch_avx (int16_t *const *dst, int16_t *src, + int len, int channels); + av_cold void ff_audio_convert_init_x86(AudioConvert *ac) { #if HAVE_YASM @@ -137,12 +146,16 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 6, 16, 4, "SSE2", ff_conv_s16p_to_flt_6ch_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, 2, 16, 4, "SSE2", ff_conv_fltp_to_s16_2ch_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16, + 2, 16, 8, "SSE2", ff_conv_s16_to_s16p_2ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, 6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, 2, 16, 4, "SSSE3", ff_conv_fltp_to_s16_2ch_ssse3); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16, + 2, 16, 8, "SSSE3", ff_conv_s16_to_s16p_2ch_ssse3); } if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_
[libav-devel] [PATCH 06/15] lavr: x86: optimized 6-channel fltp to s16 conversion
--- libavresample/x86/audio_convert.asm| 114 libavresample/x86/audio_convert_init.c | 15 2 files changed, 129 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 15aaa6a..8240a32 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -581,6 +581,120 @@ CONV_FLTP_TO_S16_2CH INIT_XMM ssse3 CONV_FLTP_TO_S16_2CH +;-- +; void ff_conv_fltp_to_s16_6ch(int16_t *dst, float *const *src, int len, +; int channels); +;-- + +%macro CONV_FLTP_TO_S16_6CH 0 +%if ARCH_X86_64 +cglobal conv_fltp_to_s16_6ch, 3,8,7, dst, src, len, src1, src2, src3, src4, src5 +%else +cglobal conv_fltp_to_s16_6ch, 2,7,7, dst, src, src1, src2, src3, src4, src5 +%define lend dword r2m +%endif +movsrc1q, [srcq+1*gprsize] +movsrc2q, [srcq+2*gprsize] +movsrc3q, [srcq+3*gprsize] +movsrc4q, [srcq+4*gprsize] +movsrc5q, [srcq+5*gprsize] +mov srcq, [srcq] +subsrc1q, srcq +subsrc2q, srcq +subsrc3q, srcq +subsrc4q, srcq +subsrc5q, srcq +movaps xmm6, [pf_s16_scale] +.loop: +%if cpuflag(sse2) +mulps m0, m6, [srcq ] +mulps m1, m6, [srcq+src1q] +mulps m2, m6, [srcq+src2q] +mulps m3, m6, [srcq+src3q] +mulps m4, m6, [srcq+src4q] +mulps m5, m6, [srcq+src5q] +cvtps2dq m0, m0 +cvtps2dq m1, m1 +cvtps2dq m2, m2 +cvtps2dq m3, m3 +cvtps2dq m4, m4 +cvtps2dq m5, m5 +packssdw m0, m3; m0 = 0, 6, 12, 18, 3, 9, 15, 21 +packssdw m1, m4; m1 = 1, 7, 13, 19, 4, 10, 16, 22 +packssdw m2, m5; m2 = 2, 8, 14, 20, 5, 11, 17, 23 +; unpack words: +movhlps m3, m0; m3 = 3, 9, 15, 21, x, x, x, x +punpcklwd m0, m1; m0 = 0, 1, 6, 7, 12, 13, 18, 19 +punpckhwd m1, m2; m1 = 4, 5, 10, 11, 16, 17, 22, 23 +punpcklwd m2, m3; m2 = 2, 3, 8, 9, 14, 15, 20, 21 +; blend dwords: +shufpsm3, m0, m2, q2020 ; m3 = 0, 1, 12, 13, 2, 3, 14, 15 +shufpsm0, m1, q2031 ; m0 = 6, 7, 18, 19, 4, 5, 16, 17 +shufpsm2, m1, q3131 ; m2 = 8, 9, 20, 21, 10, 11, 22, 23 +; shuffle dwords: +shufpsm1, m2, m3, q3120 ; m1 = 8, 9, 10, 11, 12, 13, 14, 15 +shufpsm3, m0, q0220 ; m3 = 0, 
1, 2, 3, 4, 5, 6, 7 +shufpsm0, m2, q3113 ; m0 = 16, 17, 18, 19, 20, 21, 22, 23 +mova [dstq+0*mmsize], m3 +mova [dstq+1*mmsize], m1 +mova [dstq+2*mmsize], m0 +%else ; sse +movaxmm0, [srcq ] +movaxmm1, [srcq+src1q] +movaxmm2, [srcq+src2q] +movaxmm3, [srcq+src3q] +movaxmm4, [srcq+src4q] +movaxmm5, [srcq+src5q] +mulps xmm0, xmm6 +mulps xmm1, xmm6 +mulps xmm2, xmm6 +mulps xmm3, xmm6 +mulps xmm4, xmm6 +mulps xmm5, xmm6 +cvtps2pi mm0, xmm0 +cvtps2pi mm1, xmm1 +cvtps2pi mm2, xmm2 +cvtps2pi mm3, xmm3 +cvtps2pi mm4, xmm4 +cvtps2pi mm5, xmm5 +packssdw mm0, mm3 ; m0 = 0, 6, 3, 9 +packssdw mm1, mm4 ; m1 = 1, 7, 4, 10 +packssdw mm2, mm5 ; m2 = 2, 8, 5, 11 +; unpack words +pshufw mm3, mm0, q1032; m3 = 3, 9, 0, 6 +punpcklwdmm0, mm1 ; m0 = 0, 1, 6, 7 +punpckhwdmm1, mm2 ; m1 = 4, 5, 10, 11 +punpcklwdmm2, mm3 ; m2 = 2, 3, 8, 9 +; unpack dwords +pshufw mm3, mm0, q1032; m3 = 6, 7, 0, 1 +punpckldqmm0, mm2 ; m0 = 0, 1, 2, 3 (final) +punpckhdqmm2, mm1 ; m2 = 8, 9, 10, 11 (final) +punpckldqmm1, mm3 ; m1 = 4, 5, 6, 7 (final) +mova [dstq+0*mmsize], mm0 +mova [dstq+1*mmsize], mm1 +mova [dstq+2*mmsize], mm2 +%endif +add srcq, mmsize +add dstq, mmsize*3 +sub lend, mmsize/4 +jg .loop +%if mmsize == 8 +emms +RET +%else +REP_RET +%endif +%endmacro + +INIT_MMX sse +CONV_FLTP_TO_S16_6CH +INIT_XMM sse2 +CONV_FLTP_TO_S16_6CH +%if HAVE_AVX +INIT_XMM avx +CONV_FLTP_TO_S16_6CH +%endif + ;- ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len, ;
[libav-devel] [PATCH 07/15] lavr: x86: optimized 2-channel fltp to flt conversion
--- libavresample/x86/audio_convert.asm| 37 libavresample/x86/audio_convert_init.c |7 ++ 2 files changed, 44 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 8240a32..ead4a5c 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -695,6 +695,43 @@ INIT_XMM avx CONV_FLTP_TO_S16_6CH %endif +;-- +; void ff_conv_fltp_to_flt_2ch(float *dst, float *const *src, int len, +; int channels); +;-- + +%macro CONV_FLTP_TO_FLT_2CH 0 +cglobal conv_fltp_to_flt_2ch, 3,4,5, dst, src0, len, src1 +mov src1q, [src0q+gprsize] +mov src0q, [src0q] +lea lenq, [4*lend] +add src0q, lenq +add src1q, lenq +lea dstq, [dstq+2*lenq] +neg lenq +.loop +movam0, [src0q+lenq ] +movam1, [src1q+lenq ] +movam2, [src0q+lenq+mmsize] +movam3, [src1q+lenq+mmsize] +SBUTTERFLYPS 0, 1, 4 +SBUTTERFLYPS 2, 3, 4 +mova [dstq+2*lenq+0*mmsize], m0 +mova [dstq+2*lenq+1*mmsize], m1 +mova [dstq+2*lenq+2*mmsize], m2 +mova [dstq+2*lenq+3*mmsize], m3 +add lenq, 2*mmsize +jl .loop +REP_RET +%endmacro + +INIT_XMM sse +CONV_FLTP_TO_FLT_2CH +%if HAVE_AVX +INIT_XMM avx +CONV_FLTP_TO_FLT_2CH +%endif + ;- ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len, ; int channels); diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index 51e51f5..6e78338 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -78,6 +78,11 @@ extern void ff_conv_fltp_to_s16_6ch_sse2(int16_t *dst, float *const *src, extern void ff_conv_fltp_to_s16_6ch_avx (int16_t *dst, float *const *src, int len, int channels); +extern void ff_conv_fltp_to_flt_2ch_sse(float *dst, float *const *src, int len, +int channels); +extern void ff_conv_fltp_to_flt_2ch_avx(float *dst, float *const *src, int len, +int channels); + extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len, int channels); extern void ff_conv_fltp_to_flt_6ch_sse4(float 
*dst, float *const *src, int len, @@ -99,6 +104,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, 6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, + 2, 16, 8, "SSE", ff_conv_fltp_to_flt_2ch_sse); } if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { -- 1.7.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 04/15] lavr: x86: optimized 6-channel s16p to flt conversion
--- libavresample/x86/audio_convert.asm| 106 libavresample/x86/audio_convert_init.c | 15 + 2 files changed, 121 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index c3cc76f..622a84c 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -30,6 +30,8 @@ pf_s32_inv_scale: times 8 dd 0x3000 pf_s32_scale: times 8 dd 0x4f00 pf_s16_inv_scale: times 4 dd 0x3800 pf_s16_scale: times 4 dd 0x4700 +pb_shuf_unpack_even: db -1, -1, 0, 1, -1, -1, 2, 3, -1, -1, 8, 9, -1, -1, 10, 11 +pb_shuf_unpack_odd: db -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, 12, 13, -1, -1, 14, 15 SECTION_TEXT @@ -432,6 +434,110 @@ INIT_XMM avx CONV_S16P_TO_FLT_2CH %endif +;-- +; void ff_conv_s16p_to_flt_6ch(float *dst, int16_t *const *src, int len, +; int channels); +;-- + +%macro CONV_S16P_TO_FLT_6CH 0 +%if ARCH_X86_64 +cglobal conv_s16p_to_flt_6ch, 3,8,8, dst, src, len, src1, src2, src3, src4, src5 +%else +cglobal conv_s16p_to_flt_6ch, 2,7,8, dst, src, src1, src2, src3, src4, src5 +%define lend dword r2m +%endif +mov src1q, [srcq+1*gprsize] +mov src2q, [srcq+2*gprsize] +mov src3q, [srcq+3*gprsize] +mov src4q, [srcq+4*gprsize] +mov src5q, [srcq+5*gprsize] +mov srcq, [srcq] +sub src1q, srcq +sub src2q, srcq +sub src3q, srcq +sub src4q, srcq +sub src5q, srcq +mova m7, [pf_s32_inv_scale] +%if cpuflag(ssse3) +%define unpack_even m6 +mova m6, [pb_shuf_unpack_even] +%if ARCH_X86_64 +%define unpack_odd m8 +mova m8, [pb_shuf_unpack_odd] +%else +%define unpack_odd [pb_shuf_unpack_odd] +%endif +%endif +.loop: +movq m0, [srcq ] ; m0 = 0, 6, 12, 18, x, x, x, x +movq m1, [srcq+src1q] ; m1 = 1, 7, 13, 19, x, x, x, x +movq m2, [srcq+src2q] ; m2 = 2, 8, 14, 20, x, x, x, x +movq m3, [srcq+src3q] ; m3 = 3, 9, 15, 21, x, x, x, x +movq m4, [srcq+src4q] ; m4 = 4, 10, 16, 22, x, x, x, x +movq m5, [srcq+src5q] ; m5 = 5, 11, 17, 23, x, x, x, x + ; unpack words: +punpcklwd m0, m1; m0 = 0, 1, 6, 7, 12, 13, 18, 19 +punpcklwd m2, m3; 
m2 = 2, 3, 8, 9, 14, 15, 20, 21 +punpcklwd m4, m5; m4 = 4, 5, 10, 11, 16, 17, 22, 23 + ; blend dwords +shufps m1, m4, m0, q3120 ; m1 = 4, 5, 16, 17, 6, 7, 18, 19 +shufps m0, m2, q2020 ; m0 = 0, 1, 12, 13, 2, 3, 14, 15 +shufps m2, m4, q3131 ; m2 = 8, 9, 20, 21, 10, 11, 22, 23 +%if cpuflag(ssse3) +pshufb m3, m0, unpack_odd ; m3 = 12, 13, 14, 15 +pshufb m0, unpack_even ; m0 = 0, 1, 2, 3 +pshufb m4, m1, unpack_odd ; m4 = 16, 17, 18, 19 +pshufb m1, unpack_even ; m1 = 4, 5, 6, 7 +pshufb m5, m2, unpack_odd ; m5 = 20, 21, 22, 23 +pshufb m2, unpack_even ; m2 = 8, 9, 10, 11 +%else + ; shuffle dwords +pshufd m0, m0, q3120 ; m0 = 0, 1, 2, 3, 12, 13, 14, 15 +pshufd m1, m1, q3120 ; m1 = 4, 5, 6, 7, 16, 17, 18, 19 +pshufd m2, m2, q3120 ; m2 = 8, 9, 10, 11, 20, 21, 22, 23 +pxor m6, m6; convert s16 in m0-m2 to s32 in m0-m5 +punpcklwd m3, m6, m0; m3 = 0, 1, 2, 3 +punpckhwd m4, m6, m0; m4 = 12, 13, 14, 15 +punpcklwd m0, m6, m1; m0 = 4, 5, 6, 7 +punpckhwd m5, m6, m1; m5 = 16, 17, 18, 19 +punpcklwd m1, m6, m2; m1 = 8, 9, 10, 11 +punpckhwd m6, m2; m6 = 20, 21, 22, 23 +SWAP 6,2,1,0,3,4,5 ; swap registers 3,0,1,4,5,6 to 0,1,2,3,4,5 +%endif +cvtdq2ps m0, m0; convert s32 to float +cvtdq2ps m1, m1 +cvtdq2ps m2, m2 +cvtdq2ps m3, m3 +cvtdq2ps m4, m4 +cvtdq2ps m5, m5 +mulps m0, m7; scale float from s32 range to [-1.0,1.0] +mulps m1, m7 +mulps m2, m7 +mulps m3, m7 +mulps m4, m7 +mulps m5, m7 +mova [dstq ], m0 +mova [dstq+ mmsize], m1 +mova [dstq+2*mmsize], m2 +mova [dstq+3*mmsize], m3 +mova [dstq+4*mmsize], m4 +mova [dstq+5*mmsize], m5 +add srcq, mmsize/2 +add dstq, mmsize*6 +sub lend, mmsize/4 +jg .loop +REP_RET +%endmacro + +INIT_XMM sse2 +CONV_S16P_TO_FLT_6CH
[libav-devel] [PATCH 05/15] lavr: x86: optimized 2-channel fltp to s16 conversion
--- libavresample/x86/audio_convert.asm| 43 libavresample/x86/audio_convert_init.c |9 ++ 2 files changed, 52 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 622a84c..15aaa6a 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -32,6 +32,7 @@ pf_s16_inv_scale: times 4 dd 0x3800 pf_s16_scale: times 4 dd 0x4700 pb_shuf_unpack_even: db -1, -1, 0, 1, -1, -1, 2, 3, -1, -1, 8, 9, -1, -1, 10, 11 pb_shuf_unpack_odd: db -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, 12, 13, -1, -1, 14, 15 +pb_interleave_words: SHUFFLE_MASK_W 0, 4, 1, 5, 2, 6, 3, 7 SECTION_TEXT @@ -538,6 +539,48 @@ INIT_XMM avx CONV_S16P_TO_FLT_6CH %endif +;-- +; void ff_conv_fltp_to_s16_2ch(int16_t *dst, float *const *src, int len, +; int channels); +;-- + +%macro CONV_FLTP_TO_S16_2CH 0 +cglobal conv_fltp_to_s16_2ch, 3,4,3, dst, src0, len, src1 +lea lenq, [4*lend] +mov src1q, [src0q+gprsize] +mov src0q, [src0q] +add dstq, lenq +add src0q, lenq +add src1q, lenq +neg lenq +mova m2, [pf_s16_scale] +%if cpuflag(ssse3) +mova m3, [pb_interleave_words] +%endif +.loop: +mulps m0, m2, [src0q+lenq] ; m0 =0,2,4,6 +mulps m1, m2, [src1q+lenq] ; m1 =1,3,5,7 +cvtps2dq m0, m0 +cvtps2dq m1, m1 +%if cpuflag(ssse3) +packssdw m0, m1 ; m0 = 0, 2, 4, 6, 1, 3, 5, 7 +pshufb m0, m3 ; m0 = 0, 1, 2, 3, 4, 5, 6, 7 +%else +packssdw m0, m0 ; m0 = 0, 2, 4, 6, x, x, x, x +packssdw m1, m1 ; m1 = 1, 3, 5, 7, x, x, x, x +punpcklwd m0, m1 ; m0 = 0, 1, 2, 3, 4, 5, 6, 7 +%endif +mova [dstq+lenq], m0 +add lenq, mmsize +jl .loop +REP_RET +%endmacro + +INIT_XMM sse2 +CONV_FLTP_TO_S16_2CH +INIT_XMM ssse3 +CONV_FLTP_TO_S16_2CH + ;- ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len, ; int channels); diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index 6bcf093..3098658 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -66,6 +66,11 @@ 
extern void ff_conv_s16p_to_flt_6ch_ssse3(float *dst, int16_t *const *src, extern void ff_conv_s16p_to_flt_6ch_avx (float *dst, int16_t *const *src, int len, int channels); +extern void ff_conv_fltp_to_s16_2ch_sse2 (int16_t *dst, float *const *src, + int len, int channels); +extern void ff_conv_fltp_to_s16_2ch_ssse3(int16_t *dst, float *const *src, + int len, int channels); + extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len, int channels); extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len, @@ -110,10 +115,14 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 8, "SSE2", ff_conv_s16p_to_flt_2ch_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, 6, 16, 4, "SSE2", ff_conv_s16p_to_flt_6ch_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, + 2, 16, 4, "SSE2", ff_conv_fltp_to_s16_2ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, 6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, + 2, 16, 4, "SSSE3", ff_conv_fltp_to_s16_2ch_ssse3); } if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, -- 1.7.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 02/15] lavr: x86: optimized 6-channel s16p to s16 conversion
--- libavresample/x86/audio_convert.asm| 123 libavresample/x86/audio_convert_init.c | 14 2 files changed, 137 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 4a92952..ee05efc 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -260,6 +260,129 @@ INIT_XMM avx CONV_S16P_TO_S16_2CH %endif +;-- +; void ff_conv_s16p_to_s16_6ch(int16_t *dst, int16_t *const *src, int len, +; int channels); +;-- + +;-- +; NOTE: In the 6-channel functions, len could be used as an index on x86-64 +; instead of just a counter, which would avoid incrementing the +; pointers, but the extra complexity and amount of code is not worth +; the small gain. On x86-32 there are not enough registers to use len +; as an index without keeping two of the pointers on the stack and +; loading them in each iteration. +;-- + +%macro CONV_S16P_TO_S16_6CH 0 +%if ARCH_X86_64 +cglobal conv_s16p_to_s16_6ch, 3,8,7, dst, src0, len, src1, src2, src3, src4, src5 +%else +cglobal conv_s16p_to_s16_6ch, 2,7,7, dst, src0, src1, src2, src3, src4, src5 +%define lend dword r2m +%endif +mov src1q, [src0q+1*gprsize] +mov src2q, [src0q+2*gprsize] +mov src3q, [src0q+3*gprsize] +mov src4q, [src0q+4*gprsize] +mov src5q, [src0q+5*gprsize] +mov src0q, [src0q] +sub src1q, src0q +sub src2q, src0q +sub src3q, src0q +sub src4q, src0q +sub src5q, src0q +.loop: +%if cpuflag(sse2slow) +movqm0, [src0q ] ; m0 = 0, 6, 12, 18, x, x, x, x +movqm1, [src0q+src1q] ; m1 = 1, 7, 13, 19, x, x, x, x +movqm2, [src0q+src2q] ; m2 = 2, 8, 14, 20, x, x, x, x +movqm3, [src0q+src3q] ; m3 = 3, 9, 15, 21, x, x, x, x +movqm4, [src0q+src4q] ; m4 = 4, 10, 16, 22, x, x, x, x +movqm5, [src0q+src5q] ; m5 = 5, 11, 17, 23, x, x, x, x +; unpack words: +punpcklwd m0, m1 ; m0 = 0, 1, 6, 7, 12, 13, 18, 19 +punpcklwd m2, m3 ; m2 = 2, 3, 8, 9, 14, 15, 20, 21 +punpcklwd m4, m5 ; m4 = 4, 5, 10, 11, 16, 17, 22, 23 +; blend dwords +shufps m1, m0, m2, q2020 ; m1 = 
0, 1, 12, 13, 2, 3, 14, 15 +shufps m0, m4, q2031 ; m0 = 6, 7, 18, 19, 4, 5, 16, 17 +shufps m2, m4, q3131 ; m2 = 8, 9, 20, 21, 10, 11, 22, 23 +; shuffle dwords +pshufd m0, m0, q1302 ; m0 = 4, 5, 6, 7, 16, 17, 18, 19 +pshufd m1, m1, q3120 ; m1 = 0, 1, 2, 3, 12, 13, 14, 15 +pshufd m2, m2, q3120 ; m2 = 8, 9, 10, 11, 20, 21, 22, 23 +movq [dstq+0*mmsize/2], m1 +movq [dstq+1*mmsize/2], m0 +movq [dstq+2*mmsize/2], m2 +movhps [dstq+3*mmsize/2], m1 +movhps [dstq+4*mmsize/2], m0 +movhps [dstq+5*mmsize/2], m2 +add src0q, mmsize/2 +add dstq, mmsize*3 +sub lend, mmsize/4 +%else +movam0, [src0q ] ; m0 = 0, 6, 12, 18, 24, 30, 36, 42 +movam1, [src0q+src1q] ; m1 = 1, 7, 13, 19, 25, 31, 37, 43 +movam2, [src0q+src2q] ; m2 = 2, 8, 14, 20, 26, 32, 38, 44 +movam3, [src0q+src3q] ; m3 = 3, 9, 15, 21, 27, 33, 39, 45 +movam4, [src0q+src4q] ; m4 = 4, 10, 16, 22, 28, 34, 40, 46 +movam5, [src0q+src5q] ; m5 = 5, 11, 17, 23, 29, 35, 41, 47 +; unpack words: +SBUTTERFLY2 wd, 0, 1, 6 ; m0 = 0, 1, 6, 7, 12, 13, 18, 19 +; m1 = 24, 25, 30, 31, 36, 37, 42, 43 +SBUTTERFLY2 wd, 2, 3, 6 ; m2 = 2, 3, 8, 9, 14, 15, 20, 21 +; m3 = 26, 27, 32, 33, 38, 39, 44, 45 +SBUTTERFLY2 wd, 4, 5, 6 ; m4 = 4, 5, 10, 11, 16, 17, 22, 23 +; m5 = 28, 29, 34, 35, 40, 41, 46, 47 +; blend dwords +shufps m6, m0, m2, q2020 ; m6 = 0, 1, 12, 13, 2, 3, 14, 15 +shufps m0, m4, q2031 ; m0 = 6, 7, 18, 19, 4, 5, 16, 17 +shufps m2, m4, q3131 ; m2 = 8, 9, 20, 21, 10, 11, 22, 23 +SWAP 4,6; m4 = 0, 1, 12, 13, 2, 3, 14, 15 +shufps m6, m1, m3, q2020 ; m6 = 24, 25, 36, 37, 26, 27, 38, 39 +shufps m1, m5, q2031 ; m1 = 30, 31, 42, 43, 28, 29, 40
[libav-devel] [PATCH 03/15] lavr: x86: optimized 2-channel s16p to flt conversion
--- libavresample/x86/audio_convert.asm| 49 libavresample/x86/audio_convert_init.c |9 ++ 2 files changed, 58 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index ee05efc..c3cc76f 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -383,6 +383,55 @@ INIT_XMM avx CONV_S16P_TO_S16_6CH %endif +;-- +; void ff_conv_s16p_to_flt_2ch(float *dst, int16_t *const *src, int len, +; int channels); +;-- + +%macro CONV_S16P_TO_FLT_2CH 0 +cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1 +lea lenq, [2*lend] +mov src1q, [src0q+gprsize] +mov src0q, [src0q] +lea dstq, [dstq+4*lenq] +add src0q, lenq +add src1q, lenq +neg lenq +movam5, [pf_s32_inv_scale] +.loop: +movam2, [src0q+lenq]; m2 = 0, 2, 4, 6, 8, 10, 12, 14 +movam4, [src1q+lenq]; m4 = 1, 3, 5, 7, 9, 11, 13, 15 +SBUTTERFLY2 wd, 2, 4, 3 ; m2 = 0, 1, 2, 3, 4, 5, 6, 7 +; m4 = 8, 9, 10, 11, 12, 13, 14, 15 +pxorm3, m3 +punpcklwd m0, m3, m2 ; m0 = 0, 1, 2, 3 +punpckhwd m1, m3, m2 ; m1 = 4, 5, 6, 7 +punpcklwd m2, m3, m4 ; m2 = 8, 9, 10, 11 +punpckhwd m3, m4 ; m3 = 12, 13, 14, 15 +cvtdq2psm0, m0 +cvtdq2psm1, m1 +cvtdq2psm2, m2 +cvtdq2psm3, m3 +mulps m0, m5 +mulps m1, m5 +mulps m2, m5 +mulps m3, m5 +mova [dstq+4*lenq ], m0 +mova [dstq+4*lenq+ mmsize], m1 +mova [dstq+4*lenq+2*mmsize], m2 +mova [dstq+4*lenq+3*mmsize], m3 +add lenq, mmsize +jl .loop +REP_RET +%endmacro + +INIT_XMM sse2 +CONV_S16P_TO_FLT_2CH +%if HAVE_AVX +INIT_XMM avx +CONV_S16P_TO_FLT_2CH +%endif + ;- ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len, ; int channels); diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index d9d4714..9706c71 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -54,6 +54,11 @@ extern void ff_conv_s16p_to_s16_6ch_sse2slow(int16_t *dst, int16_t *const *src, extern void ff_conv_s16p_to_s16_6ch_avx (int16_t *dst, int16_t 
*const *src, int len, int channels); +extern void ff_conv_s16p_to_flt_2ch_sse2(float *dst, int16_t *const *src, + int len, int channels); +extern void ff_conv_s16p_to_flt_2ch_avx (float *dst, int16_t *const *src, + int len, int channels); + extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len, int channels); extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len, @@ -94,6 +99,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, 2, 16, 16, "SSE2", ff_conv_s16p_to_s16_2ch_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, + 2, 16, 8, "SSE2", ff_conv_s16p_to_flt_2ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, @@ -110,6 +117,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 2, 16, 16, "AVX", ff_conv_s16p_to_s16_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, 6, 16, 8, "AVX", ff_conv_s16p_to_s16_6ch_avx); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, + 2, 16, 8, "AVX", ff_conv_s16p_to_flt_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } -- 1.7.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 01/15] lavr: x86: optimized 2-channel s16p to s16 conversion
--- libavresample/x86/audio_convert.asm| 37 libavresample/x86/audio_convert_init.c | 13 +++ 2 files changed, 50 insertions(+), 0 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 244c4d1..4a92952 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -223,6 +223,43 @@ INIT_YMM avx CONV_FLT_TO_S32 %endif +;-- +; void ff_conv_s16p_to_s16_2ch(int16_t *dst, int16_t *const *src, int len, +; int channels); +;-- + +%macro CONV_S16P_TO_S16_2CH 0 +cglobal conv_s16p_to_s16_2ch, 3,4,5, dst, src0, len, src1 +mov src1q, [src0q+gprsize] +mov src0q, [src0q] +lealenq, [2*lend] +add src0q, lenq +add src1q, lenq +leadstq, [dstq+2*lenq] +neglenq +.loop +mova m0, [src0q+lenq ] +mova m1, [src1q+lenq ] +mova m2, [src0q+lenq+mmsize] +mova m3, [src1q+lenq+mmsize] +SBUTTERFLY2 wd, 0, 1, 4 +SBUTTERFLY2 wd, 2, 3, 4 +mova [dstq+2*lenq+0*mmsize], m0 +mova [dstq+2*lenq+1*mmsize], m1 +mova [dstq+2*lenq+2*mmsize], m2 +mova [dstq+2*lenq+3*mmsize], m3 +addlenq, 2*mmsize +jl .loop +REP_RET +%endmacro + +INIT_XMM sse2 +CONV_S16P_TO_S16_2CH +%if HAVE_AVX +INIT_XMM avx +CONV_S16P_TO_S16_2CH +%endif + ;- ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len, ; int channels); diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index 2de4970..9b7bcb1 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -22,6 +22,8 @@ #include "libavutil/cpu.h" #include "libavresample/audio_convert.h" +/* flat conversions */ + extern void ff_conv_s16_to_s32_sse2(int16_t *dst, const int32_t *src, int len); extern void ff_conv_s16_to_flt_sse2(float *dst, const int16_t *src, int len); @@ -38,6 +40,13 @@ extern void ff_conv_flt_to_s16_sse2(int16_t *dst, const float *src, int len); extern void ff_conv_flt_to_s32_sse2(int32_t *dst, const float *src, int len); extern void ff_conv_flt_to_s32_avx (int32_t *dst, const float *src, int 
len); +/* interleave conversions */ + +extern void ff_conv_s16p_to_s16_2ch_sse2(int16_t *dst, int16_t *const *src, + int len, int channels); +extern void ff_conv_s16p_to_s16_2ch_avx (int16_t *dst, int16_t *const *src, + int len, int channels); + extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len, int channels); extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len, @@ -71,6 +80,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 0, 16, 16, "SSE2", ff_conv_flt_to_s16_sse2); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, 0, 16, 16, "SSE2", ff_conv_flt_to_s32_sse2); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, + 2, 16, 16, "SSE2", ff_conv_s16p_to_s16_2ch_sse2); } if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, @@ -83,6 +94,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) 0, 32, 16, "AVX", ff_conv_s32_to_flt_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, 0, 32, 32, "AVX", ff_conv_flt_to_s32_avx); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, + 2, 16, 16, "AVX", ff_conv_s16p_to_s16_2ch_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } -- 1.7.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 00/15] lavr: x86: 2 and 6 channel (de)interleaving (ver 3)
New round of patches for libavresample x86 asm conversions. Justin Ruggles (15): lavr: x86: optimized 2-channel s16p to s16 conversion lavr: x86: optimized 6-channel s16p to s16 conversion lavr: x86: optimized 2-channel s16p to flt conversion lavr: x86: optimized 6-channel s16p to flt conversion lavr: x86: optimized 2-channel fltp to s16 conversion lavr: x86: optimized 6-channel fltp to s16 conversion lavr: x86: optimized 2-channel fltp to flt conversion lavr: x86: optimized 2-channel s16 to s16p conversion lavr: x86: optimized 6-channel s16 to s16p conversion lavr: x86: optimized 2-channel s16 to fltp conversion lavr: x86: optimized 6-channel s16 to fltp conversion lavr: x86: optimized 2-channel flt to s16p conversion lavr: x86: optimized 6-channel flt to s16p conversion lavr: x86: optimized 2-channel flt to fltp conversion lavr: x86: optimized 6-channel flt to fltp conversion libavresample/x86/audio_convert.asm| 957 libavresample/x86/audio_convert_init.c | 180 ++ libavresample/x86/util.asm |6 + libavutil/x86/x86util.asm | 12 + 4 files changed, 1155 insertions(+), 0 deletions(-) ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] [HACK] x86: fix build with nasm 2.08
It appears that something goes wrong in old nasm versions when the %+ operator is used in the last argument of a macro invocation and this argument is tested with %ifdef within the macro. Adding a dummy argument somehow fixes this. --- libavutil/x86/x86inc.asm | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index e729924..3a640d1 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -507,11 +507,14 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 ; Appends cpuflags to the function name if cpuflags has been specified. %macro cglobal 1-2+ ; name, [PROLOGUE args] %if %0 == 1 -cglobal_internal %1 %+ SUFFIX +cglobal_internal2 %1 %+ SUFFIX, nasm_is_stupid %else cglobal_internal %1 %+ SUFFIX, %2 %endif %endmacro +%macro cglobal_internal2 2 +cglobal_internal %1 +%endmacro %macro cglobal_internal 1-2+ %ifndef cglobaled_%1 %xdefine %1 mangle(program_name %+ _ %+ %1) @@ -795,9 +798,9 @@ INIT_XMM ; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't %macro call 1 -call_internal %1, %1 %+ SUFFIX +call_internal %1, %1 %+ SUFFIX, nasm_is_stupid %endmacro -%macro call_internal 2 +%macro call_internal 3 %xdefine %%i %1 %ifndef cglobaled_%1 %ifdef cglobaled_%2 -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] x86: build avx functions only if HAVE_AVX is set
This is required with oldish nasm versions still shipped with some systems. Without this patch, the code still builds but due to a bug[1] crashes at runtime even if avx is not used. [1] http://repo.or.cz/w/nasm.git/commitdiff/3cb0e8c052a672424eaf59a021f0dbfb6ef205b8 Signed-off-by: Mans Rullgard --- libavcodec/x86/h264_deblock.asm | 10 ++ libavcodec/x86/h264_deblock_10bit.asm | 14 ++ libavcodec/x86/h264dsp_mmx.c | 2 +- libavcodec/x86/imdct36_sse.asm| 4 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm index 940a8f7..cea02da 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -390,8 +390,11 @@ cglobal deblock_h_luma_8, 5,9 INIT_XMM sse2 DEBLOCK_LUMA + +%if HAVE_AVX INIT_XMM avx DEBLOCK_LUMA +%endif %else @@ -509,8 +512,11 @@ INIT_MMX mmx2 DEBLOCK_LUMA v8, 8 INIT_XMM sse2 DEBLOCK_LUMA v, 16 + +%if HAVE_AVX INIT_XMM avx DEBLOCK_LUMA v, 16 +%endif %endif ; ARCH @@ -781,8 +787,12 @@ cglobal deblock_h_luma_intra_8, 2,4 INIT_XMM sse2 DEBLOCK_LUMA_INTRA v + +%if HAVE_AVX INIT_XMM avx DEBLOCK_LUMA_INTRA v +%endif + %if ARCH_X86_64 == 0 INIT_MMX mmx2 DEBLOCK_LUMA_INTRA v8 diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index 7b9316d..56f46e0 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -418,10 +418,14 @@ cglobal deblock_h_luma_10, 5,7,15 INIT_XMM sse2 DEBLOCK_LUMA_64 + +%if HAVE_AVX INIT_XMM avx DEBLOCK_LUMA_64 %endif +%endif + %macro SWAPMOVA 2 %ifid %1 SWAP %1, %2 @@ -713,8 +717,11 @@ cglobal deblock_h_luma_intra_10, 4,7,16 INIT_XMM sse2 DEBLOCK_LUMA_INTRA_64 + +%if HAVE_AVX INIT_XMM avx DEBLOCK_LUMA_INTRA_64 +%endif %endif @@ -798,11 +805,15 @@ DEBLOCK_LUMA_INTRA INIT_XMM sse2 DEBLOCK_LUMA DEBLOCK_LUMA_INTRA + +%if HAVE_AVX INIT_XMM avx DEBLOCK_LUMA DEBLOCK_LUMA_INTRA %endif +%endif + ; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp ; out: %1=p0', %2=q0' 
%macro CHROMA_DEBLOCK_P0_Q0_INTRA 7 @@ -912,5 +923,8 @@ DEBLOCK_CHROMA %endif INIT_XMM sse2 DEBLOCK_CHROMA + +%if HAVE_AVX INIT_XMM avx DEBLOCK_CHROMA +%endif diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c index 0612ffb..130308d 100644 --- a/libavcodec/x86/h264dsp_mmx.c +++ b/libavcodec/x86/h264dsp_mmx.c @@ -292,7 +292,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3; c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3; } -if (mm_flags & AV_CPU_FLAG_AVX) { +if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { #if HAVE_ALIGNED_STACK c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx; c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx; diff --git a/libavcodec/x86/imdct36_sse.asm b/libavcodec/x86/imdct36_sse.asm index 937a2cc..336e9f0 100644 --- a/libavcodec/x86/imdct36_sse.asm +++ b/libavcodec/x86/imdct36_sse.asm @@ -371,8 +371,10 @@ DEFINE_IMDCT INIT_XMM ssse3 DEFINE_IMDCT +%if HAVE_AVX INIT_XMM avx DEFINE_IMDCT +%endif INIT_XMM sse @@ -717,5 +719,7 @@ cglobal four_imdct36_float, 5,5,16, out, buf, in, win, tmp INIT_XMM sse DEFINE_FOUR_IMDCT +%if HAVE_AVX INIT_XMM avx DEFINE_FOUR_IMDCT +%endif -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH v2] x86: use 32-bit source registers with movd instruction
yasm tolerates mismatch between movd/movq and source register size, adjusting the instruction according to the register. nasm is more strict. Signed-off-by: Mans Rullgard --- Missed a couple that somehow worked with nasm 2.10 but fail with 2.08. --- libavcodec/x86/h264_deblock_10bit.asm | 12 ++-- libavcodec/x86/rv34dsp.asm| 6 +++--- libavcodec/x86/rv40dsp.asm| 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index b18f7bc..7b9316d 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -165,7 +165,7 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) SUBrsp, pad shlr2d, 2 shlr3d, 2 -LOAD_AB m4, m5, r2, r3 +LOAD_AB m4, m5, r2d, r3d mov r3, 32/mmsize mov r2, r0 sub r0, r1 @@ -222,7 +222,7 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) SUBrsp, pad shlr2d, 2 shlr3d, 2 -LOAD_AB m4, m5, r2, r3 +LOAD_AB m4, m5, r2d, r3d mov r3, r1 movaam, m4 add r3, r1 @@ -351,7 +351,7 @@ cglobal deblock_v_luma_10, 5,5,15 %define mask2 m11 shlr2d, 2 shlr3d, 2 -LOAD_ABm12, m13, r2, r3 +LOAD_ABm12, m13, r2d, r3d mov r2, r0 sub r0, r1 sub r0, r1 @@ -379,7 +379,7 @@ cglobal deblock_v_luma_10, 5,5,15 cglobal deblock_h_luma_10, 5,7,15 shlr2d, 2 shlr3d, 2 -LOAD_ABm12, m13, r2, r3 +LOAD_ABm12, m13, r2d, r3d mov r2, r1 add r2, r1 add r2, r1 @@ -857,7 +857,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) .loop: %endif CHROMA_V_LOAD r5 -LOAD_AB m4, m5, r2, r3 +LOAD_AB m4, m5, r2d, r3d LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 pxorm4, m4 CHROMA_V_LOAD_TC m6, r4 @@ -891,7 +891,7 @@ cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16) .loop: %endif CHROMA_V_LOAD r4 -LOAD_AB m4, m5, r2, r3 +LOAD_AB m4, m5, r2d, r3d LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6 CHROMA_V_STORE diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm index c43b77a..78d8c92 100644 --- 
a/libavcodec/x86/rv34dsp.asm +++ b/libavcodec/x86/rv34dsp.asm @@ -49,7 +49,7 @@ SECTION .text cglobal rv34_idct_%1, 1, 2, 0 movsx r1, word [r0] IDCT_DC r1 -movdm0, r1 +movdm0, r1d pshufw m0, m0, 0 movq[r0+ 0], m0 movq[r0+ 8], m0 @@ -70,7 +70,7 @@ cglobal rv34_idct_dc_add, 3, 3 ; calculate DC IDCT_DC_ROUND r2 pxor m1, m1 -movd m0, r2 +movd m0, r2d psubw m1, m0 packuswb m0, m0 packuswb m1, m1 @@ -175,7 +175,7 @@ cglobal rv34_idct_dc_add, 3, 3, 6 pxor m1, m1 ; calculate DC -movd m0, r2 +movd m0, r2d lear2, [r0+r1*2] movd m2, [r0] movd m3, [r0+r1] diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index ae740c2..70c0c04 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -466,8 +466,8 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8 addr2, r6 negr6 -movd m2, r3 -movd m3, r4 +movd m2, r3d +movd m3, r4d %ifidn %1,rnd %define RND 0 SPLATW m2, m2 -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 0/5] Restore nasm support
All OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/5] x86: add colons after labels
nasm prints a warning if the colon is missing. Signed-off-by: Mans Rullgard --- libavcodec/x86/deinterlace.asm | 2 +- libavcodec/x86/dsputil_yasm.asm| 6 ++-- libavcodec/x86/dsputilenc_yasm.asm | 4 +-- libavcodec/x86/fft_mmx.asm | 2 +- libavcodec/x86/fmtconvert.asm | 2 +- libavcodec/x86/h264_chromamc.asm | 30 +- libavcodec/x86/h264_chromamc_10bit.asm | 16 +- libavcodec/x86/h264_deblock_10bit.asm | 4 +-- libavcodec/x86/h264_idct.asm | 58 +- libavcodec/x86/h264_idct_10bit.asm | 2 +- libavcodec/x86/h264_intrapred.asm | 16 +- libavcodec/x86/h264_qpel_10bit.asm | 16 +- libavcodec/x86/h264_weight.asm | 16 +- libavcodec/x86/h264_weight_10bit.asm | 16 +- libavcodec/x86/vp56dsp.asm | 4 +-- libavcodec/x86/vp8dsp.asm | 30 +- libavresample/x86/audio_mix.asm| 2 +- libavutil/x86/float_dsp.asm| 4 +-- 18 files changed, 115 insertions(+), 115 deletions(-) diff --git a/libavcodec/x86/deinterlace.asm b/libavcodec/x86/deinterlace.asm index 8613485..8681181 100644 --- a/libavcodec/x86/deinterlace.asm +++ b/libavcodec/x86/deinterlace.asm @@ -39,7 +39,7 @@ cglobal deinterlace_line_mmx, 7,7,7, dst, lum_m4, lum_m3, lum_m2, lum_m1 %endif pxor mm7, mm7 movq mm6, [pw_4] -.nextrow +.nextrow: movd mm0, [lum_m4q] movd mm1, [lum_m3q] movd mm2, [lum_m2q] diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm index af2de15..d6cf824 100644 --- a/libavcodec/x86/dsputil_yasm.asm +++ b/libavcodec/x86/dsputil_yasm.asm @@ -1143,7 +1143,7 @@ VECTOR_CLIP_INT32 6, 1, 0, 0 cglobal vector_fmul_reverse, 4,4,2, dst, src0, src1, len lea lenq, [lend*4 - 2*mmsize] ALIGN 16 -.loop +.loop: %if cpuflag(avx) vmovaps xmm0, [src1q + 16] vinsertf128 m0, m0, [src1q], 1 @@ -1182,7 +1182,7 @@ VECTOR_FMUL_REVERSE cglobal vector_fmul_add, 5,5,2, dst, src0, src1, src2, len lea lenq, [lend*4 - 2*mmsize] ALIGN 16 -.loop +.loop: movam0, [src0q + lenq] movam1, [src0q + lenq + mmsize] mulps m0, m0, [src1q + lenq] @@ -1313,7 +1313,7 @@ cglobal bswap32_buf, 3,4,5 add r0, 4 dec r2 jnz .loop2 -.end +.end: RET ; 
%1 = aligned/unaligned diff --git a/libavcodec/x86/dsputilenc_yasm.asm b/libavcodec/x86/dsputilenc_yasm.asm index cfd4e6d..b7078f1 100644 --- a/libavcodec/x86/dsputilenc_yasm.asm +++ b/libavcodec/x86/dsputilenc_yasm.asm @@ -184,7 +184,7 @@ cglobal hadamard8_diff16_%1, 5, 6, %2 call hadamard8x8_diff_%1 addr5d, eax -.done +.done: moveax, r5d %ifndef m8 ADDrsp, pad @@ -288,7 +288,7 @@ cglobal sse16_sse2, 5, 5, 8 pxor m0, m0 ; mm0 = 0 pxor m7, m7 ; mm7 holds the sum -.next2lines ; FIXME why are these unaligned movs? pix1[] is aligned +.next2lines: ; FIXME why are these unaligned movs? pix1[] is aligned movu m1, [r1 ]; mm1 = pix1[0][0-15] movu m2, [r2 ]; mm2 = pix2[0][0-15] movu m3, [r1+r3]; mm3 = pix1[1][0-15] diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm index 6082d9e..60d6669 100644 --- a/libavcodec/x86/fft_mmx.asm +++ b/libavcodec/x86/fft_mmx.asm @@ -607,7 +607,7 @@ cglobal fft_calc, 2,5,8 add rcx, 3 shl r2, cl sub r4, r2 -.loop +.loop: %if mmsize == 8 PSWAPD m0, [r4 + r2 + 4] mova [r4 + r2 + 4], m0 diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index 0fd14fe..46b7e85 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -404,7 +404,7 @@ cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1 mov src1q, [srcq+gprsize] mov srcq, [srcq] sub src1q, srcq -.loop +.loop: MOVPS m0, [srcq ] MOVPS m1, [srcq+src1q ] MOVPS m3, [srcq +mmsize] diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm index 64a4efe..56b8e56 100644 --- a/libavcodec/x86/h264_chromamc.asm +++ b/libavcodec/x86/h264_chromamc.asm @@ -69,7 +69,7 @@ SECTION .text %macro mv0_pixels_mc8 0 lea r4, [r2*2 ] -.next4rows +.next4rows: movq mm0, [r1 ] movq mm1, [r1+r2] add r1, r4 @@ -117,7 +117,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0 mv0_pixels_mc8 REP_RET -.at_least_one_non_zero +.at_least_one_non_zero: %ifidn %2, rv40 %if ARCH_X86_64 mov r7, r5 @@ -145,7 +145,7 @@ cglobal %1_%2_chroma_mc8_%3, 
6, 7 + extra_regs, 0 test r4d, r4d mov r6, r2; dxy = x ? 1 : stride jne .both_non_zero -.my_is_zero +.my_is_zero: ; mx == 0 XOR my == 0 - 1 dimensional filter only or r4d
[libav-devel] [PATCH 2/5] x86: use 32-bit source registers with movd instruction
yasm tolerates mismatch between movd/movq and source register size, adjusting the instruction according to the register. nasm is more strict. Signed-off-by: Mans Rullgard --- libavcodec/x86/h264_deblock_10bit.asm | 12 ++-- libavcodec/x86/rv34dsp.asm| 2 +- libavcodec/x86/rv40dsp.asm| 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index b18f7bc..7b9316d 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -165,7 +165,7 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) SUBrsp, pad shlr2d, 2 shlr3d, 2 -LOAD_AB m4, m5, r2, r3 +LOAD_AB m4, m5, r2d, r3d mov r3, 32/mmsize mov r2, r0 sub r0, r1 @@ -222,7 +222,7 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) SUBrsp, pad shlr2d, 2 shlr3d, 2 -LOAD_AB m4, m5, r2, r3 +LOAD_AB m4, m5, r2d, r3d mov r3, r1 movaam, m4 add r3, r1 @@ -351,7 +351,7 @@ cglobal deblock_v_luma_10, 5,5,15 %define mask2 m11 shlr2d, 2 shlr3d, 2 -LOAD_ABm12, m13, r2, r3 +LOAD_ABm12, m13, r2d, r3d mov r2, r0 sub r0, r1 sub r0, r1 @@ -379,7 +379,7 @@ cglobal deblock_v_luma_10, 5,5,15 cglobal deblock_h_luma_10, 5,7,15 shlr2d, 2 shlr3d, 2 -LOAD_ABm12, m13, r2, r3 +LOAD_ABm12, m13, r2d, r3d mov r2, r1 add r2, r1 add r2, r1 @@ -857,7 +857,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) .loop: %endif CHROMA_V_LOAD r5 -LOAD_AB m4, m5, r2, r3 +LOAD_AB m4, m5, r2d, r3d LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 pxorm4, m4 CHROMA_V_LOAD_TC m6, r4 @@ -891,7 +891,7 @@ cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16) .loop: %endif CHROMA_V_LOAD r4 -LOAD_AB m4, m5, r2, r3 +LOAD_AB m4, m5, r2d, r3d LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6 CHROMA_V_STORE diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm index c43b77a..9cd2a35 100644 --- a/libavcodec/x86/rv34dsp.asm +++ b/libavcodec/x86/rv34dsp.asm @@ -175,7 +175,7 @@ cglobal 
rv34_idct_dc_add, 3, 3, 6 pxor m1, m1 ; calculate DC -movd m0, r2 +movd m0, r2d lear2, [r0+r1*2] movd m2, [r0] movd m3, [r0+r1] diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index ae740c2..70c0c04 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -466,8 +466,8 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8 addr2, r6 negr6 -movd m2, r3 -movd m3, r4 +movd m2, r3d +movd m3, r4d %ifidn %1,rnd %define RND 0 SPLATW m2, m2 -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 3/5] build: add trailing / to yasm/nasm -I flags
nasm requires a trailing / on paths specified with -I. It does no harm with yasm. Signed-off-by: Mans Rullgard --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 644aff4..20e6a38 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ CPPFLAGS := $(IFLAGS) $(CPPFLAGS) CFLAGS += $(ECFLAGS) CCFLAGS = $(CPPFLAGS) $(CFLAGS) ASFLAGS:= $(CPPFLAGS) $(ASFLAGS) -YASMFLAGS += $(IFLAGS) -I$(SRC_PATH)/libavutil/x86/ -Pconfig.asm +YASMFLAGS += $(IFLAGS:%=%/) -I$(SRC_PATH)/libavutil/x86/ -Pconfig.asm HOSTCCFLAGS = $(IFLAGS) $(HOSTCFLAGS) LDFLAGS:= $(ALLFFLIBS:%=-Llib%) $(LDFLAGS) -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 4/5] x86: fix rNmp macros with nasm
For some reason, nasm requires this. No harm done to yasm. Signed-off-by: Mans Rullgard --- libavutil/x86/x86inc.asm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 03e6c07..86c406f 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -139,10 +139,10 @@ CPU amdnop %define r%1mp %2 %elif ARCH_X86_64 ; memory %define r%1m [rsp + stack_offset + %3] -%define r%1mp qword r %+ %1m +%define r%1mp qword r %+ %1 %+ m %else %define r%1m [esp + stack_offset + %3] -%define r%1mp dword r %+ %1m +%define r%1mp dword r %+ %1 %+ m %endif %define r%1 %2 %endmacro -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 5/5] x86: use nop cpu directives only if supported
nasm does not support 'CPU foonop' directives. This adds a configure test for the directive and uses it only if supported. Signed-off-by: Mans Rullgard --- configure| 2 ++ libavutil/x86/x86inc.asm | 12 +--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 84a1b6b..dff4bee 100755 --- a/configure +++ b/configure @@ -1084,6 +1084,7 @@ HAVE_LIST=" closesocket cmov cpuid +cpunop dcbzl dev_bktr_ioctl_bt848_h dev_bktr_ioctl_meteor_h @@ -2897,6 +2898,7 @@ EOF die "yasm not found, use --disable-yasm for a crippled build" check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4 +check_yasm "CPU amdnop" && enable cpunop fi case "$cpu" in diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 86c406f..e729924 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -91,8 +91,14 @@ default rel %endif +%macro CPUNOP 1 +%if HAVE_CPUNOP +CPU %1 +%endif +%endmacro + ; Always use long nops (reduces 0x90 spam in disassembly on x86_32) -CPU amdnop +CPUNOP amdnop ; Macros to eliminate most code duplication between x86_32 and x86_64: ; Currently this works only for leaf functions which load all their arguments @@ -589,7 +595,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits ; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu. ; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co. %macro INIT_CPUFLAGS 0-2 -CPU amdnop +CPUNOP amdnop %if %0 >= 1 %xdefine cpuname %1 %assign cpuflags cpuflags_%1 @@ -612,7 +618,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %define movu lddqu %endif %if notcpuflag(mmx2) -CPU basicnop +CPUNOP basicnop %endif %else %xdefine SUFFIX -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 0/5] Restore nasm support
These patches restore support for building x86 asm with nasm. Mans Rullgard (5): x86: add colons after labels x86: use 32-bit source registers with movd instruction build: add trailing / to yasm/nasm -I flags x86: fix rNmp macros with nasm x86: use nop cpu directives only if supported Makefile | 2 +- configure | 2 ++ libavcodec/x86/deinterlace.asm | 2 +- libavcodec/x86/dsputil_yasm.asm| 6 ++-- libavcodec/x86/dsputilenc_yasm.asm | 4 +-- libavcodec/x86/fft_mmx.asm | 2 +- libavcodec/x86/fmtconvert.asm | 2 +- libavcodec/x86/h264_chromamc.asm | 30 +- libavcodec/x86/h264_chromamc_10bit.asm | 16 +- libavcodec/x86/h264_deblock_10bit.asm | 16 +- libavcodec/x86/h264_idct.asm | 58 +- libavcodec/x86/h264_idct_10bit.asm | 2 +- libavcodec/x86/h264_intrapred.asm | 16 +- libavcodec/x86/h264_qpel_10bit.asm | 16 +- libavcodec/x86/h264_weight.asm | 16 +- libavcodec/x86/h264_weight_10bit.asm | 16 +- libavcodec/x86/rv34dsp.asm | 2 +- libavcodec/x86/rv40dsp.asm | 4 +-- libavcodec/x86/vp56dsp.asm | 4 +-- libavcodec/x86/vp8dsp.asm | 30 +- libavresample/x86/audio_mix.asm| 2 +- libavutil/x86/float_dsp.asm| 4 +-- libavutil/x86/x86inc.asm | 16 +++--- 23 files changed, 138 insertions(+), 130 deletions(-) -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/2] imc: remove unused field IMCContext.one_div_log2
OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 1/2] imc: fix size of a memset()
OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] Use log2(x) instead of log(x) / log(2)
OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.
Diego Biurrun writes: > No, there is another issue that i cannot quite put my finger on, which > causes errors of the type: > > error: (call_internal:3) `%ifdef' expects macro identifiers I'm not seeing any such messages. I'm getting a bunch of other errors and warnings, mostly easy to fix. -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.
Hi, On Sun, Aug 5, 2012 at 2:20 PM, Diego Biurrun wrote: > On Sun, Aug 05, 2012 at 09:35:27PM +0100, Måns Rullgård wrote: >> Diego Biurrun writes: >> > On Tue, May 22, 2012 at 09:09:39PM +0200, Diego Biurrun wrote: >> >> On Sun, May 20, 2012 at 06:18:10PM +0200, Diego Biurrun wrote: >> >> > On Sat, May 19, 2012 at 04:54:18PM +0100, Måns Rullgård wrote: >> >> > > Diego Biurrun writes: >> >> > > > On Sat, May 19, 2012 at 04:21:01PM +0100, Måns Rullgård wrote: >> >> > > >> Diego Biurrun writes: >> >> > > >> > This avoids nasm 2.08 being detected as a compatible Assembler. >> >> > > >> > --- >> >> > > >> > Probably at least the log message needs finetuning. That said, >> >> > > >> > this works >> >> > > >> > for me and avoids nasm being detected as a working Assembler and >> >> > > >> > later >> >> > > >> > failing with >> >> > > >> > --- a/configure >> >> > > >> > +++ b/configure >> >> > > >> > @@ -2815,7 +2815,7 @@ EOF >> >> > > >> > elf*) enabled debug && append YASMFLAGS $yasm_debug >> >> > > >> > ;; >> >> > > >> > esac >> >> > > >> > >> >> > > >> > -check_yasm "pextrd [eax], xmm0, 1" && enable yasm || >> >> > > >> > +check_yasm "CPU amdnop" && enable yasm || >> >> > > >> > die "yasm not found, use --disable-yasm for a >> >> > > >> > crippled build" >> >> > > >> > check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx >> >> > > >> >> >> > > >> Is that directive the only one causing trouble? >> >> > > > >> >> > > > No, there are a few others, like certain AVX instructions, that also >> >> > > > cause trouble, >> >> > > >> >> > > Aren't those disabled by the next test? >> >> > >> >> > Quite possibly that is the intention, but if I read our asm files >> >> > correctly, then not all appearances of AVX instructions are "ifdeffed". >> >> > Given my lack of familiarity with yasm syntax, I might well be wrong. 
>> >> >> >> Justin's patch that I just pushed fixes that issue, but the following >> >> remains: >> >> >> >> libavcodec/x86/h264_idct.asm:613: error: (call_internal:3) `%ifdef' >> >> expects macro identifiers >> >> >> >> A few dozen identical ones with different line numbers follow. >> >> Can somebody shine a light on this one? >> >> >> >> The original error message that my patch addresses is >> >> >> >> x86inc.asm:100: error: unknown 'cpu' type >> > >> > .. ping .. >> > >> > The problem persists and none of the people able to fix this have shown >> > interest in fixing nasm support. >> > >> > My patch is enough to detect failing nasm versions in practice, so I'd >> > like to push it. configure should error out when we know that compilation >> > will fail. >> >> Is this the only thing it fails on? > > No, there is another issue that i cannot quite put my finger on, which > causes errors of the type: > > error: (call_internal:3) `%ifdef' expects macro identifiers I'm guessing this is because of the macro abstraction around "call". The purpose of these abstractions is so we can call something for cpuflags-enabled macro-functions, and call a cpuflag-specific implementation for each, e.g. something_mmx for the mmx function, but something_sse for the sse function. Nasm appears to bail if the macro is not actually defined (in which case with yasm, it defaults back to calling "just" something without a suffix). Look for all uses of call and it should be easy to comment 1-2 out and see when it disappears. Explicitly stating that the call is non-cpu'y may be a workaround. Maybe Loren has better ideas. Ronald ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.
On Sun, Aug 05, 2012 at 10:45:19PM +0100, Måns Rullgård wrote: > Diego Biurrun writes: > > No, there is another issue that i cannot quite put my finger on, which > > causes errors of the type: > > > > error: (call_internal:3) `%ifdef' expects macro identifiers > > Does this happen with all files or just some? libavcodec/x86/dsputilenc_yasm.asm libavcodec/x86/h264_idct.asm libavcodec/x86/h264_qpel_10bit.asm libavcodec/x86/h264_weight_10bit.asm libavcodec/x86/vc1dsp_yasm.asm Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.
Diego Biurrun writes: > No, there is another issue that i cannot quite put my finger on, which > causes errors of the type: > > error: (call_internal:3) `%ifdef' expects macro identifiers Does this happen with all files or just some? -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] imc: remove unused field IMCContext.one_div_log2
Signed-off-by: Mans Rullgard --- libavcodec/imc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libavcodec/imc.c b/libavcodec/imc.c index 726ca67..0d41d5f 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -92,7 +92,6 @@ typedef struct { float sqrt_tab[30]; GetBitContext gb; -float one_div_log2; DSPContext dsp; FFTContext fft; @@ -227,7 +226,6 @@ static av_cold int imc_decode_init(AVCodecContext *avctx) imc_huffman_bits[i][j], 2, 2, INIT_VLC_USE_NEW_STATIC); } } -q->one_div_log2 = 1 / log(2); if (avctx->codec_id == CODEC_ID_IAC) { iac_generate_tabs(q, avctx->sample_rate); -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] imc: fix size of a memset()
IMCContext was changed from an array to a pointer in 66b84e4, but this memset() was not updated. Signed-off-by: Mans Rullgard --- libavcodec/imc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/imc.c b/libavcodec/imc.c index 899572a..726ca67 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -789,7 +789,7 @@ static int imc_decode_block(AVCodecContext *avctx, IMCContext *q, int ch) chctx->decoder_reset = 1; if (chctx->decoder_reset) { -memset(q->out_samples, 0, sizeof(q->out_samples)); +memset(q->out_samples, 0, COEFFS * sizeof(*q->out_samples)); for (i = 0; i < BANDS; i++) chctx->old_floor[i] = 1.0; for (i = 0; i < COEFFS; i++) -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] Use log2(x) instead of log(x) / log(2)
Signed-off-by: Mans Rullgard --- avconv.c | 2 +- avprobe.c| 2 +- libavcodec/imc.c | 4 ++-- libavcodec/snowenc.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/avconv.c b/avconv.c index b20dbec..8429a72 100644 --- a/avconv.c +++ b/avconv.c @@ -1661,7 +1661,7 @@ static void print_report(int is_last_report, int64_t timer_start) if (qp >= 0 && qp < FF_ARRAY_ELEMS(qp_histogram)) qp_histogram[qp]++; for (j = 0; j < 32; j++) -snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log(qp_histogram[j] + 1) / log(2))); +snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log2(qp_histogram[j] + 1))); } if (enc->flags&CODEC_FLAG_PSNR) { int j; diff --git a/avprobe.c b/avprobe.c index 5fe5b89..5e03433 100644 --- a/avprobe.c +++ b/avprobe.c @@ -468,7 +468,7 @@ static char *value_string(char *buf, int buf_size, double val, const char *unit) int index; if (unit == unit_byte_str && use_byte_value_binary_prefix) { -index = (int) (log(val)/log(2)) / 10; +index = (int) log2(val) / 10; index = av_clip(index, 0, FF_ARRAY_ELEMS(binary_unit_prefixes) - 1); val /= pow(2, index * 10); prefix_string = binary_unit_prefixes[index]; diff --git a/libavcodec/imc.c b/libavcodec/imc.c index 297efbb..899572a 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -344,7 +344,7 @@ static void imc_decode_level_coefficients(IMCContext *q, int *levlCoeffBuf, // maybe some frequency division thingy flcoeffs1[0] = 2.0 / pow (2, levlCoeffBuf[0] * 0.18945); // 0.18945 = log2(10) * 0.05703125 -flcoeffs2[0] = log(flcoeffs1[0]) / log(2); +flcoeffs2[0] = log2f(flcoeffs1[0]); tmp = flcoeffs1[0]; tmp2 = flcoeffs2[0]; @@ -416,7 +416,7 @@ static int bit_allocation(IMCContext *q, IMCChannel *chctx, highest = FFMAX(highest, chctx->flcoeffs1[i]); for (i = 0; i < BANDS - 1; i++) -chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log(chctx->flcoeffs5[i]) / log(2); +chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log2f(chctx->flcoeffs5[i]); chctx->flcoeffs4[BANDS - 1] = 
limit; highest = highest * 0.25; diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c index ebfeff6..6e57f82 100644 --- a/libavcodec/snowenc.c +++ b/libavcodec/snowenc.c @@ -1529,7 +1529,7 @@ static void update_last_header_values(SnowContext *s){ } static int qscale2qlog(int qscale){ -return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) +return rint(QROOT*log2(qscale / (float)FF_QP2LAMBDA)) + 61*QROOT/8; ///< 64 > 60 } -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] imc: use log2(x) instead of log(x) / log(2)
On Sun, Aug 05, 2012 at 10:22:14PM +0100, Mans Rullgard wrote: > Signed-off-by: Mans Rullgard > --- > libavcodec/imc.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/libavcodec/imc.c b/libavcodec/imc.c > index 297efbb..92e9c8c 100644 > --- a/libavcodec/imc.c > +++ b/libavcodec/imc.c > @@ -416,7 +416,7 @@ static int bit_allocation(IMCContext *q, IMCChannel > *chctx, > highest = FFMAX(highest, chctx->flcoeffs1[i]); > > for (i = 0; i < BANDS - 1; i++) > -chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log(chctx->flcoeffs5[i]) > / log(2); > +chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - > log2(chctx->flcoeffs5[i]); > chctx->flcoeffs4[BANDS - 1] = limit; > > highest = highest * 0.25; Can't any of these be changed as well, or at least the one in the same file? % git grep 'log(.*log(2)' avconv.c:snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log(qp_histogram[j] + 1) / log(2))); avprobe.c:index = (int) (log(val)/log(2)) / 10; libavcodec/imc.c:flcoeffs2[0] = log(flcoeffs1[0]) / log(2); libavcodec/imc.c:chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log(chctx->flcoeffs5[i]) / log(2); libavcodec/snowenc.c:return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) [...] -- Clément B. pgpKaL65kiZwd.pgp Description: PGP signature ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] imc: use log2(x) instead of log(x) / log(2)
OK ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] imc: use log2(x) instead of log(x) / log(2)
Signed-off-by: Mans Rullgard --- libavcodec/imc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/imc.c b/libavcodec/imc.c index 297efbb..92e9c8c 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -416,7 +416,7 @@ static int bit_allocation(IMCContext *q, IMCChannel *chctx, highest = FFMAX(highest, chctx->flcoeffs1[i]); for (i = 0; i < BANDS - 1; i++) -chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log(chctx->flcoeffs5[i]) / log(2); +chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log2(chctx->flcoeffs5[i]); chctx->flcoeffs4[BANDS - 1] = limit; highest = highest * 0.25; -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.
On Sun, Aug 05, 2012 at 09:35:27PM +0100, Måns Rullgård wrote: > Diego Biurrun writes: > > On Tue, May 22, 2012 at 09:09:39PM +0200, Diego Biurrun wrote: > >> On Sun, May 20, 2012 at 06:18:10PM +0200, Diego Biurrun wrote: > >> > On Sat, May 19, 2012 at 04:54:18PM +0100, Måns Rullgård wrote: > >> > > Diego Biurrun writes: > >> > > > On Sat, May 19, 2012 at 04:21:01PM +0100, Måns Rullgård wrote: > >> > > >> Diego Biurrun writes: > >> > > >> > This avoids nasm 2.08 being detected as a compatible Assembler. > >> > > >> > --- > >> > > >> > Probably at least the log message needs finetuning. That said, > >> > > >> > this works > >> > > >> > for me and avoids nasm being detected as a working Assembler and > >> > > >> > later > >> > > >> > failing with > >> > > >> > --- a/configure > >> > > >> > +++ b/configure > >> > > >> > @@ -2815,7 +2815,7 @@ EOF > >> > > >> > elf*) enabled debug && append YASMFLAGS $yasm_debug > >> > > >> > ;; > >> > > >> > esac > >> > > >> > > >> > > >> > -check_yasm "pextrd [eax], xmm0, 1" && enable yasm || > >> > > >> > +check_yasm "CPU amdnop" && enable yasm || > >> > > >> > die "yasm not found, use --disable-yasm for a > >> > > >> > crippled build" > >> > > >> > check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx > >> > > >> > >> > > >> Is that directive the only one causing trouble? > >> > > > > >> > > > No, there are a few others, like certain AVX instructions, that also > >> > > > cause trouble, > >> > > > >> > > Aren't those disabled by the next test? > >> > > >> > Quite possibly that is the intention, but if I read our asm files > >> > correctly, then not all appearances of AVX instructions are "ifdeffed". > >> > Given my lack of familiarity with yasm syntax, I might well be wrong. 
> >> > >> Justin's patch that I just pushed fixes that issue, but the following > >> remains: > >> > >> libavcodec/x86/h264_idct.asm:613: error: (call_internal:3) `%ifdef' > >> expects macro identifiers > >> > >> A few dozen identical ones with different line numbers follow. > >> Can somebody shine a light on this one? > >> > >> The original error message that my patch addresses is > >> > >> x86inc.asm:100: error: unknown 'cpu' type > > > > .. ping .. > > > > The problem persists and none of the people able to fix this have shown > > interest in fixing nasm support. > > > > My patch is enough to detect failing nasm versions in practice, so I'd > > like to push it. configure should error out when we know that compilation > > will fail. > > Is this the only thing it fails on? No, there is another issue that i cannot quite put my finger on, which causes errors of the type: error: (call_internal:3) `%ifdef' expects macro identifiers while the CPU directive causes errors of the type: x86inc.asm:100: error: unknown 'cpu' type Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] x86/cpu: Include system headers before local headers
On Sun, 5 Aug 2012, Måns Rullgård wrote: Martin Storsjö writes: An inline function in MSVC system headers included by these headers use free(). Which function? It's in _freea in the MSVC malloc.h (which is included implicitly). It's not used by us, but we enable the equivalent of -Werror=implicit-function-declaration, so it fails just by parsing the inline function. The local headers (after 239fdf1b) include internal.h that redirect free to please_use_av_free_instead_of_free. That is because avutil.h foolishly includes common.h. It really ought not do that. Can we please fix that instead? I'll give it a shot. // Martin___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] lavf: Declare an AVRational struct without a struct literal
On Sun, 5 Aug 2012, Måns Rullgård wrote: Martin Storsjö writes: At this place, the normal way of initializing a struct works fine, there's no need for a struct literal. --- libavformat/utils.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/utils.c b/libavformat/utils.c index 3630c6f..5b26c59 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -2499,7 +2499,7 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options) /* round guessed framerate to a "standard" framerate if it's * within 1% of the original estimate*/ for (j = 1; j < MAX_STD_TIMEBASES; j++) { -AVRational std_fps = (AVRational){get_std_framerate(j), 12*1001}; +AVRational std_fps = { get_std_framerate(j), 12*1001 }; double error = fabs(av_q2d(st->avg_frame_rate) / av_q2d(std_fps) - 1); if (error < best_error) { -- OK This is the second one of these I've seen. Is it the last one? I think so - the current MSVC preprocessor didn't handle this case properly, and it's the only such case in the preprocessor helper patchset right now as far as I can see. // Martin___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] lavf: Declare an AVRational struct without a struct literal
Martin Storsjö writes: > At this place, the normal way of initializing a struct works > fine, there's no need for a struct literal. > --- > libavformat/utils.c |2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/libavformat/utils.c b/libavformat/utils.c > index 3630c6f..5b26c59 100644 > --- a/libavformat/utils.c > +++ b/libavformat/utils.c > @@ -2499,7 +2499,7 @@ int avformat_find_stream_info(AVFormatContext *ic, > AVDictionary **options) > /* round guessed framerate to a "standard" framerate if it's > * within 1% of the original estimate*/ > for (j = 1; j < MAX_STD_TIMEBASES; j++) { > -AVRational std_fps = (AVRational){get_std_framerate(j), > 12*1001}; > +AVRational std_fps = { get_std_framerate(j), 12*1001 }; > double error = fabs(av_q2d(st->avg_frame_rate) / > av_q2d(std_fps) - 1); > > if (error < best_error) { > -- OK This is the second one of these I've seen. Is it the last one? -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] x86/cpu: Include system headers before local headers
Martin Storsjö writes: > An inline function in MSVC system headers included by these > headers use free(). Which function? > The local headers (after 239fdf1b) include internal.h that redirect > free to please_use_av_free_instead_of_free. That is because avutil.h foolishly includes common.h. It really ought not do that. Can we please fix that instead? -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] lavf: Declare an AVRational struct without a struct literal
At this place, the normal way of initializing a struct works fine, there's no need for a struct literal. --- libavformat/utils.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/utils.c b/libavformat/utils.c index 3630c6f..5b26c59 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -2499,7 +2499,7 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options) /* round guessed framerate to a "standard" framerate if it's * within 1% of the original estimate*/ for (j = 1; j < MAX_STD_TIMEBASES; j++) { -AVRational std_fps = (AVRational){get_std_framerate(j), 12*1001}; +AVRational std_fps = { get_std_framerate(j), 12*1001 }; double error = fabs(av_q2d(st->avg_frame_rate) / av_q2d(std_fps) - 1); if (error < best_error) { -- 1.7.9.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] x86/cpu: Include system headers before local headers
An inline function in MSVC system headers included by these headers use free(). The local headers (after 239fdf1b) include internal.h that redirect free to please_use_av_free_instead_of_free. --- libavutil/x86/cpu.c | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index a63b564..27f51ca 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -22,6 +22,16 @@ #include #include +#if HAVE_CPUID +#include +#endif +#if HAVE_XGETBV +#include +#endif +#if HAVE_RWEFLAGS +#include +#endif + #include "libavutil/x86_cpu.h" #include "libavutil/cpu.h" @@ -35,7 +45,6 @@ : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)\ : "0" (index)) #elif HAVE_CPUID -#include #define cpuid(index, eax, ebx, ecx, edx)\ do {\ @@ -52,7 +61,6 @@ #define xgetbv(index, eax, edx) \ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index)) #elif HAVE_XGETBV -#include #define xgetbv(index, eax, edx) \ do {\ @@ -76,8 +84,6 @@ #elif HAVE_RWEFLAGS -#include - #define get_eflags(x) \ x = __readeflags() -- 1.7.9.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.
Diego Biurrun writes: > On Tue, May 22, 2012 at 09:09:39PM +0200, Diego Biurrun wrote: >> On Sun, May 20, 2012 at 06:18:10PM +0200, Diego Biurrun wrote: >> > On Sat, May 19, 2012 at 04:54:18PM +0100, Måns Rullgård wrote: >> > > Diego Biurrun writes: >> > > > On Sat, May 19, 2012 at 04:21:01PM +0100, Måns Rullgård wrote: >> > > >> Diego Biurrun writes: >> > > >> > This avoids nasm 2.08 being detected as a compatible Assembler. >> > > >> > --- >> > > >> > Probably at least the log message needs finetuning. That said, >> > > >> > this works >> > > >> > for me and avoids nasm being detected as a working Assembler and >> > > >> > later >> > > >> > failing with >> > > >> > --- a/configure >> > > >> > +++ b/configure >> > > >> > @@ -2815,7 +2815,7 @@ EOF >> > > >> > elf*) enabled debug && append YASMFLAGS $yasm_debug ;; >> > > >> > esac >> > > >> > >> > > >> > -check_yasm "pextrd [eax], xmm0, 1" && enable yasm || >> > > >> > +check_yasm "CPU amdnop" && enable yasm || >> > > >> > die "yasm not found, use --disable-yasm for a crippled >> > > >> > build" >> > > >> > check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx >> > > >> >> > > >> Is that directive the only one causing trouble? >> > > > >> > > > No, there are a few others, like certain AVX instructions, that also >> > > > cause trouble, >> > > >> > > Aren't those disabled by the next test? >> > >> > Quite possibly that is the intention, but if I read our asm files >> > correctly, then not all appearances of AVX instructions are "ifdeffed". >> > Given my lack of familiarity with yasm syntax, I might well be wrong. >> >> Justin's patch that I just pushed fixes that issue, but the following >> remains: >> >> libavcodec/x86/h264_idct.asm:613: error: (call_internal:3) `%ifdef' >> expects macro identifiers >> >> A few dozen identical ones with different line numbers follow. >> Can somebody shine a light on this one? 
>> >> The original error message that my patch addresses is >> >> x86inc.asm:100: error: unknown 'cpu' type > > .. ping .. > > The problem persists and none of the people able to fix this have shown > interest in fixing nasm support. > > My patch is enough to detect failing nasm versions in practice, so I'd > like to push it. configure should error out when we know that compilation > will fail. Is this the only thing it fails on? -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] configure: x86: Check if Assembler can cope with "CPU" macro.
On Tue, May 22, 2012 at 09:09:39PM +0200, Diego Biurrun wrote: > On Sun, May 20, 2012 at 06:18:10PM +0200, Diego Biurrun wrote: > > On Sat, May 19, 2012 at 04:54:18PM +0100, Måns Rullgård wrote: > > > Diego Biurrun writes: > > > > On Sat, May 19, 2012 at 04:21:01PM +0100, Måns Rullgård wrote: > > > >> Diego Biurrun writes: > > > >> > This avoids nasm 2.08 being detected as a compatible Assembler. > > > >> > --- > > > >> > Probably at least the log message needs finetuning. That said, this > > > >> > works > > > >> > for me and avoids nasm being detected as a working Assembler and > > > >> > later > > > >> > failing with > > > >> > --- a/configure > > > >> > +++ b/configure > > > >> > @@ -2815,7 +2815,7 @@ EOF > > > >> > elf*) enabled debug && append YASMFLAGS $yasm_debug ;; > > > >> > esac > > > >> > > > > >> > -check_yasm "pextrd [eax], xmm0, 1" && enable yasm || > > > >> > +check_yasm "CPU amdnop" && enable yasm || > > > >> > die "yasm not found, use --disable-yasm for a crippled > > > >> > build" > > > >> > check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx > > > >> > > > >> Is that directive the only one causing trouble? > > > > > > > > No, there are a few others, like certain AVX instructions, that also > > > > cause trouble, > > > > > > Aren't those disabled by the next test? > > > > Quite possibly that is the intention, but if I read our asm files > > correctly, then not all appearances of AVX instructions are "ifdeffed". > > Given my lack of familiarity with yasm syntax, I might well be wrong. > > Justin's patch that I just pushed fixes that issue, but the following > remains: > > libavcodec/x86/h264_idct.asm:613: error: (call_internal:3) `%ifdef' expects > macro identifiers > > A few dozen identical ones with different line numbers follow. > Can somebody shine a light on this one? > > The original error message that my patch addresses is > > x86inc.asm:100: error: unknown 'cpu' type .. ping .. 
The problem persists and none of the people able to fix this have shown interest in fixing nasm support. My patch is enough to detect failing nasm versions in practice, so I'd like to push it. configure should error out when we know that compilation will fail. Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 3/3] doc: Clarify licensing issues arising from external libraries
On Thu, Jul 26, 2012 at 11:56:05PM +0200, Luca Barbato wrote: > On 07/26/2012 11:40 PM, Diego Biurrun wrote: > > On Thu, Jul 26, 2012 at 02:28:29PM +0200, Luca Barbato wrote: > >> On 07/22/2012 12:17 AM, Diego Biurrun wrote: > >>> --- > >>> LICENSE | 37 - > >>> 1 files changed, 24 insertions(+), 13 deletions(-) > >> > >> So far ok but > >> > >>> +The Fraunhofer AAC library, FAAC and OpenSSL are under licenses > >>> incompatible > >>> +with all (L)GPL versions. Thus, unfortunately, since both licenses > >>> cannot be > >>> +satisfied simultaneously, binaries resulting from the combination of > >>> Libav > >>> +with these libraries are nonfree and unredistributable. If you wish to > >>> enable > >>> +any of these libraries nonetheless, pass --enable-nonfree to configure. > >> > >> OpenSSL, being a system library is NOT to be marked nonfree on quite a > >> number of platforms... > > > > Oh, that opens a big can of worms... > > > > What platforms do you have in mind? Also, we currently do mark it as > > incompatible in configure by requiring the nonfree flag. So I'm just > > describing the status quo. > > Leave it as is, bsd people could survive till we fix that part. Sorry, I'm not following - what are you trying to say? Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] imc: remove empty if() block
On Sun, Aug 05, 2012 at 06:34:26PM +0100, Mans Rullgard wrote: > Signed-off-by: Mans Rullgard > --- > libavcodec/imc.c | 3 --- > 1 file changed, 3 deletions(-) > > diff --git a/libavcodec/imc.c b/libavcodec/imc.c > index 6df3e58..297efbb 100644 > --- a/libavcodec/imc.c > +++ b/libavcodec/imc.c > @@ -230,9 +230,6 @@ static av_cold int imc_decode_init(AVCodecContext *avctx) > q->one_div_log2 = 1 / log(2); > > if (avctx->codec_id == CODEC_ID_IAC) { > -} > - > -if (avctx->codec_id == CODEC_ID_IAC) { > iac_generate_tabs(q, avctx->sample_rate); > } else { > memcpy(q->cyclTab, cyclTab, sizeof(cyclTab)); > -- sorry ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] imc: remove empty if() block
On Sun, 5 Aug 2012, Mans Rullgard wrote: Signed-off-by: Mans Rullgard --- libavcodec/imc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/libavcodec/imc.c b/libavcodec/imc.c index 6df3e58..297efbb 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -230,9 +230,6 @@ static av_cold int imc_decode_init(AVCodecContext *avctx) q->one_div_log2 = 1 / log(2); if (avctx->codec_id == CODEC_ID_IAC) { -} - -if (avctx->codec_id == CODEC_ID_IAC) { iac_generate_tabs(q, avctx->sample_rate); } else { memcpy(q->cyclTab, cyclTab, sizeof(cyclTab)); -- 1.7.11.1 Ok // Martin ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] imc: remove empty if() block
Signed-off-by: Mans Rullgard --- libavcodec/imc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/libavcodec/imc.c b/libavcodec/imc.c index 6df3e58..297efbb 100644 --- a/libavcodec/imc.c +++ b/libavcodec/imc.c @@ -230,9 +230,6 @@ static av_cold int imc_decode_init(AVCodecContext *avctx) q->one_div_log2 = 1 / log(2); if (avctx->codec_id == CODEC_ID_IAC) { -} - -if (avctx->codec_id == CODEC_ID_IAC) { iac_generate_tabs(q, avctx->sample_rate); } else { memcpy(q->cyclTab, cyclTab, sizeof(cyclTab)); -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] dpx: Make start offset unsigned
On Sat, 4 Aug 2012, Derek Buitenhuis wrote: Some corrupted files would end up with a negative offset, and segfault. Fixes bug #177. Signed-off-by: Derek Buitenhuis --- libavcodec/dpx.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c index fadd5c3..9bce648 100644 --- a/libavcodec/dpx.c +++ b/libavcodec/dpx.c @@ -62,7 +62,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *const p = &s->picture; uint8_t *ptr; -int magic_num, offset, endian; +unsigned int offset; +int magic_num, endian; int x, y; int w, h, stride, bits_per_color, descriptor, elements, target_packet_size, source_packet_size; -- 1.7.10.4 Seems ok to me. // Martin ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational
On Sun, Aug 05, 2012 at 08:19:25AM -0700, Ronald S. Bultje wrote: > On Sun, Aug 5, 2012 at 8:05 AM, Måns Rullgård wrote: > > "Ronald S. Bultje" writes: > >> On Sun, Aug 5, 2012 at 5:07 AM, Måns Rullgård wrote: > >>> Mans Rullgard writes: > >>> > This allows simplifying a few expressions. > > Signed-off-by: Mans Rullgard > --- > >>> > >>> Ping. > >>> > >>> Does this solve the msvc problem with those expressions? > >> > >> I'm currently working on the converter to handle such code; we might > >> want to keep the old code, even though msvc has issues with it, so I > >> have an incentive to fix it. > > > > This patch is an improvement regardless. Can you now please answer the > > question? > > It does. Great, let's push it then. Ronald can still improve the converter. Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational
Hi, On Sun, Aug 5, 2012 at 8:05 AM, Måns Rullgård wrote: > "Ronald S. Bultje" writes: >> On Sun, Aug 5, 2012 at 5:07 AM, Måns Rullgård wrote: >>> Mans Rullgard writes: >>> This allows simplifying a few expressions. Signed-off-by: Mans Rullgard --- >>> >>> Ping. >>> >>> Does this solve the msvc problem with those expressions? >> >> I'm currently working on the converter to handle such code; we might >> want to keep the old code, even though msvc has issues with it, so I >> have an incentive to fix it. > > This patch is an improvement regardless. Can you now please answer the > question? It does. Ronald ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational
"Ronald S. Bultje" writes: > Hi, > > On Sun, Aug 5, 2012 at 5:07 AM, Måns Rullgård wrote: >> Mans Rullgard writes: >> >>> This allows simplifying a few expressions. >>> >>> Signed-off-by: Mans Rullgard >>> --- >> >> Ping. >> >> Does this solve the msvc problem with those expressions? > > I'm currently working on the converter to handle such code; we might > want to keep the old code, even though msvc has issues with it, so I > have an incentive to fix it. This patch is an improvement regardless. Can you now please answer the question? -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational
Hi, On Sun, Aug 5, 2012 at 5:07 AM, Måns Rullgård wrote: > Mans Rullgard writes: > >> This allows simplifying a few expressions. >> >> Signed-off-by: Mans Rullgard >> --- > > Ping. > > Does this solve the msvc problem with those expressions? I'm currently working on the converter to handle such code; we might want to keep the old code, even though msvc has issues with it, so I have an incentive to fix it. Ronald ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 07/45] x86: mmx2 ---> mmxext in asm constructs
Hi, On Sun, Aug 5, 2012 at 2:44 AM, Diego Biurrun wrote: > On Sat, Aug 04, 2012 at 06:19:38PM -0700, Ronald S. Bultje wrote: >> On Sat, Aug 4, 2012 at 2:28 PM, Loren Merritt >> wrote: >> > On Sat, 4 Aug 2012, Diego Biurrun wrote: >> >> On Sat, Aug 04, 2012 at 03:11:50PM -0400, Justin Ruggles wrote: >> >>> On 07/31/2012 06:17 PM, Diego Biurrun wrote: >> --- >> 30 files changed, 215 insertions(+), 211 deletions(-) >> >>> >> >>> Looks ok, but probably should get other opinions on this as well. I know >> >>> Ronald was trying to keep x86inc.asm sychronized with x264, and trying >> >>> to do so after this change would likely require similar extensive >> >>> cpuflag modifications in x264. >> >> >> >> I volunteer to patch x264 if such a change would be accepted on their >> >> side. >> > >> > Rejected. I like "mmx2" better. >> > However, I wouldn't be opposed to dropping mmx1 entirely and using >> > the name "mmx" to refer to mmx2. (x264 doesn't actually support mmx1 >> > anyway; we use mmx2 inline asm that's actually inlined in places where >> > runtime cpu detection is impossible.) But that wouldn't help >> > synchronization if libav doesn't do so. >> >> Right - from my PoV, keeping us in sync with x264 is more important >> that having a slightly more accurate cpuflag for something that's >> never exposed to end users in any way. > > It is exposed through configure. > >> Diego, have ideas on how to fix this, > > Yes, why don't you convince Loren that mmxext is the more sensible name? > >> or can you live with changing everything to MMX2 instead? > > I believe mmxext is the better name and everybody except Loren seems > to agree. There are very few uses of mmx2/mmxext in x86inc.asm and > they are unlikely to ever conflict with future changes to this file. > Besides, we only sync a few times per year. > > So I'm unconvinced that using mmx2 would be a net benefit. I still > hope that Loren can change his mind, as I said, I volunteer to do > all the work. 
x86inc.asm is a x264 file that we sync, I'd like to keep it that way, especially given that I'm the guy who historically had to deal with the outfalls every time a sync doesn't work right (remember INIT_MMX not backing up XMM regs anymore on Win64? Remember the register reordering on Win64 recently?). Plus, I didn't say it was a good idea, I said I could live with it if others want it. Right now, it seems others (i.e. Loren) don't. Ronald ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/3] avconv: split options parsing stuff into a separate file.
On Sat, Aug 04, 2012 at 11:49:15AM +0200, Anton Khirnov wrote: > --- > Makefile |2 + > avconv.c | 2227 > ++ > avconv.h | 360 ++ > avconv_opt.c | 1916 ++ > 4 files changed, 2326 insertions(+), 2179 deletions(-) > create mode 100644 avconv.h > create mode 100644 avconv_opt.c avconv: split option parsing into a separate file > --- /dev/null > +++ b/avconv.h > @@ -0,0 +1,360 @@ > + > +#endif // AVCONV_H Use /* */ like everywhere else please. > --- /dev/null > +++ b/avconv_opt.c > @@ -0,0 +1,1916 @@ > +/* > + * avconv options parsing option > + * Copyright (c) 2000-2012 The libav developers. I don't think this denotes a proper legal entity, so just drop it. I'll do a more thorough review once I can apply this cleanly to some tree of mine. Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 3/3] avconv: split configuring filters to a separate file.
On Sat, Aug 04, 2012 at 11:49:16AM +0200, Anton Khirnov wrote: > --- > Makefile|2 +- > avconv.c| 530 --- > avconv.h|2 + > avconv_filter.c | 562 > +++ > 4 files changed, 565 insertions(+), 531 deletions(-) > create mode 100644 avconv_filter.c avconv: split filter configuration to a separate file > --- /dev/null > +++ b/avconv_filter.c > @@ -0,0 +1,562 @@ > +/* > + * avconv filters configuration filter > + * Copyright (c) 2000-2012 The libav developers. I don't think this denotes a proper legal entity, so just drop it. I'll do a more thorough review once I can apply this cleanly to some tree of mine. Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 19/45] x86: h264_idct: Rename x264_add8x4_idct_sse2 --> h264_add8x4_idct_sse2
On Wed, Aug 01, 2012 at 12:17:43AM +0200, Diego Biurrun wrote: > --- > libavcodec/x86/h264_idct.asm |8 > 1 files changed, 4 insertions(+), 4 deletions(-) OKed by Benjamin on IRC. Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] build: generalise rules and variable settings for av* programs
On Sat, Aug 04, 2012 at 07:06:04PM +0100, Mans Rullgard wrote: > This simplifies adding extra flags for individual programs > and also allows more than one object file per program. > > Signed-off-by: Mans Rullgard > --- > Makefile | 17 - > configure | 6 +++--- > 2 files changed, 15 insertions(+), 8 deletions(-) LGTM Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] fate: simplify variable setting filter.mak
On Sun, Aug 05, 2012 at 12:06:23PM +0100, Mans Rullgard wrote: > This removes some needless indirection and duplication. > > Signed-off-by: Mans Rullgard > --- > tests/fate/filter.mak | 9 ++--- > 1 file changed, 2 insertions(+), 7 deletions(-) OK Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational
On Sun, Jul 29, 2012 at 11:01:34PM +0200, Luca Barbato wrote: > On 07/29/2012 08:32 PM, Måns Rullgård wrote: > > "Ronald S. Bultje" writes: > >> On Jul 29, 2012 9:01 AM, "Luca Barbato" wrote: > >>> On 07/29/2012 04:00 PM, Mans Rullgard wrote: > This allows simplifying a few expressions. > > --- > avconv.c | 6 ++ > libavutil/rational.h | 11 +++ > 2 files changed, 13 insertions(+), 4 deletions(-) > >>> > >>> Ok. > >> > >> Wrong namespace? av_rational_inv()? > > > > It's consistent with the other functions in that header. I thought you > > liked consistency. > > I like the name short as is. Any name is fine with me. Let's settle on the short one and not bikeshed this further. If Ronald has strong feelings about it, just switch the name, but get this over with please. Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] rational: add av_inv_q() returning the inverse of an AVRational
Mans Rullgard writes: > This allows simplifying a few expressions. > > Signed-off-by: Mans Rullgard > --- Ping. Does this solve the msvc problem with those expressions? -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] fate: simplify variable setting filter.mak
This removes some needless indirection and duplication. Signed-off-by: Mans Rullgard --- tests/fate/filter.mak | 9 ++--- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/fate/filter.mak b/tests/fate/filter.mak index 35b6558..e42f837 100644 --- a/tests/fate/filter.mak +++ b/tests/fate/filter.mak @@ -19,21 +19,16 @@ $(FATE_AMIX): CMP = oneoff $(FATE_AMIX): CMP_UNIT = f32 FATE_FILTER += $(FATE_AMIX) -FATE_SAMPLES_AVCONV += $(FATE_AMIX) -FATE_ASYNCTS += fate-filter-asyncts +FATE_FILTER += fate-filter-asyncts fate-filter-asyncts: SRC = $(SAMPLES)/nellymoser/nellymoser-discont.flv fate-filter-asyncts: CMD = pcm -analyzeduration 1000 -i $(SRC) -af asyncts fate-filter-asyncts: CMP = oneoff fate-filter-asyncts: REF = $(SAMPLES)/nellymoser/nellymoser-discont.pcm -FATE_FILTER += $(FATE_ASYNCTS) -FATE_SAMPLES_AVCONV += $(FATE_ASYNCTS) - fate-filter-delogo: CMD = framecrc -i $(SAMPLES)/real/rv30.rm -vf delogo=show=0:x=290:y=25:w=26:h=16 -an FATE_FILTER += fate-filter-delogo -FATE_SAMPLES_AVCONV += fate-filter-delogo FATE_YADIF += fate-filter-yadif-mode0 fate-filter-yadif-mode0: CMD = framecrc -flags bitexact -idct simple -i $(SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vf yadif=0 @@ -42,6 +37,6 @@ FATE_YADIF += fate-filter-yadif-mode1 fate-filter-yadif-mode1: CMD = framecrc -flags bitexact -idct simple -i $(SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vf yadif=1 FATE_FILTER += $(FATE_YADIF) -FATE_SAMPLES_AVCONV += $(FATE_YADIF) +FATE_SAMPLES_AVCONV += $(FATE_FILTER) fate-filter: $(FATE_FILTER) -- 1.7.11.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] lavu: add snprintf(), vsnprintf() and strtod() replacements for MS runtime.
"Ronald S. Bultje" writes: > +#ifndef AVUTIL_OS_SUPPORT_H > +#define AVUTIL_OS_SUPPORT_H > + > +/** > + * @file > + * OSSupport > + */ > + > +#include > +#include > + > +/* > + * snprintf() on MSVC returns -1 (instead of required buffer length) > + * if the input buffer isn't big enough. Also, if the required buffer > + * length is exactly identical to the input buffer size, or if the > + * return value is -1 because the input buffer isn't big enough, MSVC > + * will fail to NULL-terminate the output buffer. vsnprintf() has the > + * same issue. > + * > + * Thus, provide our own fallback wrappers with correct behaviour. > + */ > +#undef snprintf > +#define snprintf avpriv_snprintf > +int snprintf(char *restrict s, size_t n, const char *restrict format, ...); > + > +#undef vsnprintf > +#define vsnprintf avpriv_vsnprintf > +int vsnprintf(char *restrict s, size_t n, const char *restrict format, > va_list ap); > + > +/* > + * strtod() on MSVC doesn't handle strings like 'inf' or 'nan'. Also, > + * it doesn't handle "0x" prefixes for hexadecimal input. > + * > + * Thus, provide our own fallback wrapper with correct behaviour. > + */ > +#undef strtod > +#define strtod avpriv_strtod > +double strtod(char *restrict nptr, char **restrict endptr); > + > +#endif /* AVUTIL_OS_SUPPORT_H */ > -- These replacements should be in separate files so that they can be enabled independently. -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] lavu: add snprintf(), vsnprintf() and strtod() replacements for MS runtime.
"Ronald S. Bultje" writes: > From: "Ronald S. Bultje" > > The idea is to compile in os_support.c when compiling Libav against > the MS runtime (e.g. with the MSVC compiler) and thereby provide > replacements for some functions hat are not standards-compliant. We > can force-include the header using cl.exe -Fi, so we don't have to > contaminate source files outside the compat/ directory. > --- > configure |2 + > libavutil/Makefile|1 + > libavutil/compat/os_support.c | 130 > + > libavutil/compat/os_support.h | 61 +++ > 4 files changed, 194 insertions(+) > create mode 100644 libavutil/compat/os_support.c > create mode 100644 libavutil/compat/os_support.h When I said these things belong in compat/, I meant the one that already exists at the top level. > +#undef vsnprintf > +int avpriv_vsnprintf(char *restrict s, size_t n, const char *restrict fmt, > va_list ap) > +{ > +int ret; > + > +if (n == 0 || n > INT_MAX) > +return 0; I don't think zero is the proper value to return here. It is not consistent with any existing snprintf variant. > +/* we use n - 1 here because if the buffer is not big enough, the MS > runtime > + * libraries don't add a terminating zero at the end. MSDN recommends to > provide > + * _snprintf/_vsnprintf() a buffer size that is one less than the actual > buffer, > + * and zero it before calling _snprintf/_vsnprintf() to workaround this > problem. > + * See http://msdn.microsoft.com/en-us/library/1kt27hek(v=vs.80).aspx */ Watch that line length. > +memset(s, 0, n); > +ret = vsnprintf(s, n - 1, fmt, ap); > +if (ret == -1) > +ret = n; > + > +return ret; > +} > + > +static char *check_nan_suffix(char *s) > +{ > +char *start = s; > + > +if (*s++ != '(') > +return start; > + > +while ((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || > + (*s >= '0' && *s <= '9') || *s == '_') > +s++; > + > +return *s++ == ')' ? s : start; This has to be "*s == ')' ? s + 1 : start" to avoid incrementing past a null terminator. 
> +} > + > +#undef strtod > +double avpriv_strtod(char *restrict nptr, char **restrict endptr) > +{ > +char *end; > +double res; > + > +/* Skip leading spaces */ > +while (isspace(*nptr)) > +nptr++; > + > +if (!av_strncasecmp(nptr, "infinity", 8)) { > +end = nptr + 8; > +res = INFINITY; > +} else if (!av_strncasecmp(nptr, "inf", 3)) { > +end = nptr + 3; > +res = INFINITY; > +} else if (!av_strncasecmp(nptr, "+infinity", 9)) { > +end = nptr + 9; > +res = INFINITY; > +} else if (!av_strncasecmp(nptr, "+inf", 4)) { > +end = nptr + 4; > +res = INFINITY; > +} else if (!av_strncasecmp(nptr, "-infinity", 9)) { > +end = nptr + 9; > +res = -INFINITY; > +} else if (!av_strncasecmp(nptr, "-inf", 4)) { > +end = nptr + 4; > +res = -INFINITY; > +} else if (!av_strncasecmp(nptr, "nan", 3)) { > +end = check_nan_suffix(nptr + 3); > +res = NAN; > +} else if (!av_strncasecmp(nptr, "+nan", 4) || > + !av_strncasecmp(nptr, "-nan", 4)) { > +end = check_nan_suffix(nptr + 4); > +res = NAN; > +} else if (!av_strncasecmp(nptr, "0x", 2) || > + !av_strncasecmp(nptr, "-0x", 3) || > + !av_strncasecmp(nptr, "+0x", 3)) { > +/* FIXME this doesn't handle exponents or non-integers > (float/double) */ > +res = strtoll(nptr, &end, 16); It also doesn't handle integers too large for long long. > +} else { > +res = strtod(nptr, &end); > +} > + > +if (endptr) > +*endptr = end; > + > +return res; > +} > diff --git a/libavutil/compat/os_support.h b/libavutil/compat/os_support.h > new file mode 100644 > index 000..b813fee > --- /dev/null > +++ b/libavutil/compat/os_support.h > @@ -0,0 +1,61 @@ > +/* > + * Support functions for OSes lacking basic libc functionality > + * Copyright (c) 2012 Ronald S. Bultje > + * > + * This file is part of Libav. 
> + * > + * Libav is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * Libav is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with Libav; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef AVUTIL_O
Re: [libav-devel] [PATCH] lavu: add snprintf(), vsnprintf() and strtod() replacements on MSVC.
Diego Biurrun writes: > On Fri, Aug 03, 2012 at 09:38:29PM +0100, Måns Rullgård wrote: >> "Ronald S. Bultje" writes: >> >> Other than that, >> >> this feels like it belongs in compat/ rather than libavutil. Not really >> >> sure how best to build it though. >> > >> > I was actually thinking of that for the header, yes. I suppose we can >> > do it for the source also, but then again, we'd need to link lavu with >> > that, which seems kind of complex for the rather simple thing we're >> > trying to accomplish here? >> >> I'm trying to not contaminate libavutil with things that don't belong >> there. That is worth a little complexity. > > While we're discussing the subject (sorry if I hijack the thread), this > applies to all those math functions that we fall back on for obsolete > systems that lack them. Having a better place than libavutil for them > sounds like a good idea. Yes, although those are different in that they don't result in any code, let alone exported symbols. -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 07/45] x86: mmx2 ---> mmxext in asm constructs
On Sat, Aug 04, 2012 at 06:19:38PM -0700, Ronald S. Bultje wrote: > On Sat, Aug 4, 2012 at 2:28 PM, Loren Merritt wrote: > > On Sat, 4 Aug 2012, Diego Biurrun wrote: > >> On Sat, Aug 04, 2012 at 03:11:50PM -0400, Justin Ruggles wrote: > >>> On 07/31/2012 06:17 PM, Diego Biurrun wrote: > --- > 30 files changed, 215 insertions(+), 211 deletions(-) > >>> > >>> Looks ok, but probably should get other opinions on this as well. I know > >>> Ronald was trying to keep x86inc.asm sychronized with x264, and trying > >>> to do so after this change would likely require similar extensive > >>> cpuflag modifications in x264. > >> > >> I volunteer to patch x264 if such a change would be accepted on their > >> side. > > > > Rejected. I like "mmx2" better. > > However, I wouldn't be opposed to dropping mmx1 entirely and using > > the name "mmx" to refer to mmx2. (x264 doesn't actually support mmx1 > > anyway; we use mmx2 inline asm that's actually inlined in places where > > runtime cpu detection is impossible.) But that wouldn't help > > synchronization if libav doesn't do so. > > Right - from my PoV, keeping us in sync with x264 is more important > that having a slightly more accurate cpuflag for something that's > never exposed to end users in any way. It is exposed through configure. > Diego, have ideas on how to fix this, Yes, why don't you convince Loren that mmxext is the more sensible name? > or can you live with changing everything to MMX2 instead? I believe mmxext is the better name and everybody except Loren seems to agree. There are very few uses of mmx2/mmxext in x86inc.asm and they are unlikely to ever conflict with future changes to this file. Besides, we only sync a few times per year. So I'm unconvinced that using mmx2 would be a net benefit. I still hope that Loren can change his mind, as I said, I volunteer to do all the work. Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] lavu: add snprintf(), vsnprintf() and strtod() replacements on MSVC.
On Fri, Aug 03, 2012 at 09:38:29PM +0100, Måns Rullgård wrote: > "Ronald S. Bultje" writes: > >> Other than that, > >> this feels like it belongs in compat/ rather than libavutil. Not really > >> sure how best to build it though. > > > > I was actually thinking of that for the header, yes. I suppose we can > > do it for the source also, but then again, we'd need to link lavu with > > that, which seems kind of complex for the rather simple thing we're > > trying to accomplish here? > > I'm trying to not contaminate libavutil with things that don't belong > there. That is worth a little complexity. While we're discussing the subject (sorry if I hijack the thread), this applies to all those math functions that we fall back on for obsolete systems that lack them. Having a better place than libavutil for them sounds like a good idea. Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] mpegaudiodec: don't print an error on > 1 frame in a packet.
It's a perfectly normal situation, nothing to spam about. --- libavcodec/mpegaudiodec.c |1 - 1 file changed, 1 deletion(-) diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c index 6c1e8af..f388d8b 100644 --- a/libavcodec/mpegaudiodec.c +++ b/libavcodec/mpegaudiodec.c @@ -1663,7 +1663,6 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr, av_log(avctx, AV_LOG_ERROR, "incomplete frame\n"); return AVERROR_INVALIDDATA; } else if (s->frame_size < buf_size) { -av_log(avctx, AV_LOG_ERROR, "incorrect frame size\n"); buf_size= s->frame_size; } -- 1.7.10.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] mpegaudioenc: list supported channel layouts.
--- libavcodec/mpegaudioenc.c |5 + 1 file changed, 5 insertions(+) diff --git a/libavcodec/mpegaudioenc.c b/libavcodec/mpegaudioenc.c index 6e79a61..a0ae5a7 100644 --- a/libavcodec/mpegaudioenc.c +++ b/libavcodec/mpegaudioenc.c @@ -24,6 +24,8 @@ * The simplest mpeg audio layer 2 encoder. */ +#include "libavutil/audioconvert.h" + #include "avcodec.h" #include "internal.h" #include "put_bits.h" @@ -794,6 +796,9 @@ AVCodec ff_mp2_encoder = { .supported_samplerates = (const int[]){ 44100, 48000, 32000, 22050, 24000, 16000, 0 }, +.channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, + AV_CH_LAYOUT_STEREO, + 0 }, .long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"), .defaults = mp2_defaults, }; -- 1.7.10.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel