Re: [libav-devel] [PATCH v3] x86: fft: convert sse inline asm to yasm
On 24/06/2012 8:36 PM, Mans Rullgard wrote: > --- > This one has been tested 32-bit and 64-bit, Linux and Windows. > --- Tested and passed on 64-bit OS X too. Sounds right-o. - Derek ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] lavr: x86: merge some branches
--- I think the duplicate sections was a result of some rebasing gone wrong on my part. libavresample/x86/audio_convert_init.c | 12 1 files changed, 4 insertions(+), 8 deletions(-) diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index f41d974..637fd2f 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -53,14 +53,6 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx); } -if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { -ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, - 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); -} -if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { -ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, - 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); -} if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, @@ -80,12 +72,16 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, 0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, + 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); } if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32, 0, 32, 16, "AVX", ff_conv_s32_to_flt_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, 0, 32, 32, "AVX", ff_conv_flt_to_s32_avx); +ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, + 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } #endif } -- 1.7.1 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH v3] x86: fft: convert sse inline asm to yasm
--- This one has been tested 32-bit and 64-bit, Linux and Windows. --- libavcodec/x86/Makefile|1 - libavcodec/x86/fft_mmx.asm | 139 libavcodec/x86/fft_sse.c | 110 --- 3 files changed, 129 insertions(+), 121 deletions(-) delete mode 100644 libavcodec/x86/fft_sse.c diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 6602cce..6464739 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -39,7 +39,6 @@ YASM-OBJS-$(CONFIG_DCT)+= x86/dct32_sse.o YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_yasm.o YASM-OBJS-FFT-$(HAVE_AMD3DNOW) += x86/fft_3dn.o YASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT) += x86/fft_3dn2.o -YASM-OBJS-FFT-$(HAVE_SSE) += x86/fft_sse.o YASM-OBJS-$(CONFIG_FFT)+= x86/fft_mmx.o \ $(YASM-OBJS-FFT-yes) YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \ diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm index b60d8b0..1cacfb7 100644 --- a/libavcodec/x86/fft_mmx.asm +++ b/libavcodec/x86/fft_mmx.asm @@ -45,6 +45,10 @@ struc FFTContext .mdctbits: resd 1 .tcos: pointer 1 .tsin: pointer 1 +.fftperm: pointer 1 +.fftcalc: pointer 1 +.imdctcalc:pointer 1 +.imdcthalf:pointer 1 endstruc SECTION_RODATA @@ -65,6 +69,7 @@ perm1: dd 0x00, 0x02, 0x03, 0x01, 0x03, 0x00, 0x02, 0x01 perm2: dd 0x00, 0x01, 0x02, 0x03, 0x01, 0x00, 0x02, 0x03 ps_p1p1m1p1root2: dd 1.0, 1.0, -1.0, 1.0, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2 ps_m1m1p1m1p1m1m1m1: dd 1<<31, 1<<31, 0, 1<<31, 0, 1<<31, 1<<31, 1<<31 +ps_m1m1m1m1: times 4 dd 1<<31 ps_m1p1: dd 1<<31, 0 %assign i 16 @@ -532,6 +537,16 @@ DEFINE_ARGS z, w, n, o1, o3 rep ret %endmacro +%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs +lea r2, [dispatch_tab%1] +mov r2, [r2 + (%2q-2)*gprsize] +%ifdef PIC +lea r3, [$$] +add r2, r3 +%endif +call r2 +%endmacro ; FFT_DISPATCH + INIT_YMM avx %if HAVE_AVX @@ -548,6 +563,14 @@ INIT_YMM avx DECL_PASS pass_avx, PASS_BIG 1 DECL_PASS pass_interleave_avx, PASS_BIG 0 + +cglobal fft_calc, 2,5,8 +mov r3d, [r0 + FFTContext.nbits] +mov r0, r1 +mov r1, r3 +FFT_DISPATCH 
_interleave %+ SUFFIX, r1 +REP_RET + %endif INIT_XMM sse @@ -565,6 +588,112 @@ INIT_XMM sse DECL_PASS pass_sse, PASS_BIG 1 DECL_PASS pass_interleave_sse, PASS_BIG 0 +cglobal fft_calc, 2,5,8 +mov r3d, [r0 + FFTContext.nbits] +PUSHr1 +PUSHr3 +mov r0, r1 +mov r1, r3 +FFT_DISPATCH _interleave %+ SUFFIX, r1 +POP rcx +POP r4 +cmp rcx, 4 +jg .end +mov r2, -1 +add rcx, 3 +shl r2, cl +sub r4, r2 +.loop +movaps xmm0, [r4 + r2] +movaps xmm1, xmm0 +unpcklps xmm0, [r4 + r2 + 16] +unpckhps xmm1, [r4 + r2 + 16] +movaps [r4 + r2], xmm0 +movaps [r4 + r2 + 16], xmm1 +add r2, 32 +jl .loop +.end: +REP_RET + +cextern_naked memcpy + +cglobal fft_permute, 2,7,1 +mov r4, [r0 + FFTContext.revtab] +mov r5, [r0 + FFTContext.tmpbuf] +mov ecx, [r0 + FFTContext.nbits] +mov r2, 1 +shl r2, cl +xor r0, r0 +%if ARCH_X86_32 +mov r1, r1m +%endif +.loop: +movaps xmm0, [r1 + 8*r0] +movzx r6, word [r4 + 2*r0] +movzx r3, word [r4 + 2*r0 + 2] +movlps [r5 + 8*r6], xmm0 +movhps [r5 + 8*r3], xmm0 +add r0, 2 +cmp r0, r2 +jl .loop +shl r2, 3 +%if ARCH_X86_64 +mov r0, r1 +mov r1, r5 +%else +pushr2 +pushr5 +pushr1 +%endif +%if ARCH_X86_64 && WIN64 == 0 +jmp memcpy +%else +callmemcpy +%if ARCH_X86_32 +add esp, 12 +%endif +REP_RET +%endif + +cglobal imdct_calc, 3,5,3 +mov r3d, [r0 + FFTContext.mdctsize] +mov r4, [r0 + FFTContext.imdcthalf] +add r1, r3 +PUSHr3 +PUSHr1 +%if ARCH_X86_32 +pushr2 +pushr1 +pushr0 +%else +sub rsp, 8 +%endif +callr4 +%if ARCH_X86_32 +add esp, 12 +%else +add rsp, 8 +%endif +POP r1 +POP r3 +lea r0, [r1 + 2*r3] +mov r2, r3 +sub r3, 16 +neg r2 +movaps xmm2, [ps_m1m1m1m1] +.loop: +movaps xmm0, [r1 + r3] +movaps xmm1, [r0 + r2] +shufps xmm0, xmm0, 0x1b +shufps xmm1, xmm1, 0x1b +xorps xmm0, xmm2 +movaps [r0 + r3], xmm1 +movaps [r1 + r2], xmm0 +sub r3, 16 +add r2, 16 +jl .loop +REP_RET + INIT_MMX 3dnow %define mulps pfmul %define addps pfadd @@ -582,16 +711,6 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0 %define SECTION_REL %endif -%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs -lea r2, 
[dispatch_tab%1] -mov r2, [r2 + (%2q-2)*gprsize] -
Re: [libav-devel] [libav-commits] swscale: fix overflows in vertical scaling at top/bottom edges.
On 06/24/2012 09:53 AM, John Stebbins wrote: > On 06/23/2012 10:07 PM, John Stebbins wrote: >> On 06/23/2012 07:41 PM, Luca Barbato wrote: >>> On 06/23/2012 03:45 PM, John Stebbins wrote: On 12/18/2011 05:28 PM, Ronald S. Bultje wrote: > Module: libav > Branch: master > Commit: d49352c7cc22fd8928a761a373c3508be17c9f19 > > Author:Ronald S. Bultje > Committer: Ronald S. Bultje > Date: Sun Dec 18 08:27:43 2011 -0800 > > swscale: fix overflows in vertical scaling at top/bottom edges. > > This fixes integer multiplication overflows in RGB48 output > (vertical) scaling as detected by IOC. What happens is that for > certain types of filters (lanczos, spline, bicubic), the > intermediate sum of coefficients in the middle of a filter can > be larger than the fixed-point equivalent of 1.0, even if the > final sum is 1.0. This is fine and we support that. > > However, at frame edges, initFilter() will merge the coefficients > for the off-screen pixels into the top or bottom pixel, such as > to emulate edge extension. This means that suddenly, a single > coefficient can be larger than the fixed-point equivalent of > 1.0, which the vertical scaling routines do not support. > > Therefore, remove the merging of coefficients for edges for > the vertical scaling filter, and instead add edge detection > to the scaler itself so that it copies the pointers (not data) > for the edges (i.e. it uses line[0] for line[-1] as well), so > that a single coefficient is never larger than the fixed-point > equivalent of 1.0. > Hi, I've noticed that this commit is triggering the assert in swscale.c:632. assert(chrUSrcPtr + vChrFilterSize - 1< chrUPixBuf + vChrBufSize * 2); This happens when converting with lanczos from YUV420P to RGB32 and it is upscaling the chroma. The comment indicates that there is edge detection in the scaler. So I'm wondering if the edge detection is failing for this case or is the assert just vestigial and in need of removal? >>> Which is your testcase? 
Trying probably is fast. >>> >>> lu >>> >> I'm afraid I don't understand your question. Are you asking me to try >> something? Or are you asking for more details about how you can >> reproduce this yourself? I haven't tried to reproduce this yet with >> avconv or such. I'm using libswscale directly. I've already >> described the circumstances. Of course, the assert is silent on a >> standard debug build of libav. You would need to add "-DDEBUG" to >> your CFLAGS to enable it. I only bumped into it because the assert >> doesn't seem to be disabled when building on mingw64 and I was testing >> all the various platforms HandBrake supports. I was sure I answered you but looks like the email never reached the ml... >> > Here's a sample app and image that can be used to reproduce the assert. > Thank you! I'll have a look later. lu -- Luca Barbato Gentoo/linux http://dev.gentoo.org/~lu_zero ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] x86: place some inline asm under #if HAVE_INLINE_ASM
On Sun, 24 Jun 2012, Mans Rullgard wrote: From: "Ronald S. Bultje" Signed-off-by: Mans Rullgard --- Rebased to master and added h264/cabac. --- libavcodec/x86/cabac.h |3 +++ libavcodec/x86/cavsdsp_mmx.c |6 ++ libavcodec/x86/dnxhd_mmx.c |6 ++ libavcodec/x86/h264_i386.h |3 +++ libavcodec/x86/lpc_mmx.c |6 ++ libavcodec/x86/mathops.h |3 +++ libavcodec/x86/snowdsp_mmx.c |6 ++ libavcodec/x86/vc1dsp_mmx.c |6 ++ libavutil/internal.h |2 +- libavutil/x86/bswap.h|3 +++ libavutil/x86/intmath.h |2 ++ 11 files changed, 45 insertions(+), 1 deletion(-) This seems to work as advertised, so LGTM. // Martin ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] lavf: win32: use WSAPoll() if available
On Sun, 24 Jun 2012, Martin Storsjö wrote: On Wed, 20 Jun 2012, Mans Rullgard wrote: Windows Vista and later have a WSAPoll() function equivalent to the standard poll(). Use this instead of emulating it with select() when possible. --- This is completely untested. Help with that would be appreciated. This is missing a !HAVE_WSAPOLL around the fallback implementation in os_support.c. But even when I added that, it didn't work (haven't figured out why yet, might be something unrelated though). It turned out to be an unrelated issue - with that change, this patch would work as intended. But as said, I'm preparing a larger patchset that should have the same effect as this, and some more. // Martin___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] dxva2: include dxva.h if found
On Sun, 24 Jun 2012, Mans Rullgard wrote: From: "Ronald S. Bultje" Apparently, some build environments require dxva.h even for dxva2, while others lack this header entirely. Including it conditionally allows building in both cases. Signed-off-by: Mans Rullgard --- configure |2 ++ libavcodec/dxva2.h |3 +++ 2 files changed, 5 insertions(+) diff --git a/configure b/configure index 083ef77..1cba930 100755 --- a/configure +++ b/configure @@ -1079,6 +1079,7 @@ HAVE_LIST=" dlfcn_h dlopen dos_paths +dxva_h ebp_available ebx_available exp2 @@ -2907,6 +2908,7 @@ check_func_headers windows.h Sleep check_func_headers windows.h VirtualAlloc check_header dlfcn.h +check_header dxva.h check_header dxva2api.h check_header malloc.h check_header poll.h diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h index c06f1f3..b9e3086 100644 --- a/libavcodec/dxva2.h +++ b/libavcodec/dxva2.h @@ -33,6 +33,9 @@ #include #include +#if HAVE_DXVA_H +#include +#endif This lacks an #include "config.h" somewhere before the HAVE_DXVA_H. Other than that, it works as intended, tested on mingw64 and MSVC. // Martin ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] lavr: add x86-optimized mixing functions
Adds optimized functions for mixing 3 through 8 input channels to 1 and 2 output channels in fltp or s16p format with flt coeffs. --- Made changes as suggested by Loren. Benchmarks for Sandy Bridge: https://docs.google.com/spreadsheet/ccc?key=0AraK7SdCYBjVdG5zZjNHVFRIellsMENOSVdXYmk5aUE#gid=0 libavresample/utils.c |7 +- libavresample/x86/audio_mix.asm| 284 libavresample/x86/audio_mix_init.c | 130 3 files changed, 418 insertions(+), 3 deletions(-) diff --git a/libavresample/utils.c b/libavresample/utils.c index 6d4509d..c8f21d3 100644 --- a/libavresample/utils.c +++ b/libavresample/utils.c @@ -221,9 +221,10 @@ static int handle_buffered_output(AVAudioResampleContext *avr, return 0; } -int avresample_convert(AVAudioResampleContext *avr, void **output, - int out_plane_size, int out_samples, void **input, - int in_plane_size, int in_samples) +int attribute_align_arg avresample_convert(AVAudioResampleContext *avr, + void **output, int out_plane_size, + int out_samples, void **input, + int in_plane_size, int in_samples) { AudioData input_buffer; AudioData output_buffer; diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm index 4b0434d..749ddc7 100644 --- a/libavresample/x86/audio_mix.asm +++ b/libavresample/x86/audio_mix.asm @@ -231,3 +231,287 @@ MIX_1_TO_2_S16P_FLT INIT_XMM avx MIX_1_TO_2_S16P_FLT %endif + +;- +; void ff_mix_3_8_to_1_2_fltp/s16p_flt(float/int16_t **src, float **matrix, +; int len, int out_ch, int in_ch); +;- + +%macro MIX_3_8_TO_1_2_FLT 3 ; %1 = in channels, %2 = out channels, %3 = s16p or fltp +; define some names to make the code clearer +%assign in_channels %1 +%assign stereo %2 - 1 +%ifidn %3, s16p +%assign is_s16 1 +%else +%assign is_s16 0 +%endif + +; determine how many matrix elements must go on the stack vs. 
mmregs +%assign matrix_elements %1 * %2 +%if is_s16 +%if stereo +%assign needed_mmregs 7 +%else +%assign needed_mmregs 5 +%endif +%else +%if stereo +%assign needed_mmregs 4 +%else +%assign needed_mmregs 3 +%endif +%endif +%assign matrix_elements_mm num_mmregs - needed_mmregs +%if matrix_elements < matrix_elements_mm +%assign matrix_elements_mm matrix_elements +%endif +%if matrix_elements_mm < matrix_elements +%assign matrix_elements_stack matrix_elements - matrix_elements_mm +%else +%assign matrix_elements_stack 0 +%endif + +cglobal mix_%1_to_%2_%3_flt, 3,%1+2,needed_mmregs+matrix_elements_mm, src0, src1, len, src2, src3, src4, src5, src6, src7 + +; get aligned stack space if needed +%if matrix_elements_stack > 0 +%if mmsize == 32 +%assign bkpreg %1 + 1 +%define bkpq r %+ bkpreg %+ q +mov bkpq, rsp +and rsp, ~(mmsize-1) +sub rsp, matrix_elements_stack * mmsize +%else +%assign pad matrix_elements_stack * mmsize + (mmsize - gprsize) - (stack_offset & (mmsize - gprsize)) +SUB rsp, pad +%endif +%endif + +; load matrix pointers +%define matrix0q r1q +%define matrix1q r3q +%if stereo +mov matrix1q, [matrix0q+gprsize] +%endif +mov matrix0q, [matrix0q] + +; define matrix coeff names +%assign %%i 0 +%assign %%j needed_mmregs +%rep in_channels +%if %%i >= matrix_elements_mm +CAT_XDEFINE mx_stack_0_, %%i, 1 +CAT_XDEFINE mx_0_, %%i, [rsp+(%%i-matrix_elements_mm)*mmsize] +%else +CAT_XDEFINE mx_stack_0_, %%i, 0 +CAT_XDEFINE mx_0_, %%i, m %+ %%j +%assign %%j %%j+1 +%endif +%assign %%i %%i+1 +%endrep +%if stereo +%assign %%i 0 +%rep in_channels +%if in_channels + %%i >= matrix_elements_mm +CAT_XDEFINE mx_stack_1_, %%i, 1 +CAT_XDEFINE mx_1_, %%i, [rsp+(in_channels+%%i-matrix_elements_mm)*mmsize] +%else +CAT_XDEFINE mx_stack_1_, %%i, 0 +CAT_XDEFINE mx_1_, %%i, m %+ %%j +%assign %%j %%j+1 +%endif +%assign %%i %%i+1 +%endrep +%endif + +; load/splat matrix coeffs +%assign %%i 0 +%rep in_channels +%if mx_stack_0_ %+ %%i +VBROADCASTSS m0, [matrix0q+4*%%i] +mova mx_0_ %+ %%i, m0 +%else 
+VBROADCASTSS mx_0_ %+ %%i, [matrix0q+4*%%i] +%endif +%if stereo +%if mx_stack_1_ %+ %%i +VBROADCASTSS m0, [matrix1q+4*%%i] +mova mx_1_ %+ %%i, m0 +%else +VBROADCASTSS mx_1_ %+ %%i, [matrix1q+4*%%i] +%endif +%endif +%assign %%i %%i+1 +%endrep + +; load channel pointers to registers as offsets from the first channel pointer +%if ARCH_X86_64 +movsxd lenq, r2d +%en
[libav-devel] [PATCH v2] x86: fft: convert sse inline asm to yasm
--- Some minor updates after comments from Jason. Now also replaces the avx fft_calc wrapper. Testing on win64 and avx would be appreciated. --- libavcodec/x86/Makefile|1 - libavcodec/x86/fft_mmx.asm | 133 libavcodec/x86/fft_sse.c | 110 3 files changed, 123 insertions(+), 121 deletions(-) delete mode 100644 libavcodec/x86/fft_sse.c diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 6602cce..6464739 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -39,7 +39,6 @@ YASM-OBJS-$(CONFIG_DCT)+= x86/dct32_sse.o YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_yasm.o YASM-OBJS-FFT-$(HAVE_AMD3DNOW) += x86/fft_3dn.o YASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT) += x86/fft_3dn2.o -YASM-OBJS-FFT-$(HAVE_SSE) += x86/fft_sse.o YASM-OBJS-$(CONFIG_FFT)+= x86/fft_mmx.o \ $(YASM-OBJS-FFT-yes) YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \ diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm index b60d8b0..21ce309 100644 --- a/libavcodec/x86/fft_mmx.asm +++ b/libavcodec/x86/fft_mmx.asm @@ -45,6 +45,10 @@ struc FFTContext .mdctbits: resd 1 .tcos: pointer 1 .tsin: pointer 1 +.fftperm: pointer 1 +.fftcalc: pointer 1 +.imdctcalc:pointer 1 +.imdcthalf:pointer 1 endstruc SECTION_RODATA @@ -65,6 +69,7 @@ perm1: dd 0x00, 0x02, 0x03, 0x01, 0x03, 0x00, 0x02, 0x01 perm2: dd 0x00, 0x01, 0x02, 0x03, 0x01, 0x00, 0x02, 0x03 ps_p1p1m1p1root2: dd 1.0, 1.0, -1.0, 1.0, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2 ps_m1m1p1m1p1m1m1m1: dd 1<<31, 1<<31, 0, 1<<31, 0, 1<<31, 1<<31, 1<<31 +ps_m1m1m1m1: times 4 dd 1<<31 ps_m1p1: dd 1<<31, 0 %assign i 16 @@ -532,6 +537,16 @@ DEFINE_ARGS z, w, n, o1, o3 rep ret %endmacro +%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs +lea r2, [dispatch_tab%1] +mov r2, [r2 + (%2q-2)*gprsize] +%ifdef PIC +lea r3, [$$] +add r2, r3 +%endif +call r2 +%endmacro ; FFT_DISPATCH + INIT_YMM avx %if HAVE_AVX @@ -548,6 +563,14 @@ INIT_YMM avx DECL_PASS pass_avx, PASS_BIG 1 DECL_PASS pass_interleave_avx, PASS_BIG 0 + +cglobal fft_calc, 2,5,8 
+mov r3d, [r0 + FFTContext.nbits] +mov r0, r1 +mov r1, r3 +FFT_DISPATCH _interleave %+ SUFFIX, r3 +REP_RET + %endif INIT_XMM sse @@ -565,6 +588,106 @@ INIT_XMM sse DECL_PASS pass_sse, PASS_BIG 1 DECL_PASS pass_interleave_sse, PASS_BIG 0 +cglobal fft_calc, 2,5,8 +mov r3d, [r0 + FFTContext.nbits] +PUSHr3 +mov r0, r1 +mov r1, r3 +FFT_DISPATCH _interleave %+ SUFFIX, r3 +POP rcx +cmp rcx, 4 +jg .end +mov r2, -1 +add rcx, 3 +shl r2, cl +sub r0, r2 +.loop +movaps xmm0, [r0 + r2] +movaps xmm1, xmm0 +unpcklps xmm0, [r0 + r2 + 16] +unpckhps xmm1, [r0 + r2 + 16] +movaps [r0 + r2], xmm0 +movaps [r0 + r2 + 16], xmm1 +add r2, 32 +jl .loop +.end: +REP_RET + +cextern_naked memcpy + +cglobal fft_permute, 2,7,1 +mov r4, [r0 + FFTContext.revtab] +mov r5, [r0 + FFTContext.tmpbuf] +mov ecx, [r0 + FFTContext.nbits] +mov r2, 1 +shl r2, cl +xor r0, r0 +%if ARCH_X86_32 +mov r1, r1m +%endif +.loop: +movaps xmm0, [r1 + 8*r0] +movzx r6, word [r4 + 2*r0] +movzx r3, word [r4 + 2*r0 + 2] +movlps [r5 + 8*r6], xmm0 +movhps [r5 + 8*r3], xmm0 +add r0, 2 +cmp r0, r2 +jl .loop +shl r2, 3 +%if ARCH_X86_64 +mov r0, r1 +mov r1, r5 +%else +pushr2 +pushr5 +pushr1 +%endif +%if ARCH_X86_64 && WIN64 == 0 +jmp memcpy +%else +callmemcpy +%if ARCH_X86_32 +add esp, 12 +%endif +REP_RET +%endif + +cglobal imdct_calc, 3,5,3 +mov r3d, [r0 + FFTContext.mdctsize] +mov r4, [r0 + FFTContext.imdcthalf] +add r1, r3 +PUSHr3 +PUSHr1 +%if ARCH_X86_32 +pushr2 +pushr1 +pushr0 +%endif +callr4 +%if ARCH_X86_32 +add esp, 12 +%endif +POP r1 +POP r3 +lea r0, [r1 + 2*r3] +mov r2, r3 +sub r3, 16 +neg r2 +movaps xmm2, [ps_m1m1m1m1] +.loop: +movaps xmm0, [r1 + r3] +movaps xmm1, [r0 + r2] +shufps xmm0, xmm0, 0x1b +shufps xmm1, xmm1, 0x1b +xorps xmm0, xmm2 +movaps [r0 + r3], xmm1 +movaps [r1 + r2], xmm0 +sub r3, 16 +add r2, 16 +jl .loop +REP_RET + INIT_MMX 3dnow %define mulps pfmul %define addps pfadd @@ -582,16 +705,6 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0 %define SECTION_REL %endif -%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs 
-lea r2, [dispatch_tab%1] -mov r2, [r2 + (%2q-2)*gprsize] -%ifdef PIC
Re: [libav-devel] [PATCH] Add a news entry for lavfi major bump.
On 06/22/2012 04:50 PM, Anton Khirnov wrote: > --- > src/index | 18 ++ > 1 file changed, 18 insertions(+) > > diff --git a/src/index b/src/index > index 1da657c..ab6bc2f 100644 > --- a/src/index > +++ b/src/index > @@ -33,6 +33,24 @@ with the latest developments by subscribing to both the > > News > > +June 22, 2012 > + > +Major version of the libavfilter library has been bumped. This means > that > +ABI and API compatibility has been broken and some deprecated functions and > +structures have been removed. > + > + > + > +Most of those were related to filter internals, which are now private. > User-side > +filters are now not officially supported until libavfilter reaches a more > mature > +state. > + > + > + > +Libavfilter ABI is now declared unstable until further notice (stabilizing it > +should take a couple of weeks at most). > + > + > June 9, 2012 > > We have been busy lately! Today, we are updating all 4 of our release trees. LGTM. -Justin ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] Add a news entry for lavfi major bump.
ping -- Anton Khirnov ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] x86: h264: prevent yasm from issuing a i686 nopl instruction on i586 machines
On Wed, May 16, 2012 at 08:09:15AM -0700, Ronald S. Bultje wrote: > On Wed, May 16, 2012 at 5:34 AM, Diego Biurrun wrote: > > On Mon, May 14, 2012 at 12:41:45PM +0200, Diego Biurrun wrote: > >> From: Michael Kostylev > >> > >> --- > >> libavcodec/x86/h264_intrapred.asm | 4 > >> 1 files changed, 4 insertions(+), 0 deletions(-) > > > > ping > > If cmov/mmx is independent, then this is too. We should use the same > solution here as we used in the other thread. There, we marked it as > mmx2. Here, we should likely do the same thing, or just make sure > these functions don't get assigned if HAVE_CMOV is not set. Now that the cmov issue is fixed I would like to ping this again. The solution for the cmov issue was not, in the end, marking all relevant functions as mmx2/mmxext. Doing so here would disable quite a few optimizations and leave many many old CPUs behind. This patch is quite localized and non-intrusive IMO, so I'm in favor of applying it as-is. Diego ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] x86: cpu: whitespace (mostly) cosmetics
This adds whitespace around operators, aligns line continuation backslashes, and breaks long lines. Also fixes an ifdef halfway through a statement. The one line of duplication this saved is not worth the ugliness. Signed-off-by: Mans Rullgard --- libavutil/x86/cpu.c | 71 +++ 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 5c3af1f..80e7541 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -26,16 +26,15 @@ #include "libavutil/cpu.h" /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ -#define cpuid(index,eax,ebx,ecx,edx)\ -__asm__ volatile\ -("mov %%"REG_b", %%"REG_S"\n\t"\ - "cpuid\n\t"\ - "xchg %%"REG_b", %%"REG_S\ - : "=a" (eax), "=S" (ebx),\ - "=c" (ecx), "=d" (edx)\ - : "0" (index)); - -#define xgetbv(index,eax,edx) \ +#define cpuid(index, eax, ebx, ecx, edx)\ +__asm__ volatile ( \ +"mov%%"REG_b", %%"REG_S" \n\t" \ +"cpuid \n\t" \ +"xchg %%"REG_b", %%"REG_S \ +: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)\ +: "0" (index)) + +#define xgetbv(index, eax, edx) \ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index)) #define get_eflags(x) \ @@ -53,8 +52,8 @@ int ff_get_cpu_flags_x86(void) { int rval = 0; int eax, ebx, ecx, edx; -int max_std_level, max_ext_level, std_caps=0, ext_caps=0; -int family=0, model=0; +int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0; +int family = 0, model = 0; union { int i[3]; char c[12]; } vendor; #if ARCH_X86_32 @@ -75,19 +74,20 @@ int ff_get_cpu_flags_x86(void) vendor.i[1] = edx; vendor.i[2] = ecx; -if(max_std_level >= 1){ +if (max_std_level >= 1) { cpuid(1, eax, ebx, ecx, std_caps); -family = ((eax>>8)&0xf) + ((eax>>20)&0xff); -model = ((eax>>4)&0xf) + ((eax>>12)&0xf0); +family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); +model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0); if (std_caps & (1 << 15)) rval |= AV_CPU_FLAG_CMOV; -if (std_caps & (1<<23)) +if (std_caps & (1 << 23)) rval |= AV_CPU_FLAG_MMX; -if 
(std_caps & (1<<25)) -rval |= AV_CPU_FLAG_MMX2 +if (std_caps & (1 << 25)) +rval |= AV_CPU_FLAG_MMX2; #if HAVE_SSE - | AV_CPU_FLAG_SSE; -if (std_caps & (1<<26)) +if (std_caps & (1 << 25)) +rval |= AV_CPU_FLAG_SSE; +if (std_caps & (1 << 26)) rval |= AV_CPU_FLAG_SSE2; if (ecx & 1) rval |= AV_CPU_FLAG_SSE3; @@ -107,20 +107,19 @@ int ff_get_cpu_flags_x86(void) } #endif #endif - ; } cpuid(0x8000, max_ext_level, ebx, ecx, edx); -if(max_ext_level >= 0x8001){ +if (max_ext_level >= 0x8001) { cpuid(0x8001, eax, ebx, ecx, ext_caps); -if (ext_caps & (1U<<31)) +if (ext_caps & (1U << 31)) rval |= AV_CPU_FLAG_3DNOW; -if (ext_caps & (1<<30)) +if (ext_caps & (1 << 30)) rval |= AV_CPU_FLAG_3DNOWEXT; -if (ext_caps & (1<<23)) +if (ext_caps & (1 << 23)) rval |= AV_CPU_FLAG_MMX; -if (ext_caps & (1<<22)) +if (ext_caps & (1 << 22)) rval |= AV_CPU_FLAG_MMX2; /* Allow for selectively disabling SSE2 functions on AMD processors @@ -147,14 +146,18 @@ int ff_get_cpu_flags_x86(void) if (!strncmp(vendor.c, "GenuineIntel", 12)) { if (family == 6 && (model == 9 || model == 13 || model == 14)) { -/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah") -* theoretically support sse2, but it's usually slower than mmx, -* so let's just pretend they don't. AV_CPU_FLAG_SSE2 is disabled and -* AV_CPU_FLAG_SSE2SLOW is enabled so that SSE2 is not used unless -* explicitly enabled by checking AV_CPU_FLAG_SSE2SLOW. The same -* situation applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. */ -if (rval & AV_CPU_FLAG_SSE2) rval ^= AV_CPU_FLAG_SSE2SLOW|AV_CPU_FLAG_SSE2; -if (rval & AV_CPU_FLAG_SSE3) rval ^= AV_CPU_FLAG_SSE3SLOW|AV_CPU_FLAG_SSE3; +/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and + * 6/14 (core1 "yonah") theoretically support sse2, but it's + * usually slower than mmx, so let's just pretend they don't. + * AV_CPU_FLAG_SSE2 is disabled and AV_CPU_FLAG_SSE2SLOW is + * enabled so that S
[libav-devel] [PATCH 1/2] x86: cpu: clean up check for cpuid instruction support
This adds macros for accessing the EFLAGS register and uses these instead of coding the entire check in inline asm. Signed-off-by: Mans Rullgard --- libavutil/x86/cpu.c | 36 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index b87d3a3..5c3af1f 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -38,6 +38,16 @@ #define xgetbv(index,eax,edx) \ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index)) +#define get_eflags(x) \ +__asm__ volatile ("pushfl \n" \ + "pop%0 \n" \ + : "=r"(x)) + +#define set_eflags(x) \ +__asm__ volatile ("push%0 \n" \ + "popfl \n" \ + :: "r"(x)) + /* Function to test if multimedia instructions are supported... */ int ff_get_cpu_flags_x86(void) { @@ -49,26 +59,12 @@ int ff_get_cpu_flags_x86(void) #if ARCH_X86_32 x86_reg a, c; -__asm__ volatile ( -/* See if CPUID instruction is supported ... */ -/* ... Get copies of EFLAGS into eax and ecx */ -"pushfl\n\t" -"pop %0\n\t" -"mov %0, %1\n\t" - -/* ... Toggle the ID bit in one copy and store */ -/* to the EFLAGS reg */ -"xor $0x20, %0\n\t" -"push %0\n\t" -"popfl\n\t" - -/* ... Get the (hopefully modified) EFLAGS */ -"pushfl\n\t" -"pop %0\n\t" -: "=a" (a), "=c" (c) -: -: "cc" -); + +/* Check if CPUID is supported by attempting to toggle the ID bit in + * the EFLAGS register. */ +get_eflags(a); +set_eflags(a ^ 0x20); +get_eflags(c); if (a == c) return 0; /* CPUID not supported */ -- 1.7.10.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] FATE: add a test for itunes cover art.
Anton Khirnov writes: > --- > tests/Makefile |1 + > tests/fate/cover_art.mak |7 +++ > 2 files changed, 8 insertions(+) > create mode 100644 tests/fate/cover_art.mak > > diff --git a/tests/Makefile b/tests/Makefile > index 429762b..6203f48 100644 > --- a/tests/Makefile > +++ b/tests/Makefile > @@ -36,6 +36,7 @@ include $(SRC_PATH)/tests/fate/atrac.mak > include $(SRC_PATH)/tests/fate/audio.mak > include $(SRC_PATH)/tests/fate/bmp.mak > include $(SRC_PATH)/tests/fate/cdxl.mak > +include $(SRC_PATH)/tests/fate/cover_art.mak > include $(SRC_PATH)/tests/fate/dct.mak > include $(SRC_PATH)/tests/fate/demux.mak > include $(SRC_PATH)/tests/fate/dfa.mak > diff --git a/tests/fate/cover_art.mak b/tests/fate/cover_art.mak > new file mode 100644 > index 000..5d2d81e > --- /dev/null > +++ b/tests/fate/cover_art.mak > @@ -0,0 +1,7 @@ > +FATE_COVER_ART += fate-cover_art-m4a Diego probably wants you to use hyphens rather than underscores, and in this case I'm inclined to agree. > +fate-cover_art-m4a: CMD = md5 -i > $(SAMPLES)/cover_art/Owner-iTunes_9.0.3.15.m4a -an -c:v copy -f rawvideo -c:v copy together with -f rawvideo looks weird, but I guess that's how it's done. > +fate-cover_art-m4a: REF = 08ba70a3b594ff6345a93965e96a9d3e > + > +$(FATE_COVER_ART): CMP = oneline > +FATE_SAMPLES_AVCONV += $(FATE_COVER_ART) > +fate-cover_art: $(FATE_COVER_ART) > -- LGTM otherwise. -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] FATE: add a test for itunes cover art.
--- tests/Makefile |1 + tests/fate/cover_art.mak |7 +++ 2 files changed, 8 insertions(+) create mode 100644 tests/fate/cover_art.mak diff --git a/tests/Makefile b/tests/Makefile index 429762b..6203f48 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -36,6 +36,7 @@ include $(SRC_PATH)/tests/fate/atrac.mak include $(SRC_PATH)/tests/fate/audio.mak include $(SRC_PATH)/tests/fate/bmp.mak include $(SRC_PATH)/tests/fate/cdxl.mak +include $(SRC_PATH)/tests/fate/cover_art.mak include $(SRC_PATH)/tests/fate/dct.mak include $(SRC_PATH)/tests/fate/demux.mak include $(SRC_PATH)/tests/fate/dfa.mak diff --git a/tests/fate/cover_art.mak b/tests/fate/cover_art.mak new file mode 100644 index 000..5d2d81e --- /dev/null +++ b/tests/fate/cover_art.mak @@ -0,0 +1,7 @@ +FATE_COVER_ART += fate-cover_art-m4a +fate-cover_art-m4a: CMD = md5 -i $(SAMPLES)/cover_art/Owner-iTunes_9.0.3.15.m4a -an -c:v copy -f rawvideo +fate-cover_art-m4a: REF = 08ba70a3b594ff6345a93965e96a9d3e + +$(FATE_COVER_ART): CMP = oneline +FATE_SAMPLES_AVCONV += $(FATE_COVER_ART) +fate-cover_art: $(FATE_COVER_ART) -- 1.7.10 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] lavf: win32: use WSAPoll() if available
On Wed, 20 Jun 2012, Mans Rullgard wrote: Windows Vista and later have a WSAPoll() function equivalent to the standard poll(). Use this instead of emulating it with select() when possible. --- This is completely untested. Help with that would be appreciated. This is missing a !HAVE_WSAPOLL around the fallback implementation in os_support.c. But even when I added that, it didn't work (haven't figured out why yet, might be something unrelated though). I'll follow up with a more thorough patchset fixing the poll stuff soon though. // Martin ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH] x86: place some inline asm under #if HAVE_INLINE_ASM
From: "Ronald S. Bultje" Signed-off-by: Mans Rullgard --- Rebased to master and added h264/cabac. --- libavcodec/x86/cabac.h |3 +++ libavcodec/x86/cavsdsp_mmx.c |6 ++ libavcodec/x86/dnxhd_mmx.c |6 ++ libavcodec/x86/h264_i386.h |3 +++ libavcodec/x86/lpc_mmx.c |6 ++ libavcodec/x86/mathops.h |3 +++ libavcodec/x86/snowdsp_mmx.c |6 ++ libavcodec/x86/vc1dsp_mmx.c |6 ++ libavutil/internal.h |2 +- libavutil/x86/bswap.h|3 +++ libavutil/x86/intmath.h |2 ++ 11 files changed, 45 insertions(+), 1 deletion(-) diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h index 6fc2ddb..02dbc54 100644 --- a/libavcodec/x86/cabac.h +++ b/libavcodec/x86/cabac.h @@ -27,6 +27,8 @@ #include "libavutil/internal.h" #include "config.h" +#if HAVE_INLINE_ASM + #ifdef BROKEN_RELOCATIONS #define TABLES_ARG , "r"(tables) @@ -225,4 +227,5 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val) return val; } +#endif /* HAVE_INLINE_ASM */ #endif /* AVCODEC_X86_CABAC_H */ diff --git a/libavcodec/x86/cavsdsp_mmx.c b/libavcodec/x86/cavsdsp_mmx.c index f56f859..b3d2c27 100644 --- a/libavcodec/x86/cavsdsp_mmx.c +++ b/libavcodec/x86/cavsdsp_mmx.c @@ -29,6 +29,8 @@ #include "libavcodec/cavsdsp.h" #include "dsputil_mmx.h" +#if HAVE_INLINE_ASM + /* in/out: mma=mma+mmb, mmb=mmb-mma */ #define SUMSUB_BA( a, b ) \ "paddw "#b", "#a" \n\t"\ @@ -477,10 +479,14 @@ static void ff_cavsdsp_init_3dnow(CAVSDSPContext* c, AVCodecContext *avctx) { c->cavs_idct8_add = cavs_idct8_add_mmx; } +#endif /* HAVE_INLINE_ASM */ + void ff_cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); +#if HAVE_INLINE_ASM if (mm_flags & AV_CPU_FLAG_MMX2) ff_cavsdsp_init_mmx2 (c, avctx); if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx); +#endif /* HAVE_INLINE_ASM */ } diff --git a/libavcodec/x86/dnxhd_mmx.c b/libavcodec/x86/dnxhd_mmx.c index e193d62..54293aa 100644 --- a/libavcodec/x86/dnxhd_mmx.c +++ b/libavcodec/x86/dnxhd_mmx.c @@ -24,6 +24,8 @@ #include 
"libavutil/x86_cpu.h" #include "libavcodec/dnxhdenc.h" +#if HAVE_INLINE_ASM + static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int line_size) { __asm__ volatile( @@ -50,10 +52,14 @@ static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int l ); } +#endif /* HAVE_INLINE_ASM */ + void ff_dnxhd_init_mmx(DNXHDEncContext *ctx) { +#if HAVE_INLINE_ASM if (av_get_cpu_flags() & AV_CPU_FLAG_SSE2) { if (ctx->cid_table->bit_depth == 8) ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2; } +#endif /* HAVE_INLINE_ASM */ } diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index c0033b7..2daa40a 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -34,6 +34,8 @@ #include "libavcodec/cabac.h" #include "cabac.h" +#if HAVE_INLINE_ASM + //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet //as that would make optimization work hard) #if HAVE_7REGS @@ -187,4 +189,5 @@ static int decode_significance_8x8_x86(CABACContext *c, } #endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */ +#endif /* HAVE_INLINE_ASM */ #endif /* AVCODEC_X86_H264_I386_H */ diff --git a/libavcodec/x86/lpc_mmx.c b/libavcodec/x86/lpc_mmx.c index d41c19b..27bebe8 100644 --- a/libavcodec/x86/lpc_mmx.c +++ b/libavcodec/x86/lpc_mmx.c @@ -23,6 +23,8 @@ #include "libavutil/cpu.h" #include "libavcodec/lpc.h" +#if HAVE_INLINE_ASM + static void lpc_apply_welch_window_sse2(const int32_t *data, int len, double *w_data) { @@ -136,12 +138,16 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag, } } +#endif /* HAVE_INLINE_ASM */ + av_cold void ff_lpc_init_x86(LPCContext *c) { int mm_flags = av_get_cpu_flags(); +#if HAVE_INLINE_ASM if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) { c->lpc_apply_welch_window = lpc_apply_welch_window_sse2; c->lpc_compute_autocorr = lpc_compute_autocorr_sse2; } +#endif /* HAVE_INLINE_ASM */ } diff --git a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h 
index 50b0283..e056eb0 100644 --- a/libavcodec/x86/mathops.h +++ b/libavcodec/x86/mathops.h @@ -25,6 +25,8 @@ #include "config.h" #include "libavutil/common.h" +#if HAVE_INLINE_ASM + #if ARCH_X86_32 #define MULL MULL @@ -118,4 +120,5 @@ static inline uint32_t NEG_USR32(uint32_t a, int8_t s){ return a; } +#endif /* HAVE_INLINE_ASM */ #endif /* AVCODEC_X86_MATHOPS_H */ diff --git a/libavcodec/x86/snowdsp_mmx.c b/libavcodec/x86/snowdsp_mmx.c index 729a13a..38f3246 100644 --- a/libavcodec/x86/snowdsp_mmx.c +++ b/libavcodec/x86/snowdsp_mmx.c @@ -26,6 +26,8 @@ #include "libavcodec/dwt.h" #include "dsputil_mmx.h" +#if HAVE_INLINE_ASM + stat
[libav-devel] [PATCH] dxva2: include dxva.h if found
From: "Ronald S. Bultje" Apparently, some build environments require dxva.h even for dxva2, while others lack this header entirely. Including it conditionally allows building in both cases. Signed-off-by: Mans Rullgard --- configure |2 ++ libavcodec/dxva2.h |3 +++ 2 files changed, 5 insertions(+) diff --git a/configure b/configure index 083ef77..1cba930 100755 --- a/configure +++ b/configure @@ -1079,6 +1079,7 @@ HAVE_LIST=" dlfcn_h dlopen dos_paths +dxva_h ebp_available ebx_available exp2 @@ -2907,6 +2908,7 @@ check_func_headers windows.h Sleep check_func_headers windows.h VirtualAlloc check_header dlfcn.h +check_header dxva.h check_header dxva2api.h check_header malloc.h check_header poll.h diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h index c06f1f3..b9e3086 100644 --- a/libavcodec/dxva2.h +++ b/libavcodec/dxva2.h @@ -33,6 +33,9 @@ #include #include +#if HAVE_DXVA_H +#include <dxva.h> +#endif /** * @defgroup lavc_codec_hwaccel_dxva2 DXVA2 -- 1.7.10.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] dtx mode not implemented - sample file (not subscribed)
> On Mon, 11 Jun 2012, Huw Greenhough wrote: >> Just to let you know I uploaded MOV00669.3gp & MOV00669.3gp.txt to >> upload.libav.org/incoming/ in case they are useful. Sorry I couldn't figure >> Git out, so if the problem is already fixed, sorry for troubling you. >From: Martin Storsjö >Sent: Thursday, 14 June 2012, 22:16 >It's not fixed (and there's not much progress on getting it implemented >currently either), but if you need it handled, you can use the >libopencore_amrnb decoder instead of the built-in one - that one handles DTX >packets just fine. >// Martin Thanks for the response. I assume it's this: http://ffmpeg.org/general.html#OpenCORE-AMR you're referring to. I've made a note of it, but will save it for when I have more time - I haven't done a lot of compiling & installing, & it often doesn't go smoothly for me. The quality problems I had were minor for my purposes anyway. Thanks again, Huw ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/8] FATE: add a test for itunes cover art.
Anton Khirnov writes: > --- > tests/Makefile |1 + > tests/fate/cover_art.mak |5 + > tests/ref/fate/cover_art-m4a |1 + > 3 files changed, 7 insertions(+) > create mode 100644 tests/fate/cover_art.mak > create mode 100644 tests/ref/fate/cover_art-m4a > > diff --git a/tests/Makefile b/tests/Makefile > index 429762b..6203f48 100644 > --- a/tests/Makefile > +++ b/tests/Makefile > @@ -36,6 +36,7 @@ include $(SRC_PATH)/tests/fate/atrac.mak > include $(SRC_PATH)/tests/fate/audio.mak > include $(SRC_PATH)/tests/fate/bmp.mak > include $(SRC_PATH)/tests/fate/cdxl.mak > +include $(SRC_PATH)/tests/fate/cover_art.mak > include $(SRC_PATH)/tests/fate/dct.mak > include $(SRC_PATH)/tests/fate/demux.mak > include $(SRC_PATH)/tests/fate/dfa.mak > diff --git a/tests/fate/cover_art.mak b/tests/fate/cover_art.mak > new file mode 100644 > index 000..70c8f51 > --- /dev/null > +++ b/tests/fate/cover_art.mak > @@ -0,0 +1,5 @@ > +FATE_COVER_ART += fate-cover_art-m4a > +fate-cover_art-m4a: CMD = md5 -i > $(SAMPLES)/cover_art/Owner-iTunes_9.0.3.15.m4a -an -c:v copy -f rawvideo > + > +FATE_SAMPLES_AVCONV += $(FATE_COVER_ART) > +fate-cover_art: $(FATE_COVER_ART) > diff --git a/tests/ref/fate/cover_art-m4a b/tests/ref/fate/cover_art-m4a > new file mode 100644 > index 000..fb077d1 > --- /dev/null > +++ b/tests/ref/fate/cover_art-m4a > @@ -0,0 +1 @@ > +08ba70a3b594ff6345a93965e96a9d3e > -- You could use CMP=oneline and put the checksum directly in the makefile. -- Måns Rullgård m...@mansr.com ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH] x86: place some inline asm under #if HAVE_INLINE_ASM
On Fri, 22 Jun 2012, Mans Rullgard wrote: > From: "Ronald S. Bultje" > Signed-off-by: Mans Rullgard > --- > These are some trivial ones. Tested by manually disabling HAVE_INLINE_ASM. > --- > libavcodec/x86/cavsdsp_mmx.c |6 ++ > libavcodec/x86/dnxhd_mmx.c |6 ++ > libavcodec/x86/lpc_mmx.c |6 ++ > libavcodec/x86/mathops.h |3 +++ > libavcodec/x86/snowdsp_mmx.c |6 ++ > libavcodec/x86/vc1dsp_mmx.c |6 ++ > libavutil/internal.h |2 +- > libavutil/x86/bswap.h|3 +++ > libavutil/x86/intmath.h |2 ++ > 9 files changed, 39 insertions(+), 1 deletion(-) LGTM, although I didn't test it. // Martin ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel