Re: [libav-devel] [PATCH v3] x86: fft: convert sse inline asm to yasm

2012-06-24 Thread Derek Buitenhuis
On 24/06/2012 8:36 PM, Mans Rullgard wrote:
> ---
> This one has been tested 32-bit and 64-bit, Linux and Windows.
> ---

Tested and passed on 64-bit OS X too.

Sounds right-o.

- Derek
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] lavr: x86: merge some branches

2012-06-24 Thread Justin Ruggles
---
I think the duplicate sections were a result of some rebasing gone wrong on
my part.

 libavresample/x86/audio_convert_init.c |   12 
 1 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/libavresample/x86/audio_convert_init.c 
b/libavresample/x86/audio_convert_init.c
index f41d974..637fd2f 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -53,14 +53,6 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
   6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx);
 }
-if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
-ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
-  6, 16, 4, "SSE4", 
ff_conv_fltp_to_flt_6ch_sse4);
-}
-if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
-ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
-  6, 16, 4, "AVX", 
ff_conv_fltp_to_flt_6ch_avx);
-}
 if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
 if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
@@ -80,12 +72,16 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
   0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
+  6, 16, 4, "SSE4", 
ff_conv_fltp_to_flt_6ch_sse4);
 }
 if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
   0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
 ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
   0, 32, 32, "AVX", ff_conv_flt_to_s32_avx);
+ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
+  6, 16, 4, "AVX", 
ff_conv_fltp_to_flt_6ch_avx);
 }
 #endif
 }
-- 
1.7.1

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH v3] x86: fft: convert sse inline asm to yasm

2012-06-24 Thread Mans Rullgard
---
This one has been tested 32-bit and 64-bit, Linux and Windows.
---
 libavcodec/x86/Makefile|1 -
 libavcodec/x86/fft_mmx.asm |  139 
 libavcodec/x86/fft_sse.c   |  110 ---
 3 files changed, 129 insertions(+), 121 deletions(-)
 delete mode 100644 libavcodec/x86/fft_sse.c

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 6602cce..6464739 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -39,7 +39,6 @@ YASM-OBJS-$(CONFIG_DCT)+= x86/dct32_sse.o
 YASM-OBJS-$(CONFIG_ENCODERS)   += x86/dsputilenc_yasm.o
 YASM-OBJS-FFT-$(HAVE_AMD3DNOW) += x86/fft_3dn.o
 YASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT)  += x86/fft_3dn2.o
-YASM-OBJS-FFT-$(HAVE_SSE)  += x86/fft_sse.o
 YASM-OBJS-$(CONFIG_FFT)+= x86/fft_mmx.o \
   $(YASM-OBJS-FFT-yes)
 YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o   \
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index b60d8b0..1cacfb7 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -45,6 +45,10 @@ struc FFTContext
 .mdctbits: resd 1
 .tcos: pointer 1
 .tsin: pointer 1
+.fftperm:  pointer 1
+.fftcalc:  pointer 1
+.imdctcalc:pointer 1
+.imdcthalf:pointer 1
 endstruc
 
 SECTION_RODATA
@@ -65,6 +69,7 @@ perm1: dd 0x00, 0x02, 0x03, 0x01, 0x03, 0x00, 0x02, 0x01
 perm2: dd 0x00, 0x01, 0x02, 0x03, 0x01, 0x00, 0x02, 0x03
 ps_p1p1m1p1root2: dd 1.0, 1.0, -1.0, 1.0, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, 
M_SQRT1_2
 ps_m1m1p1m1p1m1m1m1: dd 1<<31, 1<<31, 0, 1<<31, 0, 1<<31, 1<<31, 1<<31
+ps_m1m1m1m1: times 4 dd 1<<31
 ps_m1p1: dd 1<<31, 0
 
 %assign i 16
@@ -532,6 +537,16 @@ DEFINE_ARGS z, w, n, o1, o3
 rep ret
 %endmacro
 
+%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
+lea r2, [dispatch_tab%1]
+mov r2, [r2 + (%2q-2)*gprsize]
+%ifdef PIC
+lea r3, [$$]
+add r2, r3
+%endif
+call r2
+%endmacro ; FFT_DISPATCH
+
 INIT_YMM avx
 
 %if HAVE_AVX
@@ -548,6 +563,14 @@ INIT_YMM avx
 
 DECL_PASS pass_avx, PASS_BIG 1
 DECL_PASS pass_interleave_avx, PASS_BIG 0
+
+cglobal fft_calc, 2,5,8
+mov r3d, [r0 + FFTContext.nbits]
+mov r0, r1
+mov r1, r3
+FFT_DISPATCH _interleave %+ SUFFIX, r1
+REP_RET
+
 %endif
 
 INIT_XMM sse
@@ -565,6 +588,112 @@ INIT_XMM sse
 DECL_PASS pass_sse, PASS_BIG 1
 DECL_PASS pass_interleave_sse, PASS_BIG 0
 
+cglobal fft_calc, 2,5,8
+mov r3d, [r0 + FFTContext.nbits]
+PUSHr1
+PUSHr3
+mov r0, r1
+mov r1, r3
+FFT_DISPATCH _interleave %+ SUFFIX, r1
+POP rcx
+POP r4
+cmp rcx, 4
+jg  .end
+mov r2, -1
+add rcx, 3
+shl r2, cl
+sub r4, r2
+.loop
+movaps   xmm0, [r4 + r2]
+movaps   xmm1, xmm0
+unpcklps xmm0, [r4 + r2 + 16]
+unpckhps xmm1, [r4 + r2 + 16]
+movaps   [r4 + r2],  xmm0
+movaps   [r4 + r2 + 16], xmm1
+add  r2, 32
+jl   .loop
+.end:
+REP_RET
+
+cextern_naked memcpy
+
+cglobal fft_permute, 2,7,1
+mov r4,  [r0 + FFTContext.revtab]
+mov r5,  [r0 + FFTContext.tmpbuf]
+mov ecx, [r0 + FFTContext.nbits]
+mov r2, 1
+shl r2, cl
+xor r0, r0
+%if ARCH_X86_32
+mov r1, r1m
+%endif
+.loop:
+movaps  xmm0, [r1 + 8*r0]
+movzx   r6, word [r4 + 2*r0]
+movzx   r3, word [r4 + 2*r0 + 2]
+movlps  [r5 + 8*r6], xmm0
+movhps  [r5 + 8*r3], xmm0
+add r0, 2
+cmp r0, r2
+jl  .loop
+shl r2, 3
+%if ARCH_X86_64
+mov r0, r1
+mov r1, r5
+%else
+pushr2
+pushr5
+pushr1
+%endif
+%if ARCH_X86_64 && WIN64 == 0
+jmp memcpy
+%else
+callmemcpy
+%if ARCH_X86_32
+add esp, 12
+%endif
+REP_RET
+%endif
+
+cglobal imdct_calc, 3,5,3
+mov r3d, [r0 + FFTContext.mdctsize]
+mov r4,  [r0 + FFTContext.imdcthalf]
+add r1,  r3
+PUSHr3
+PUSHr1
+%if ARCH_X86_32
+pushr2
+pushr1
+pushr0
+%else
+sub rsp, 8
+%endif
+callr4
+%if ARCH_X86_32
+add esp, 12
+%else
+add rsp, 8
+%endif
+POP r1
+POP r3
+lea r0, [r1 + 2*r3]
+mov r2, r3
+sub r3, 16
+neg r2
+movaps  xmm2, [ps_m1m1m1m1]
+.loop:
+movaps  xmm0, [r1 + r3]
+movaps  xmm1, [r0 + r2]
+shufps  xmm0, xmm0, 0x1b
+shufps  xmm1, xmm1, 0x1b
+xorps   xmm0, xmm2
+movaps  [r0 + r3], xmm1
+movaps  [r1 + r2], xmm0
+sub r3, 16
+add r2, 16
+jl  .loop
+REP_RET
+
 INIT_MMX 3dnow
 %define mulps pfmul
 %define addps pfadd
@@ -582,16 +711,6 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
 %define SECTION_REL
 %endif
 
-%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
-lea r2, [dispatch_tab%1]
-mov r2, [r2 + (%2q-2)*gprsize]
-

Re: [libav-devel] [libav-commits] swscale: fix overflows in vertical scaling at top/bottom edges.

2012-06-24 Thread Luca Barbato
On 06/24/2012 09:53 AM, John Stebbins wrote:
> On 06/23/2012 10:07 PM, John Stebbins wrote:
>> On 06/23/2012 07:41 PM, Luca Barbato wrote:
>>> On 06/23/2012 03:45 PM, John Stebbins wrote:
 On 12/18/2011 05:28 PM, Ronald S. Bultje wrote:
> Module: libav
> Branch: master
> Commit: d49352c7cc22fd8928a761a373c3508be17c9f19
>
> Author:Ronald S. Bultje
> Committer: Ronald S. Bultje
> Date:  Sun Dec 18 08:27:43 2011 -0800
>
> swscale: fix overflows in vertical scaling at top/bottom edges.
>
> This fixes integer multiplication overflows in RGB48 output
> (vertical) scaling as detected by IOC. What happens is that for
> certain types of filters (lanczos, spline, bicubic), the
> intermediate sum of coefficients in the middle of a filter can
> be larger than the fixed-point equivalent of 1.0, even if the
> final sum is 1.0. This is fine and we support that.
>
> However, at frame edges, initFilter() will merge the coefficients
> for the off-screen pixels into the top or bottom pixel, such as
> to emulate edge extension. This means that suddenly, a single
> coefficient can be larger than the fixed-point equivalent of
> 1.0, which the vertical scaling routines do not support.
>
> Therefore, remove the merging of coefficients for edges for
> the vertical scaling filter, and instead add edge detection
> to the scaler itself so that it copies the pointers (not data)
> for the edges (i.e. it uses line[0] for line[-1] as well), so
> that a single coefficient is never larger than the fixed-point
> equivalent of 1.0.
>
 Hi,

 I've noticed that this commit is triggering the assert in
 swscale.c:632.
 assert(chrUSrcPtr + vChrFilterSize - 1<  chrUPixBuf + vChrBufSize * 2);

 This happens when converting with lanczos from YUV420P to RGB32 and it
 is upscaling the chroma.  The comment indicates that there is edge
 detection in the scaler.  So I'm wondering if the edge detection is
 failing for this case or is the assert just vestigial and in need of
 removal?
>>> Which is your testcase? Trying probably is fast.
>>>
>>> lu
>>>
>> I'm afraid I don't understand your question.  Are you asking me to try
>> something?  Or are you asking for more details about how you can
>> reproduce this yourself?  I haven't tried to reproduce this yet with
>> avconv or such.  I'm using libswscale directly.  I've already
>> described the circumstances.  Of course, the assert is silent on a
>> standard debug build of libav.  You would need to add "-DDEBUG" to
>> your CFLAGS to enable it.  I only bumped into it because the assert
>> doesn't seem to be disabled when building on mingw64 and I was testing
>> all the various platforms HandBrake supports.

I was sure I answered you but looks like the email never reached the ml...

>>
> Here's a sample app and image that can be used to reproduce the assert.
> 

Thank you! I'll have a look later.

lu

-- 

Luca Barbato
Gentoo/linux
http://dev.gentoo.org/~lu_zero

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] x86: place some inline asm under #if HAVE_INLINE_ASM

2012-06-24 Thread Martin Storsjö

On Sun, 24 Jun 2012, Mans Rullgard wrote:


From: "Ronald S. Bultje" 

Signed-off-by: Mans Rullgard 
---
Rebased to master and added h264/cabac.
---
libavcodec/x86/cabac.h   |3 +++
libavcodec/x86/cavsdsp_mmx.c |6 ++
libavcodec/x86/dnxhd_mmx.c   |6 ++
libavcodec/x86/h264_i386.h   |3 +++
libavcodec/x86/lpc_mmx.c |6 ++
libavcodec/x86/mathops.h |3 +++
libavcodec/x86/snowdsp_mmx.c |6 ++
libavcodec/x86/vc1dsp_mmx.c  |6 ++
libavutil/internal.h |2 +-
libavutil/x86/bswap.h|3 +++
libavutil/x86/intmath.h  |2 ++
11 files changed, 45 insertions(+), 1 deletion(-)


This seems to work as advertised, so LGTM.

// Martin
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] lavf: win32: use WSAPoll() if available

2012-06-24 Thread Martin Storsjö

On Sun, 24 Jun 2012, Martin Storsjö wrote:


On Wed, 20 Jun 2012, Mans Rullgard wrote:


Windows Vista and later have a WSAPoll() function equivalent to the
standard poll().  Use this instead of emulating it with select()
when possible.
---
This is completely untested.  Help with that would be appreciated.


This is missing a !HAVE_WSAPOLL around the fallback implementation in 
os_support.c. But even when I added that, it didn't work (haven't figured out 
why yet, might be something unrelated though).


It turned out to be an unrelated issue - with that change, this patch 
would work as intended. But as said, I'm preparing a larger patchset that 
should have the same effect as this, and some more.


// Martin
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] dxva2: include dxva.h if found

2012-06-24 Thread Martin Storsjö

On Sun, 24 Jun 2012, Mans Rullgard wrote:


From: "Ronald S. Bultje" 

Apparently, some build environments require dxva.h even for dxva2,
while others lack this header entirely.  Including it conditionally
allows building in both cases.

Signed-off-by: Mans Rullgard 
---
configure  |2 ++
libavcodec/dxva2.h |3 +++
2 files changed, 5 insertions(+)

diff --git a/configure b/configure
index 083ef77..1cba930 100755
--- a/configure
+++ b/configure
@@ -1079,6 +1079,7 @@ HAVE_LIST="
dlfcn_h
dlopen
dos_paths
+dxva_h
ebp_available
ebx_available
exp2
@@ -2907,6 +2908,7 @@ check_func_headers windows.h Sleep
check_func_headers windows.h VirtualAlloc

check_header dlfcn.h
+check_header dxva.h
check_header dxva2api.h
check_header malloc.h
check_header poll.h
diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h
index c06f1f3..b9e3086 100644
--- a/libavcodec/dxva2.h
+++ b/libavcodec/dxva2.h
@@ -33,6 +33,9 @@

#include 
#include 
+#if HAVE_DXVA_H
+#include 
+#endif


This lacks an #include "config.h" somewhere before the HAVE_DXVA_H.

Other than that, it works as intended, tested on mingw64 and MSVC.

// Martin
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] lavr: add x86-optimized mixing functions

2012-06-24 Thread Justin Ruggles
Adds optimized functions for mixing 3 through 8 input channels to 1 and 2
output channels in fltp or s16p format with flt coeffs.
---
Made changes as suggested by Loren.

Benchmarks for Sandy Bridge:
https://docs.google.com/spreadsheet/ccc?key=0AraK7SdCYBjVdG5zZjNHVFRIellsMENOSVdXYmk5aUE#gid=0


 libavresample/utils.c  |7 +-
 libavresample/x86/audio_mix.asm|  284 
 libavresample/x86/audio_mix_init.c |  130 
 3 files changed, 418 insertions(+), 3 deletions(-)

diff --git a/libavresample/utils.c b/libavresample/utils.c
index 6d4509d..c8f21d3 100644
--- a/libavresample/utils.c
+++ b/libavresample/utils.c
@@ -221,9 +221,10 @@ static int handle_buffered_output(AVAudioResampleContext 
*avr,
 return 0;
 }
 
-int avresample_convert(AVAudioResampleContext *avr, void **output,
-   int out_plane_size, int out_samples, void **input,
-   int in_plane_size, int in_samples)
+int attribute_align_arg avresample_convert(AVAudioResampleContext *avr,
+   void **output, int out_plane_size,
+   int out_samples, void **input,
+   int in_plane_size, int in_samples)
 {
 AudioData input_buffer;
 AudioData output_buffer;
diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index 4b0434d..749ddc7 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -231,3 +231,287 @@ MIX_1_TO_2_S16P_FLT
 INIT_XMM avx
 MIX_1_TO_2_S16P_FLT
 %endif
+
+;-
+; void ff_mix_3_8_to_1_2_fltp/s16p_flt(float/int16_t **src, float **matrix,
+;  int len, int out_ch, int in_ch);
+;-
+
+%macro MIX_3_8_TO_1_2_FLT 3 ; %1 = in channels, %2 = out channels, %3 = s16p 
or fltp
+; define some names to make the code clearer
+%assign in_channels %1
+%assign stereo %2 - 1
+%ifidn %3, s16p
+%assign is_s16 1
+%else
+%assign is_s16 0
+%endif
+
+; determine how many matrix elements must go on the stack vs. mmregs
+%assign matrix_elements %1 * %2
+%if is_s16
+%if stereo
+%assign needed_mmregs 7
+%else
+%assign needed_mmregs 5
+%endif
+%else
+%if stereo
+%assign needed_mmregs 4
+%else
+%assign needed_mmregs 3
+%endif
+%endif
+%assign matrix_elements_mm num_mmregs - needed_mmregs
+%if matrix_elements < matrix_elements_mm
+%assign matrix_elements_mm matrix_elements
+%endif
+%if matrix_elements_mm < matrix_elements
+%assign matrix_elements_stack matrix_elements - matrix_elements_mm
+%else
+%assign matrix_elements_stack 0
+%endif
+
+cglobal mix_%1_to_%2_%3_flt, 3,%1+2,needed_mmregs+matrix_elements_mm, src0, 
src1, len, src2, src3, src4, src5, src6, src7
+
+; get aligned stack space if needed
+%if matrix_elements_stack > 0
+%if mmsize == 32
+%assign bkpreg %1 + 1
+%define bkpq r %+ bkpreg %+ q
+mov   bkpq, rsp
+and   rsp, ~(mmsize-1)
+sub   rsp, matrix_elements_stack * mmsize
+%else
+%assign pad matrix_elements_stack * mmsize + (mmsize - gprsize) - 
(stack_offset & (mmsize - gprsize))
+SUB   rsp, pad
+%endif
+%endif
+
+; load matrix pointers
+%define matrix0q r1q
+%define matrix1q r3q
+%if stereo
+mov  matrix1q, [matrix0q+gprsize]
+%endif
+mov  matrix0q, [matrix0q]
+
+; define matrix coeff names
+%assign %%i 0
+%assign %%j needed_mmregs
+%rep in_channels
+%if %%i >= matrix_elements_mm
+CAT_XDEFINE mx_stack_0_, %%i, 1
+CAT_XDEFINE mx_0_, %%i, [rsp+(%%i-matrix_elements_mm)*mmsize]
+%else
+CAT_XDEFINE mx_stack_0_, %%i, 0
+CAT_XDEFINE mx_0_, %%i, m %+ %%j
+%assign %%j %%j+1
+%endif
+%assign %%i %%i+1
+%endrep
+%if stereo
+%assign %%i 0
+%rep in_channels
+%if in_channels + %%i >= matrix_elements_mm
+CAT_XDEFINE mx_stack_1_, %%i, 1
+CAT_XDEFINE mx_1_, %%i, 
[rsp+(in_channels+%%i-matrix_elements_mm)*mmsize]
+%else
+CAT_XDEFINE mx_stack_1_, %%i, 0
+CAT_XDEFINE mx_1_, %%i, m %+ %%j
+%assign %%j %%j+1
+%endif
+%assign %%i %%i+1
+%endrep
+%endif
+
+; load/splat matrix coeffs
+%assign %%i 0
+%rep in_channels
+%if mx_stack_0_ %+ %%i
+VBROADCASTSS m0, [matrix0q+4*%%i]
+mova  mx_0_ %+ %%i, m0
+%else
+VBROADCASTSS mx_0_ %+ %%i, [matrix0q+4*%%i]
+%endif
+%if stereo
+%if mx_stack_1_ %+ %%i
+VBROADCASTSS m0, [matrix1q+4*%%i]
+mova  mx_1_ %+ %%i, m0
+%else
+VBROADCASTSS mx_1_ %+ %%i, [matrix1q+4*%%i]
+%endif
+%endif
+%assign %%i %%i+1
+%endrep
+
+; load channel pointers to registers as offsets from the first channel pointer
+%if ARCH_X86_64
+movsxd   lenq, r2d
+%en

[libav-devel] [PATCH v2] x86: fft: convert sse inline asm to yasm

2012-06-24 Thread Mans Rullgard
---
Some minor updates after comments from Jason.
Now also replaces the avx fft_calc wrapper.

Testing on win64 and avx would be appreciated.
---
 libavcodec/x86/Makefile|1 -
 libavcodec/x86/fft_mmx.asm |  133 
 libavcodec/x86/fft_sse.c   |  110 
 3 files changed, 123 insertions(+), 121 deletions(-)
 delete mode 100644 libavcodec/x86/fft_sse.c

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 6602cce..6464739 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -39,7 +39,6 @@ YASM-OBJS-$(CONFIG_DCT)+= x86/dct32_sse.o
 YASM-OBJS-$(CONFIG_ENCODERS)   += x86/dsputilenc_yasm.o
 YASM-OBJS-FFT-$(HAVE_AMD3DNOW) += x86/fft_3dn.o
 YASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT)  += x86/fft_3dn2.o
-YASM-OBJS-FFT-$(HAVE_SSE)  += x86/fft_sse.o
 YASM-OBJS-$(CONFIG_FFT)+= x86/fft_mmx.o \
   $(YASM-OBJS-FFT-yes)
 YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o   \
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index b60d8b0..21ce309 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -45,6 +45,10 @@ struc FFTContext
 .mdctbits: resd 1
 .tcos: pointer 1
 .tsin: pointer 1
+.fftperm:  pointer 1
+.fftcalc:  pointer 1
+.imdctcalc:pointer 1
+.imdcthalf:pointer 1
 endstruc
 
 SECTION_RODATA
@@ -65,6 +69,7 @@ perm1: dd 0x00, 0x02, 0x03, 0x01, 0x03, 0x00, 0x02, 0x01
 perm2: dd 0x00, 0x01, 0x02, 0x03, 0x01, 0x00, 0x02, 0x03
 ps_p1p1m1p1root2: dd 1.0, 1.0, -1.0, 1.0, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, 
M_SQRT1_2
 ps_m1m1p1m1p1m1m1m1: dd 1<<31, 1<<31, 0, 1<<31, 0, 1<<31, 1<<31, 1<<31
+ps_m1m1m1m1: times 4 dd 1<<31
 ps_m1p1: dd 1<<31, 0
 
 %assign i 16
@@ -532,6 +537,16 @@ DEFINE_ARGS z, w, n, o1, o3
 rep ret
 %endmacro
 
+%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
+lea r2, [dispatch_tab%1]
+mov r2, [r2 + (%2q-2)*gprsize]
+%ifdef PIC
+lea r3, [$$]
+add r2, r3
+%endif
+call r2
+%endmacro ; FFT_DISPATCH
+
 INIT_YMM avx
 
 %if HAVE_AVX
@@ -548,6 +563,14 @@ INIT_YMM avx
 
 DECL_PASS pass_avx, PASS_BIG 1
 DECL_PASS pass_interleave_avx, PASS_BIG 0
+
+cglobal fft_calc, 2,5,8
+mov r3d, [r0 + FFTContext.nbits]
+mov r0, r1
+mov r1, r3
+FFT_DISPATCH _interleave %+ SUFFIX, r3
+REP_RET
+
 %endif
 
 INIT_XMM sse
@@ -565,6 +588,106 @@ INIT_XMM sse
 DECL_PASS pass_sse, PASS_BIG 1
 DECL_PASS pass_interleave_sse, PASS_BIG 0
 
+cglobal fft_calc, 2,5,8
+mov r3d, [r0 + FFTContext.nbits]
+PUSHr3
+mov r0, r1
+mov r1, r3
+FFT_DISPATCH _interleave %+ SUFFIX, r3
+POP rcx
+cmp rcx, 4
+jg  .end
+mov r2, -1
+add rcx, 3
+shl r2, cl
+sub r0, r2
+.loop
+movaps   xmm0, [r0 + r2]
+movaps   xmm1, xmm0
+unpcklps xmm0, [r0 + r2 + 16]
+unpckhps xmm1, [r0 + r2 + 16]
+movaps   [r0 + r2],  xmm0
+movaps   [r0 + r2 + 16], xmm1
+add  r2, 32
+jl   .loop
+.end:
+REP_RET
+
+cextern_naked memcpy
+
+cglobal fft_permute, 2,7,1
+mov r4,  [r0 + FFTContext.revtab]
+mov r5,  [r0 + FFTContext.tmpbuf]
+mov ecx, [r0 + FFTContext.nbits]
+mov r2, 1
+shl r2, cl
+xor r0, r0
+%if ARCH_X86_32
+mov r1, r1m
+%endif
+.loop:
+movaps  xmm0, [r1 + 8*r0]
+movzx   r6, word [r4 + 2*r0]
+movzx   r3, word [r4 + 2*r0 + 2]
+movlps  [r5 + 8*r6], xmm0
+movhps  [r5 + 8*r3], xmm0
+add r0, 2
+cmp r0, r2
+jl  .loop
+shl r2, 3
+%if ARCH_X86_64
+mov r0, r1
+mov r1, r5
+%else
+pushr2
+pushr5
+pushr1
+%endif
+%if ARCH_X86_64 && WIN64 == 0
+jmp memcpy
+%else
+callmemcpy
+%if ARCH_X86_32
+add esp, 12
+%endif
+REP_RET
+%endif
+
+cglobal imdct_calc, 3,5,3
+mov r3d, [r0 + FFTContext.mdctsize]
+mov r4,  [r0 + FFTContext.imdcthalf]
+add r1,  r3
+PUSHr3
+PUSHr1
+%if ARCH_X86_32
+pushr2
+pushr1
+pushr0
+%endif
+callr4
+%if ARCH_X86_32
+add esp, 12
+%endif
+POP r1
+POP r3
+lea r0, [r1 + 2*r3]
+mov r2, r3
+sub r3, 16
+neg r2
+movaps  xmm2, [ps_m1m1m1m1]
+.loop:
+movaps  xmm0, [r1 + r3]
+movaps  xmm1, [r0 + r2]
+shufps  xmm0, xmm0, 0x1b
+shufps  xmm1, xmm1, 0x1b
+xorps   xmm0, xmm2
+movaps  [r0 + r3], xmm1
+movaps  [r1 + r2], xmm0
+sub r3, 16
+add r2, 16
+jl  .loop
+REP_RET
+
 INIT_MMX 3dnow
 %define mulps pfmul
 %define addps pfadd
@@ -582,16 +705,6 @@ DECL_PASS pass_interleave_3dnow, PASS_BIG 0
 %define SECTION_REL
 %endif
 
-%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
-lea r2, [dispatch_tab%1]
-mov r2, [r2 + (%2q-2)*gprsize]
-%ifdef PIC

Re: [libav-devel] [PATCH] Add a news entry for lavfi major bump.

2012-06-24 Thread Justin Ruggles
On 06/22/2012 04:50 PM, Anton Khirnov wrote:
> ---
>  src/index |   18 ++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/src/index b/src/index
> index 1da657c..ab6bc2f 100644
> --- a/src/index
> +++ b/src/index
> @@ -33,6 +33,24 @@ with the latest developments by subscribing to both the
>  
>  News
>  
> +June 22, 2012
> +
> +Major version of the libavfilter library has been bumped. This means 
> that
> +ABI and API compatibility has been broken and some deprecated functions and
> +structures have been removed.
> +
> +
> +
> +Most of those were related to filter internals, which are now private. 
> User-side
> +filters are now not officially supported until libavfilter reaches a more 
> mature
> +state.
> +
> +
> +
> +Libavfilter ABI is now declared unstable until further notice (stabilizing it
> +should take a couple of weeks at most).
> +
> +
>  June 9, 2012
>  
>  We have been busy lately! Today, we are updating all 4 of our release trees.

LGTM.

-Justin

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] Add a news entry for lavfi major bump.

2012-06-24 Thread Anton Khirnov

ping

-- 
Anton Khirnov
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] x86: h264: prevent yasm from issuing a i686 nopl instruction on i586 machines

2012-06-24 Thread Diego Biurrun
On Wed, May 16, 2012 at 08:09:15AM -0700, Ronald S. Bultje wrote:
> On Wed, May 16, 2012 at 5:34 AM, Diego Biurrun  wrote:
> > On Mon, May 14, 2012 at 12:41:45PM +0200, Diego Biurrun wrote:
> >> From: Michael Kostylev 
> >>
> >> ---
> >>  libavcodec/x86/h264_intrapred.asm |    4 
> >>  1 files changed, 4 insertions(+), 0 deletions(-)
> >
> > ping
> 
> If cmov/mmx is independent, then this is too. We should use the same
> solution here as we used in the other thread. There, we marked it as
> mmx2. Here, we should likely do the same thing, or just make sure
> these functions don't get assigned if HAVE_CMOV is not set.

Now that the cmov issue is fixed I would like to ping this again.

The solution for the cmov issue was not, in the end, marking all
relevant functions as mmx2/mmxext.  Doing so here would disable
quite a few optimizations and leave many many old CPUs behind.

This patch is quite localized and non-intrusive IMO, so I'm in
favor of applying it as-is.

Diego
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 2/2] x86: cpu: whitespace (mostly) cosmetics

2012-06-24 Thread Mans Rullgard
This adds whitespace around operators, aligns line continuation
backslashes, and breaks long lines.  Also fixes an ifdef halfway
through a statement.  The one line of duplication this saved is
not worth the ugliness.

Signed-off-by: Mans Rullgard 
---
 libavutil/x86/cpu.c |   71 +++
 1 file changed, 37 insertions(+), 34 deletions(-)

diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 5c3af1f..80e7541 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -26,16 +26,15 @@
 #include "libavutil/cpu.h"
 
 /* ebx saving is necessary for PIC. gcc seems unable to see it alone */
-#define cpuid(index,eax,ebx,ecx,edx)\
-__asm__ volatile\
-("mov %%"REG_b", %%"REG_S"\n\t"\
- "cpuid\n\t"\
- "xchg %%"REG_b", %%"REG_S\
- : "=a" (eax), "=S" (ebx),\
-   "=c" (ecx), "=d" (edx)\
- : "0" (index));
-
-#define xgetbv(index,eax,edx)   \
+#define cpuid(index, eax, ebx, ecx, edx)\
+__asm__ volatile (  \
+"mov%%"REG_b", %%"REG_S" \n\t"  \
+"cpuid   \n\t"  \
+"xchg   %%"REG_b", %%"REG_S \
+: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)\
+: "0" (index))
+
+#define xgetbv(index, eax, edx) \
 __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
 
 #define get_eflags(x)   \
@@ -53,8 +52,8 @@ int ff_get_cpu_flags_x86(void)
 {
 int rval = 0;
 int eax, ebx, ecx, edx;
-int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
-int family=0, model=0;
+int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0;
+int family = 0, model = 0;
 union { int i[3]; char c[12]; } vendor;
 
 #if ARCH_X86_32
@@ -75,19 +74,20 @@ int ff_get_cpu_flags_x86(void)
 vendor.i[1] = edx;
 vendor.i[2] = ecx;
 
-if(max_std_level >= 1){
+if (max_std_level >= 1) {
 cpuid(1, eax, ebx, ecx, std_caps);
-family = ((eax>>8)&0xf) + ((eax>>20)&0xff);
-model  = ((eax>>4)&0xf) + ((eax>>12)&0xf0);
+family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
 if (std_caps & (1 << 15))
 rval |= AV_CPU_FLAG_CMOV;
-if (std_caps & (1<<23))
+if (std_caps & (1 << 23))
 rval |= AV_CPU_FLAG_MMX;
-if (std_caps & (1<<25))
-rval |= AV_CPU_FLAG_MMX2
+if (std_caps & (1 << 25))
+rval |= AV_CPU_FLAG_MMX2;
 #if HAVE_SSE
-  | AV_CPU_FLAG_SSE;
-if (std_caps & (1<<26))
+if (std_caps & (1 << 25))
+rval |= AV_CPU_FLAG_SSE;
+if (std_caps & (1 << 26))
 rval |= AV_CPU_FLAG_SSE2;
 if (ecx & 1)
 rval |= AV_CPU_FLAG_SSE3;
@@ -107,20 +107,19 @@ int ff_get_cpu_flags_x86(void)
 }
 #endif
 #endif
-  ;
 }
 
 cpuid(0x8000, max_ext_level, ebx, ecx, edx);
 
-if(max_ext_level >= 0x8001){
+if (max_ext_level >= 0x8001) {
 cpuid(0x8001, eax, ebx, ecx, ext_caps);
-if (ext_caps & (1U<<31))
+if (ext_caps & (1U << 31))
 rval |= AV_CPU_FLAG_3DNOW;
-if (ext_caps & (1<<30))
+if (ext_caps & (1 << 30))
 rval |= AV_CPU_FLAG_3DNOWEXT;
-if (ext_caps & (1<<23))
+if (ext_caps & (1 << 23))
 rval |= AV_CPU_FLAG_MMX;
-if (ext_caps & (1<<22))
+if (ext_caps & (1 << 22))
 rval |= AV_CPU_FLAG_MMX2;
 
 /* Allow for selectively disabling SSE2 functions on AMD processors
@@ -147,14 +146,18 @@ int ff_get_cpu_flags_x86(void)
 
 if (!strncmp(vendor.c, "GenuineIntel", 12)) {
 if (family == 6 && (model == 9 || model == 13 || model == 14)) {
-/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 
(core1 "yonah")
-* theoretically support sse2, but it's usually slower than mmx,
-* so let's just pretend they don't. AV_CPU_FLAG_SSE2 is disabled 
and
-* AV_CPU_FLAG_SSE2SLOW is enabled so that SSE2 is not used unless
-* explicitly enabled by checking AV_CPU_FLAG_SSE2SLOW. The same
-* situation applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. 
*/
-if (rval & AV_CPU_FLAG_SSE2) rval ^= 
AV_CPU_FLAG_SSE2SLOW|AV_CPU_FLAG_SSE2;
-if (rval & AV_CPU_FLAG_SSE3) rval ^= 
AV_CPU_FLAG_SSE3SLOW|AV_CPU_FLAG_SSE3;
+/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
+ * 6/14 (core1 "yonah") theoretically support sse2, but it's
+ * usually slower than mmx, so let's just pretend they don't.
+ * AV_CPU_FLAG_SSE2 is disabled and AV_CPU_FLAG_SSE2SLOW is
+ * enabled so that S

[libav-devel] [PATCH 1/2] x86: cpu: clean up check for cpuid instruction support

2012-06-24 Thread Mans Rullgard
This adds macros for accessing the EFLAGS register and uses
these instead of coding the entire check in inline asm.

Signed-off-by: Mans Rullgard 
---
 libavutil/x86/cpu.c |   36 
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index b87d3a3..5c3af1f 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -38,6 +38,16 @@
 #define xgetbv(index,eax,edx)   \
 __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
 
+#define get_eflags(x)   \
+__asm__ volatile ("pushfl \n"   \
+  "pop%0  \n"   \
+  : "=r"(x))
+
+#define set_eflags(x)   \
+__asm__ volatile ("push%0 \n"   \
+  "popfl  \n"   \
+  :: "r"(x))
+
 /* Function to test if multimedia instructions are supported...  */
 int ff_get_cpu_flags_x86(void)
 {
@@ -49,26 +59,12 @@ int ff_get_cpu_flags_x86(void)
 
 #if ARCH_X86_32
 x86_reg a, c;
-__asm__ volatile (
-/* See if CPUID instruction is supported ... */
-/* ... Get copies of EFLAGS into eax and ecx */
-"pushfl\n\t"
-"pop %0\n\t"
-"mov %0, %1\n\t"
-
-/* ... Toggle the ID bit in one copy and store */
-/* to the EFLAGS reg */
-"xor $0x20, %0\n\t"
-"push %0\n\t"
-"popfl\n\t"
-
-/* ... Get the (hopefully modified) EFLAGS */
-"pushfl\n\t"
-"pop %0\n\t"
-: "=a" (a), "=c" (c)
-:
-: "cc"
-);
+
+/* Check if CPUID is supported by attempting to toggle the ID bit in
+ * the EFLAGS register. */
+get_eflags(a);
+set_eflags(a ^ 0x20);
+get_eflags(c);
 
 if (a == c)
 return 0; /* CPUID not supported */
-- 
1.7.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] FATE: add a test for itunes cover art.

2012-06-24 Thread Måns Rullgård
Anton Khirnov  writes:

> ---
>  tests/Makefile   |1 +
>  tests/fate/cover_art.mak |7 +++
>  2 files changed, 8 insertions(+)
>  create mode 100644 tests/fate/cover_art.mak
>
> diff --git a/tests/Makefile b/tests/Makefile
> index 429762b..6203f48 100644
> --- a/tests/Makefile
> +++ b/tests/Makefile
> @@ -36,6 +36,7 @@ include $(SRC_PATH)/tests/fate/atrac.mak
>  include $(SRC_PATH)/tests/fate/audio.mak
>  include $(SRC_PATH)/tests/fate/bmp.mak
>  include $(SRC_PATH)/tests/fate/cdxl.mak
> +include $(SRC_PATH)/tests/fate/cover_art.mak
>  include $(SRC_PATH)/tests/fate/dct.mak
>  include $(SRC_PATH)/tests/fate/demux.mak
>  include $(SRC_PATH)/tests/fate/dfa.mak
> diff --git a/tests/fate/cover_art.mak b/tests/fate/cover_art.mak
> new file mode 100644
> index 000..5d2d81e
> --- /dev/null
> +++ b/tests/fate/cover_art.mak
> @@ -0,0 +1,7 @@
> +FATE_COVER_ART += fate-cover_art-m4a

Diego probably wants you to use hyphens rather than underscores, and
in this case I'm inclined to agree.

> +fate-cover_art-m4a: CMD = md5 -i 
> $(SAMPLES)/cover_art/Owner-iTunes_9.0.3.15.m4a -an -c:v copy -f rawvideo

-c:v copy together with -f rawvideo looks weird, but I guess that's how
it's done.

> +fate-cover_art-m4a: REF = 08ba70a3b594ff6345a93965e96a9d3e
> +
> +$(FATE_COVER_ART): CMP = oneline
> +FATE_SAMPLES_AVCONV += $(FATE_COVER_ART)
> +fate-cover_art: $(FATE_COVER_ART)
> -- 

LGTM otherwise.

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] FATE: add a test for itunes cover art.

2012-06-24 Thread Anton Khirnov
---
 tests/Makefile   |1 +
 tests/fate/cover_art.mak |7 +++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/fate/cover_art.mak

diff --git a/tests/Makefile b/tests/Makefile
index 429762b..6203f48 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -36,6 +36,7 @@ include $(SRC_PATH)/tests/fate/atrac.mak
 include $(SRC_PATH)/tests/fate/audio.mak
 include $(SRC_PATH)/tests/fate/bmp.mak
 include $(SRC_PATH)/tests/fate/cdxl.mak
+include $(SRC_PATH)/tests/fate/cover_art.mak
 include $(SRC_PATH)/tests/fate/dct.mak
 include $(SRC_PATH)/tests/fate/demux.mak
 include $(SRC_PATH)/tests/fate/dfa.mak
diff --git a/tests/fate/cover_art.mak b/tests/fate/cover_art.mak
new file mode 100644
index 0000000..5d2d81e
--- /dev/null
+++ b/tests/fate/cover_art.mak
@@ -0,0 +1,7 @@
+FATE_COVER_ART += fate-cover_art-m4a
+fate-cover_art-m4a: CMD = md5 -i $(SAMPLES)/cover_art/Owner-iTunes_9.0.3.15.m4a -an -c:v copy -f rawvideo
+fate-cover_art-m4a: REF = 08ba70a3b594ff6345a93965e96a9d3e
+
+$(FATE_COVER_ART): CMP = oneline
+FATE_SAMPLES_AVCONV += $(FATE_COVER_ART)
+fate-cover_art: $(FATE_COVER_ART)
-- 
1.7.10

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] lavf: win32: use WSAPoll() if available

2012-06-24 Thread Martin Storsjö

On Wed, 20 Jun 2012, Mans Rullgard wrote:


Windows Vista and later have a WSAPoll() function equivalent to the
standard poll().  Use this instead of emulating it with select()
when possible.
---
This is completely untested.  Help with that would be appreciated.


This is missing a !HAVE_WSAPOLL guard around the fallback implementation in 
os_support.c. But even when I added that, it didn't work (I haven't figured 
out why yet; it might be something unrelated, though).


I'll follow up with a more thorough patchset fixing the poll stuff soon 
though.


// Martin
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH] x86: place some inline asm under #if HAVE_INLINE_ASM

2012-06-24 Thread Mans Rullgard
From: "Ronald S. Bultje" 

Signed-off-by: Mans Rullgard 
---
Rebased to master and added h264/cabac.
---
 libavcodec/x86/cabac.h   |3 +++
 libavcodec/x86/cavsdsp_mmx.c |6 ++
 libavcodec/x86/dnxhd_mmx.c   |6 ++
 libavcodec/x86/h264_i386.h   |3 +++
 libavcodec/x86/lpc_mmx.c |6 ++
 libavcodec/x86/mathops.h |3 +++
 libavcodec/x86/snowdsp_mmx.c |6 ++
 libavcodec/x86/vc1dsp_mmx.c  |6 ++
 libavutil/internal.h |2 +-
 libavutil/x86/bswap.h|3 +++
 libavutil/x86/intmath.h  |2 ++
 11 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 6fc2ddb..02dbc54 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -27,6 +27,8 @@
 #include "libavutil/internal.h"
 #include "config.h"
 
+#if HAVE_INLINE_ASM
+
 #ifdef BROKEN_RELOCATIONS
 #define TABLES_ARG , "r"(tables)
 
@@ -225,4 +227,5 @@ static av_always_inline int 
get_cabac_bypass_sign_x86(CABACContext *c, int val)
 return val;
 }
 
+#endif /* HAVE_INLINE_ASM */
 #endif /* AVCODEC_X86_CABAC_H */
diff --git a/libavcodec/x86/cavsdsp_mmx.c b/libavcodec/x86/cavsdsp_mmx.c
index f56f859..b3d2c27 100644
--- a/libavcodec/x86/cavsdsp_mmx.c
+++ b/libavcodec/x86/cavsdsp_mmx.c
@@ -29,6 +29,8 @@
 #include "libavcodec/cavsdsp.h"
 #include "dsputil_mmx.h"
 
+#if HAVE_INLINE_ASM
+
 /* in/out: mma=mma+mmb, mmb=mmb-mma */
 #define SUMSUB_BA( a, b ) \
 "paddw "#b", "#a" \n\t"\
@@ -477,10 +479,14 @@ static void ff_cavsdsp_init_3dnow(CAVSDSPContext* c, 
AVCodecContext *avctx) {
 c->cavs_idct8_add = cavs_idct8_add_mmx;
 }
 
+#endif /* HAVE_INLINE_ASM */
+
 void ff_cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx)
 {
 int mm_flags = av_get_cpu_flags();
 
+#if HAVE_INLINE_ASM
 if (mm_flags & AV_CPU_FLAG_MMX2)  ff_cavsdsp_init_mmx2 (c, avctx);
 if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx);
+#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/x86/dnxhd_mmx.c b/libavcodec/x86/dnxhd_mmx.c
index e193d62..54293aa 100644
--- a/libavcodec/x86/dnxhd_mmx.c
+++ b/libavcodec/x86/dnxhd_mmx.c
@@ -24,6 +24,8 @@
 #include "libavutil/x86_cpu.h"
 #include "libavcodec/dnxhdenc.h"
 
+#if HAVE_INLINE_ASM
+
 static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int 
line_size)
 {
 __asm__ volatile(
@@ -50,10 +52,14 @@ static void get_pixels_8x4_sym_sse2(DCTELEM *block, const 
uint8_t *pixels, int l
 );
 }
 
+#endif /* HAVE_INLINE_ASM */
+
 void ff_dnxhd_init_mmx(DNXHDEncContext *ctx)
 {
+#if HAVE_INLINE_ASM
 if (av_get_cpu_flags() & AV_CPU_FLAG_SSE2) {
 if (ctx->cid_table->bit_depth == 8)
 ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2;
 }
+#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index c0033b7..2daa40a 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -34,6 +34,8 @@
 #include "libavcodec/cabac.h"
 #include "cabac.h"
 
+#if HAVE_INLINE_ASM
+
 //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
 //as that would make optimization work hard)
 #if HAVE_7REGS
@@ -187,4 +189,5 @@ static int decode_significance_8x8_x86(CABACContext *c,
 }
 #endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
 
+#endif /* HAVE_INLINE_ASM */
 #endif /* AVCODEC_X86_H264_I386_H */
diff --git a/libavcodec/x86/lpc_mmx.c b/libavcodec/x86/lpc_mmx.c
index d41c19b..27bebe8 100644
--- a/libavcodec/x86/lpc_mmx.c
+++ b/libavcodec/x86/lpc_mmx.c
@@ -23,6 +23,8 @@
 #include "libavutil/cpu.h"
 #include "libavcodec/lpc.h"
 
+#if HAVE_INLINE_ASM
+
 static void lpc_apply_welch_window_sse2(const int32_t *data, int len,
 double *w_data)
 {
@@ -136,12 +138,16 @@ static void lpc_compute_autocorr_sse2(const double *data, 
int len, int lag,
 }
 }
 
+#endif /* HAVE_INLINE_ASM */
+
 av_cold void ff_lpc_init_x86(LPCContext *c)
 {
 int mm_flags = av_get_cpu_flags();
 
+#if HAVE_INLINE_ASM
 if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
 c->lpc_apply_welch_window = lpc_apply_welch_window_sse2;
 c->lpc_compute_autocorr   = lpc_compute_autocorr_sse2;
 }
+#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h
index 50b0283..e056eb0 100644
--- a/libavcodec/x86/mathops.h
+++ b/libavcodec/x86/mathops.h
@@ -25,6 +25,8 @@
 #include "config.h"
 #include "libavutil/common.h"
 
+#if HAVE_INLINE_ASM
+
 #if ARCH_X86_32
 
 #define MULL MULL
@@ -118,4 +120,5 @@ static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
 return a;
 }
 
+#endif /* HAVE_INLINE_ASM */
 #endif /* AVCODEC_X86_MATHOPS_H */
diff --git a/libavcodec/x86/snowdsp_mmx.c b/libavcodec/x86/snowdsp_mmx.c
index 729a13a..38f3246 100644
--- a/libavcodec/x86/snowdsp_mmx.c
+++ b/libavcodec/x86/snowdsp_mmx.c
@@ -26,6 +26,8 @@
 #include "libavcodec/dwt.h"
 #include "dsputil_mmx.h"
 
+#if HAVE_INLINE_ASM
+
 stat

[libav-devel] [PATCH] dxva2: include dxva.h if found

2012-06-24 Thread Mans Rullgard
From: "Ronald S. Bultje" 

Apparently, some build environments require dxva.h even for dxva2,
while others lack this header entirely.  Including it conditionally
allows building in both cases.

Signed-off-by: Mans Rullgard 
---
 configure  |2 ++
 libavcodec/dxva2.h |3 +++
 2 files changed, 5 insertions(+)

diff --git a/configure b/configure
index 083ef77..1cba930 100755
--- a/configure
+++ b/configure
@@ -1079,6 +1079,7 @@ HAVE_LIST="
 dlfcn_h
 dlopen
 dos_paths
+dxva_h
 ebp_available
 ebx_available
 exp2
@@ -2907,6 +2908,7 @@ check_func_headers windows.h Sleep
 check_func_headers windows.h VirtualAlloc
 
 check_header dlfcn.h
+check_header dxva.h
 check_header dxva2api.h
 check_header malloc.h
 check_header poll.h
diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h
index c06f1f3..b9e3086 100644
--- a/libavcodec/dxva2.h
+++ b/libavcodec/dxva2.h
@@ -33,6 +33,9 @@
 
 #include 
 #include 
+#if HAVE_DXVA_H
+#include <dxva.h>
+#endif
 
 /**
  * @defgroup lavc_codec_hwaccel_dxva2 DXVA2
-- 
1.7.10.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] dtx mode not implemented - sample file (not subscribed)

2012-06-24 Thread Huw Greenhough
> On Mon, 11 Jun 2012, Huw Greenhough wrote:


>> Just to let you know I uploaded MOV00669.3gp & MOV00669.3gp.txt to 
>> upload.libav.org/incoming/ in case they are useful. Sorry I couldn't figure 
>> Git out, so if the problem is already fixed, sorry for troubling you.

>From: Martin Storsjö 
>Sent: Thursday, 14 June 2012, 22:16

>It's not fixed (and there's not much progress on getting it implemented 
>currently either), but if you need it handled, you can use the 
>libopencore_amrnb decoder instead of the built-in one - that one handles DTX 
>packets just fine.

>// Martin

Thanks for the response. I assume it's this:
http://ffmpeg.org/general.html#OpenCORE-AMR
you're referring to. I've made a note of it, but will save it for when I have 
more time - I haven't done a lot of compiling & installing, & it often doesn't 
go smoothly for me.

The quality problems I had were minor for my purposes anyway.

Thanks again,
Huw

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 2/8] FATE: add a test for itunes cover art.

2012-06-24 Thread Måns Rullgård
Anton Khirnov  writes:

> ---
>  tests/Makefile   |1 +
>  tests/fate/cover_art.mak |5 +
>  tests/ref/fate/cover_art-m4a |1 +
>  3 files changed, 7 insertions(+)
>  create mode 100644 tests/fate/cover_art.mak
>  create mode 100644 tests/ref/fate/cover_art-m4a
>
> diff --git a/tests/Makefile b/tests/Makefile
> index 429762b..6203f48 100644
> --- a/tests/Makefile
> +++ b/tests/Makefile
> @@ -36,6 +36,7 @@ include $(SRC_PATH)/tests/fate/atrac.mak
>  include $(SRC_PATH)/tests/fate/audio.mak
>  include $(SRC_PATH)/tests/fate/bmp.mak
>  include $(SRC_PATH)/tests/fate/cdxl.mak
> +include $(SRC_PATH)/tests/fate/cover_art.mak
>  include $(SRC_PATH)/tests/fate/dct.mak
>  include $(SRC_PATH)/tests/fate/demux.mak
>  include $(SRC_PATH)/tests/fate/dfa.mak
> diff --git a/tests/fate/cover_art.mak b/tests/fate/cover_art.mak
> new file mode 100644
> index 0000000..70c8f51
> --- /dev/null
> +++ b/tests/fate/cover_art.mak
> @@ -0,0 +1,5 @@
> +FATE_COVER_ART += fate-cover_art-m4a
> +fate-cover_art-m4a: CMD = md5 -i 
> $(SAMPLES)/cover_art/Owner-iTunes_9.0.3.15.m4a -an -c:v copy -f rawvideo
> +
> +FATE_SAMPLES_AVCONV += $(FATE_COVER_ART)
> +fate-cover_art: $(FATE_COVER_ART)
> diff --git a/tests/ref/fate/cover_art-m4a b/tests/ref/fate/cover_art-m4a
> new file mode 100644
> index 0000000..fb077d1
> --- /dev/null
> +++ b/tests/ref/fate/cover_art-m4a
> @@ -0,0 +1 @@
> +08ba70a3b594ff6345a93965e96a9d3e
> -- 

You could use CMP=oneline and put the checksum directly in the makefile.

-- 
Måns Rullgård
m...@mansr.com
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH] x86: place some inline asm under #if HAVE_INLINE_ASM

2012-06-24 Thread Martin Storsjö

On Fri, 22 Jun 2012, Mans Rullgard wrote:


From: "Ronald S. Bultje" 

Signed-off-by: Mans Rullgard 
---
These are some trivial ones.  Tested by manually disabling HAVE_INLINE_ASM.
---
libavcodec/x86/cavsdsp_mmx.c |6 ++
libavcodec/x86/dnxhd_mmx.c   |6 ++
libavcodec/x86/lpc_mmx.c |6 ++
libavcodec/x86/mathops.h |3 +++
libavcodec/x86/snowdsp_mmx.c |6 ++
libavcodec/x86/vc1dsp_mmx.c  |6 ++
libavutil/internal.h |2 +-
libavutil/x86/bswap.h|3 +++
libavutil/x86/intmath.h  |2 ++
9 files changed, 39 insertions(+), 1 deletion(-)


LGTM, although I didn't test it.

// Martin
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel