On Sun, Feb 09, 2014 at 02:33:42PM +0100, Christophe Gisquet wrote: > --- a/libavcodec/arm/dcadsp_init_arm.c > +++ b/libavcodec/arm/dcadsp_init_arm.c > @@ -53,6 +53,45 @@ void ff_synth_filter_float_neon(FFTContext *imdct, > > +#if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y > + > +static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp, > + float *dst, const int8_t *src, int > scale) > +{ > +} > + > +static void decode_hf_neon(float dst[DCA_SUBBANDS][8], > + const int32_t vq_num[DCA_SUBBANDS], > + const int8_t hf_vq[1024][32], intptr_t vq_offset, > + int32_t scale[DCA_SUBBANDS][2], > + intptr_t start, intptr_t end) > +{ > +} > + > +#endif
Please comment the #endif with the condition it closes. > --- a/libavcodec/dcadec.c > +++ b/libavcodec/dcadec.c > @@ -50,14 +50,11 @@ > #if ARCH_ARM > # include "arm/dca.h" > #endif > -#if ARCH_X86 > -# include "x86/dca.h" > -#endif > > //#define TRACE > > #define DCA_PRIM_CHANNELS_MAX (7) > -#define DCA_SUBBANDS (32) > +// DCA_SUBBANDS defined in dcadsp.h Drop this comment. > --- a/libavcodec/dcadsp.h > +++ b/libavcodec/dcadsp.h > @@ -22,6 +22,8 @@ > > +#define DCA_SUBBANDS (32) The spacing looks slightly off, and the () are redundant. > --- a/libavcodec/x86/dca.h > +++ /dev/null > @@ -1,55 +0,0 @@ > - > -#if ARCH_X86_64 && HAVE_SSE2_INLINE > -# include "libavutil/x86/asm.h" > -# include "libavutil/mem.h" > -#include "libavcodec/dcadsp.h" > - > -# define int8x8_fmul_int32 int8x8_fmul_int32 > -static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp, > - float *dst, const int8_t *src, int > scale) > -{ > - DECLARE_ALIGNED(16, static const uint32_t, inverse16) = 0x3D800000; > - __asm__ volatile ( > - "cvtsi2ss %2, %%xmm0 \n\t" > - "mulss %3, %%xmm0 \n\t" > - "movq (%1), %%xmm1 \n\t" > - "punpcklbw %%xmm1, %%xmm1 \n\t" > - "movaps %%xmm1, %%xmm2 \n\t" > - "punpcklwd %%xmm1, %%xmm1 \n\t" > - "punpckhwd %%xmm2, %%xmm2 \n\t" > - "psrad $24, %%xmm1 \n\t" > - "psrad $24, %%xmm2 \n\t" > - "shufps $0, %%xmm0, %%xmm0 \n\t" > - "cvtdq2ps %%xmm1, %%xmm1 \n\t" > - "cvtdq2ps %%xmm2, %%xmm2 \n\t" > - "mulps %%xmm0, %%xmm1 \n\t" > - "mulps %%xmm0, %%xmm2 \n\t" > - "movaps %%xmm1, 0(%0) \n\t" > - "movaps %%xmm2, 16(%0) \n\t" > - :: "r"(dst), "r"(src), "m"(scale), "m"(inverse16) > - XMM_CLOBBERS_ONLY("xmm0", "xmm1", "xmm2") > - ); > -} > - > -#endif /* ARCH_X86_64 && HAVE_SSE2_INLINE */ > --- a/libavcodec/x86/dcadsp.asm > +++ b/libavcodec/x86/dcadsp.asm > @@ -26,18 +26,37 @@ pf_inv16: times 4 dd 0x3D800000 ; 1/16 > > -; void int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale) > -%macro INT8X8_FMUL_INT32 0 > -cglobal int8x8_fmul_int32, 3,3,5, dst, src, scale > - cvtsi2ss m0, scalem > +; 
decode_hf(float dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS], > +; const int8_t hf_vq[1024][32], intptr_t vq_offset, > +; int32_t scale[DCA_SUBBANDS][2], > +; intptr_t start, intptr_t end) The indentation of the continuation lines is off. > +%macro DECODE_HF 0 > +cglobal decode_hf, 6,6,5, dst, num, src, offset, scale, start, end I owe you a beer for eliminating the inline assembly :) > @@ -67,27 +86,33 @@ cglobal int8x8_fmul_int32, 3,3,5, dst, src, scale > mulps m1, m0 > mulps m2, m0 > - mova [dstq+ 0], m1 > - mova [dstq+16], m2 > + mova [dstq + 8*startq + 0], m1 > + mova [dstq + 8*startq + 16], m2 > + add startq, 4 > + cmp startq, endm > + jl .loop > +.end: > +%if cpuflag(sse2) == 0 Why not use notcpuflag(sse2) here? Diego _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel