On Fri, Aug 17, 2012 at 01:49:45PM +0200, Nedeljko Babic wrote: > AMR NB and WB decoders are optimized for MIPS architecture. > Appropriate Makefiles are changed accordingly. > > Cnfigure script is changed in order to support optimizations. > Optimizations are enabled by default when compiling is done for > mips architecture. > Appropriate cflags are automatically set. > Support for several mips CPUs is added in configure script. > > New ffmpeg options are added for disabling optimizations. > > The FFMPEG option --disable-mipsfpu disables MIPS floating point > optimizations. > The FFMPEG option --disable-mips32r2 disables MIPS32R2 > optimizations. > The FFMPEG option --disable-mipsdspr1 disables MIPS DSP ASE R1 > optimizations. > The FFMPEG option --disable-mipsdspr2 disables MIPS DSP ASE R2 > optimizations.
No "ffmpeg" anywhere in sight here ;) > --- a/configure > +++ b/configure > @@ -256,7 +256,10 @@ Optimization options (experts only): > --disable-neon disable NEON optimizations > --disable-vis disable VIS optimizations > --disable-yasm disable use of yasm assembler > - > + --disable-mips32r2 disable MIPS32R2 optimizations > + --disable-mipsdspr1 disable MIPS DSP ASE R1 optimizations > + --disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations > + --disable-mipsfpu disable floating point MIPS optimizations Move these directly above the mmi entry. > @@ -1069,6 +1072,10 @@ ARCH_EXT_LIST=' > ssse3 > vfpv3 > vis > + mipsfpu > + mips32r2 > + mipsdspr1 > + mipsdspr2 > ' ditto > @@ -1305,6 +1312,10 @@ armvfp_deps="arm" > neon_deps="arm" > vfpv3_deps="armvfp" > > +mipsfpu_deps="mips" > +mips32r2_deps="mips" > +mipsdspr1_deps="mips" > +mipsdspr2_deps="mips" > mmi_deps="mips" .. like you did here. > @@ -2491,6 +2502,28 @@ elif enabled mips; then > > + case $cpu in > + 24kc) > + disable mipsfpu > + disable mipsdspr1 > + disable mipsdspr2 > + ;; Skip the indentation level after the case statement. The disable() function can handle a list of arguments, no need to call it three times. > + 24kf*) > + disable mipsdspr1 > + disable mipsdspr2 > + ;; > + 24kec|34kc|1004kc) > + disable mipsfpu > + disable mipsdspr2 > + ;; > + 24kef*|34kf*|1004kf*) > + disable mipsdspr2 > + ;; > + 74kc) > + disable mipsfpu > + ;; > + esac same here > @@ -2854,6 +2887,14 @@ elif enabled mips; then > > check_inline_asm loongson '"dmult.g $1, $2, $3"' > enabled mmi && check_inline_asm mmi '"lq $2, 0($2)"' > + enabled mips32r2 && add_cflags "-mips32r2" && add_asflags "-mips32r2" && > + check_inline_asm mips32r2 '"rotr $t0, $t1, 1"' > + enabled mipsdspr1 && add_cflags "-mdsp" && add_asflags "-mdsp" && > + check_inline_asm mipsdspr1 '"addu.qb $t0, $t1, $t2"' > + enabled mipsdspr2 && add_cflags "-mdspr2" && add_asflags "-mdspr2" && > + check_inline_asm mipsdspr2 '"absq_s.qb $t0, $t1"' > + enabled mipsfpu && add_cflags "-mhard-float" && add_asflags > "-mhard-float" && > + check_inline_asm mipsfpu '"madd.d $f0, $f2, $f4, $f6"' Indent by 4 spaces, not 1. > --- a/libavcodec/acelp_filters.c > +++ b/libavcodec/acelp_filters.c > @@ -143,3 +143,12 @@ void ff_tilt_compensation(float *mem, float tilt, float > *samples, int size) > samples[0] -= tilt * *mem; > *mem = new_tilt_mem; > } > + > +void ff_acelp_filter_init(ACELPFContext *c) > +{ > + c->acelp_interpolatef = ff_acelp_interpolatef; > + c->acelp_apply_order_2_transfer_function = > ff_acelp_apply_order_2_transfer_function; > + > + if(HAVE_MIPSFPU) if ( > --- a/libavcodec/acelp_filters.h > +++ b/libavcodec/acelp_filters.h > @@ -25,6 +25,39 @@ > > +typedef struct ACELPFContext { > + /** > + * Floating point version of ff_acelp_interpolate() > + */ > + void (*acelp_interpolatef)(float *out, const float *in, > + const float *filter_coeffs, int precision, > + int frac_pos, int filter_length, int length); Indentation is off. > +}ACELPFContext; space after { > +/** > + * Initialize ACELPFContext. > + */ > +void ff_acelp_filter_init(ACELPFContext *c); > +void ff_acelp_filter_init_mips(ACELPFContext *c); The comment is redundant. > --- a/libavcodec/acelp_vectors.c > +++ b/libavcodec/acelp_vectors.c > @@ -240,3 +240,11 @@ void ff_clear_fixed_vector(float *out, const AMRFixed > *in, int size) > + > +void ff_acelp_vectors_init(ACELPVContext *c) > +{ > + c->weighted_vector_sumf = ff_weighted_vector_sumf; > + > + if(HAVE_MIPSFPU) if ( more below > --- a/libavcodec/acelp_vectors.h > +++ b/libavcodec/acelp_vectors.h > @@ -25,6 +25,30 @@ > > +typedef struct ACELPVContext { > + > +}ACELPVContext; > + > +/** > + * Initialize ACELPVContext. > + */ > +void ff_acelp_vectors_init(ACELPVContext *c); > +void ff_acelp_vectors_init_mips(ACELPVContext *c); same comments apply > --- a/libavcodec/amrnbdec.c > +++ b/libavcodec/amrnbdec.c > @@ -394,7 +405,8 @@ static void decode_pitch_vector(AMRContext *p, > > - ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int, > + p->acelpf_ctx.acelp_interpolatef(p->excitation, > + p->excitation + 1 - pitch_lag_int, > ff_b60_sinc, 6, > @@ -779,12 +791,12 @@ static int synthesis(AMRContext *p, float *lpc, > > - ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector, > + p->acelpv_ctx.weighted_vector_sumf(excitation, p->pitch_vector, > fixed_vector, > p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE); > > - float energy = ff_dot_productf(excitation, excitation, > + float energy = p->celpm_ctx.dot_productf(excitation, excitation, > AMR_SUBFRAME_SIZE); > @@ -799,7 +811,8 @@ static int synthesis(AMRContext *p, float *lpc, > > - ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE, > + p->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation, > + AMR_SUBFRAME_SIZE, > LP_FILTER_ORDER); Indentation is now off, more below. > --- a/libavcodec/amrwbdec.c > +++ b/libavcodec/amrwbdec.c > @@ -579,15 +591,17 @@ static void pitch_sharpening(AMRWBContext *ctx, float > *fixed_vector) > * > * @param[in] p_vector, f_vector Pitch and fixed excitation vectors > * @param[in] p_gain, f_gain Pitch and fixed gains > + * @param[in] ctx The context > */ > // XXX: There is something wrong with the precision here! The magnitudes > // of the energies are not correct. Please check the reference code carefully > static float voice_factor(float *p_vector, float p_gain, > - float *f_vector, float f_gain) > + float *f_vector, float f_gain, > + CELPMContext *ctx) The context usually is the first function parameter. Above you added it as first function parameter... > @@ -1023,6 +1039,8 @@ static void hb_synthesis(AMRWBContext *ctx, int > subframe, float *samples, > * @remark It is safe to pass the same array in in and out parameters > */ > + > +#ifndef hb_fir_filter > static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1], > float mem[HB_FIR_SIZE], const float *in) > @@ -1040,6 +1058,7 @@ static void hb_fir_filter(float *out, const float > fir_coef[HB_FIR_SIZE + 1], > memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float)); > } > +#endif /* hb_fir_filter */ > > --- a/libavcodec/lsp.c > +++ b/libavcodec/lsp.c > @@ -158,6 +160,7 @@ void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, > const int16_t* lsp_2nd > > +#ifndef ff_lsp2polyf > void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order) > @@ -174,6 +177,7 @@ void ff_lsp2polyf(const double *lsp, double *f, int > lp_half_order) > } > } > +#endif /* ff_lsp2polyf */ We usually do this via function pointer assignments instead of ifdefs. > --- a/libavcodec/celp_filters.h > +++ b/libavcodec/celp_filters.h > @@ -25,6 +25,55 @@ > > +typedef struct CELPFContext { > + > +}CELPFContext; > + > +/** > + * Initialize CELPFContext. > + */ > +void ff_celp_filter_init(CELPFContext *c); > +void ff_celp_filter_init_mips(CELPFContext *c); see above > --- a/libavcodec/celp_math.h > +++ b/libavcodec/celp_math.h > @@ -25,6 +25,25 @@ > > #include <stdint.h> > > +typedef struct CELPMContext { > + /** > + * Return the dot product. > + * @param a input data array > + * @param b input data array > + * @param length number of elements > + * > + * @return dot product = sum of elementwise products > + */ > + float (*dot_productf)(const float* a, const float* b, int length); > + > +}CELPMContext; > + > +/** > + * Initialize CELPMContext. > + */ > +void ff_celp_math_init(CELPMContext *c); > +void ff_celp_math_init_mips(CELPMContext *c); ditto > --- a/libavcodec/mips/Makefile > +++ b/libavcodec/mips/Makefile > @@ -1,3 +1,13 @@ > MMI-OBJS += mips/dsputil_mmi.o \ > mips/idct_mmi.o \ > - mips/mpegvideo_mmi.o \ > + mips/mpegvideo_mmi.o This is a stray change. > --- /dev/null > +++ b/libavcodec/mips/acelp_filters_mips.c > @@ -0,0 +1,210 @@ > + > +/** > + * @file > + * Reference: libavcodec/acelp_filters.c > + */ I'm not sure what you mean by "reference" here. The filenames have the same basename, so it's clear what the optimization is derived from. > +static void ff_acelp_interpolatef_mips(float *out, const float *in, > + const float *filter_coeffs, int precision, > + int frac_pos, int filter_length, int length) Indentation is off, more below. > --- /dev/null > +++ b/libavcodec/mips/acelp_vectors_mips.c > @@ -0,0 +1,96 @@ > + > +static void ff_weighted_vector_sumf_mips( > + float *out, const float *in_a, const float *in_b, > + float weight_coeff_a, float weight_coeff_b, int length) > +{ Please use consistent formatting, i.e. don't break the line after '('. > --- /dev/null > +++ b/libavcodec/mips/amrwbdec_mips.c > @@ -0,0 +1,186 @@ > +void hb_fir_filter_mips(float *out, const float fir_coef[HB_FIR_SIZE + 1], > + float mem[HB_FIR_SIZE], const float *in) Indentation is off. > + memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float)); sizeof(*variable) is preferable to sizeof(type) > --- /dev/null > +++ b/libavcodec/mips/lsp_mips.h > @@ -0,0 +1,108 @@ > + > + for(i=2; i<=lp_half_order; i++) > + { for (i = 2; i <= lp_half_order; i++) { > --- /dev/null > +++ b/libavutil/mips/libm_mips.h > @@ -0,0 +1,73 @@ > + > +#ifndef AVUTIL_LIBM_MIPS_H > +#define AVUTIL_LIBM_MIPS_H > + > +static av_always_inline av_const long int lrintf_mips(float x) > +{ > + register int ret_int; > + > + __asm__ __volatile__ ( > + "cvt.w.s %[x], %[x] \n\t" > + "mfc1 %[ret_int], %[x] \n\t" > + > + :[x]"+f"(x), [ret_int]"=r"(ret_int) > + ); > + return ret_int; > +} > + > +#undef lrintf > +#define lrintf(x) lrintf_mips(x) > + > +#define HAVE_LRINTF 1 > +#endif /* AVUTIL_LIBM_MIPS_H */ Why should this be in libav and not in libc, where all programs will benefit? Diego _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel