Re: [libav-devel] [PATCH 1/3] mips: Optimization of AMR NB and WB decoders for MIPS

Diego Biurrun Fri, 17 Aug 2012 06:42:40 -0700

On Fri, Aug 17, 2012 at 01:49:45PM +0200, Nedeljko Babic wrote:
> AMR NB and WB decoders are optimized for MIPS architecture.
> Appropriate Makefiles are changed accordingly.
> 
> Configure script is changed in order to support optimizations.
>  Optimizations are enabled by default when compiling is done for
>   mips architecture.
>  Appropriate cflags are automatically set.
>  Support for several mips CPUs is added in configure script.
> 
> New ffmpeg options are added for disabling optimizations.
> 
> The FFMPEG option --disable-mipsfpu disables MIPS floating point
>  optimizations.
> The FFMPEG option --disable-mips32r2 disables MIPS32R2
>  optimizations.
> The FFMPEG option --disable-mipsdspr1 disables MIPS DSP ASE R1
>  optimizations.
> The FFMPEG option --disable-mipsdspr2 disables MIPS DSP ASE R2
>  optimizations.


No "ffmpeg" anywhere in sight here ;)

> --- a/configure
> +++ b/configure
> @@ -256,7 +256,10 @@ Optimization options (experts only):
>    --disable-neon           disable NEON optimizations
>    --disable-vis            disable VIS optimizations
>    --disable-yasm           disable use of yasm assembler
> -
> +  --disable-mips32r2       disable MIPS32R2 optimizations
> +  --disable-mipsdspr1      disable MIPS DSP ASE R1 optimizations
> +  --disable-mipsdspr2      disable MIPS DSP ASE R2 optimizations
> +  --disable-mipsfpu        disable floating point MIPS optimizations

Move these directly above the mmi entry.

> @@ -1069,6 +1072,10 @@ ARCH_EXT_LIST='
>      ssse3
>      vfpv3
>      vis
> +    mipsfpu
> +    mips32r2
> +    mipsdspr1
> +    mipsdspr2
>  '

ditto

> @@ -1305,6 +1312,10 @@ armvfp_deps="arm"
>  neon_deps="arm"
>  vfpv3_deps="armvfp"
>  
> +mipsfpu_deps="mips"
> +mips32r2_deps="mips"
> +mipsdspr1_deps="mips"
> +mipsdspr2_deps="mips"
>  mmi_deps="mips"

.. like you did here.

> @@ -2491,6 +2502,28 @@ elif enabled mips; then
>  
> +    case $cpu in
> +        24kc)
> +            disable mipsfpu
> +            disable mipsdspr1
> +            disable mipsdspr2
> +        ;;

Skip the indentation level after the case statement.

The disable() function can handle a list of arguments, no need
to call it three times.

> +        24kf*)
> +            disable mipsdspr1
> +            disable mipsdspr2
> +        ;;
> +        24kec|34kc|1004kc)
> +            disable mipsfpu
> +            disable mipsdspr2
> +        ;;
> +        24kef*|34kf*|1004kf*)
> +            disable mipsdspr2
> +        ;;
> +        74kc)
> +            disable mipsfpu
> +        ;;
> +    esac

same here

> @@ -2854,6 +2887,14 @@ elif enabled mips; then
>  
>      check_inline_asm loongson '"dmult.g $1, $2, $3"'
>      enabled mmi && check_inline_asm mmi '"lq $2, 0($2)"'
> +    enabled mips32r2  && add_cflags "-mips32r2" && add_asflags "-mips32r2" &&
> +     check_inline_asm mips32r2  '"rotr $t0, $t1, 1"'
> +    enabled mipsdspr1 && add_cflags "-mdsp" && add_asflags "-mdsp" &&
> +     check_inline_asm mipsdspr1 '"addu.qb $t0, $t1, $t2"'
> +    enabled mipsdspr2 && add_cflags "-mdspr2" && add_asflags "-mdspr2" &&
> +     check_inline_asm mipsdspr2 '"absq_s.qb $t0, $t1"'
> +    enabled mipsfpu   && add_cflags "-mhard-float" && add_asflags 
> "-mhard-float" &&
> +     check_inline_asm mipsfpu   '"madd.d $f0, $f2, $f4, $f6"'

Indent by 4 spaces, not 1.

> --- a/libavcodec/acelp_filters.c
> +++ b/libavcodec/acelp_filters.c
> @@ -143,3 +143,12 @@ void ff_tilt_compensation(float *mem, float tilt, float 
> *samples, int size)
>      samples[0] -= tilt * *mem;
>      *mem = new_tilt_mem;
>  }
> +
> +void ff_acelp_filter_init(ACELPFContext *c)
> +{
> +    c->acelp_interpolatef                      = ff_acelp_interpolatef;
> +    c->acelp_apply_order_2_transfer_function   = 
> ff_acelp_apply_order_2_transfer_function;
> +
> +    if(HAVE_MIPSFPU)

if (

> --- a/libavcodec/acelp_filters.h
> +++ b/libavcodec/acelp_filters.h
> @@ -25,6 +25,39 @@
>  
> +typedef struct ACELPFContext {
> +    /**
> +    * Floating point version of ff_acelp_interpolate()
> +    */
> +    void (*acelp_interpolatef)(float *out, const float *in,
> +                            const float *filter_coeffs, int precision,
> +                            int frac_pos, int filter_length, int length);

Indentation is off.

> +}ACELPFContext;

space after }

> +/**
> + * Initialize ACELPFContext.
> + */
> +void ff_acelp_filter_init(ACELPFContext *c);
> +void ff_acelp_filter_init_mips(ACELPFContext *c);

The comment is redundant.

> --- a/libavcodec/acelp_vectors.c
> +++ b/libavcodec/acelp_vectors.c
> @@ -240,3 +240,11 @@ void ff_clear_fixed_vector(float *out, const AMRFixed 
> *in, int size)
> +
> +void ff_acelp_vectors_init(ACELPVContext *c)
> +{
> +    c->weighted_vector_sumf   = ff_weighted_vector_sumf;
> +
> +    if(HAVE_MIPSFPU)

if (

more below

> --- a/libavcodec/acelp_vectors.h
> +++ b/libavcodec/acelp_vectors.h
> @@ -25,6 +25,30 @@
>  
> +typedef struct ACELPVContext {
> +
> +}ACELPVContext;
> +
> +/**
> + * Initialize ACELPVContext.
> + */
> +void ff_acelp_vectors_init(ACELPVContext *c);
> +void ff_acelp_vectors_init_mips(ACELPVContext *c);

same comments apply

> --- a/libavcodec/amrnbdec.c
> +++ b/libavcodec/amrnbdec.c
> @@ -394,7 +405,8 @@ static void decode_pitch_vector(AMRContext *p,
>  
> -    ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int,
> +    p->acelpf_ctx.acelp_interpolatef(p->excitation,
> +                          p->excitation + 1 - pitch_lag_int,
>                            ff_b60_sinc, 6,
> @@ -779,12 +791,12 @@ static int synthesis(AMRContext *p, float *lpc,
>  
> -    ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector,
> +    p->acelpv_ctx.weighted_vector_sumf(excitation, p->pitch_vector, 
> fixed_vector,
>                              p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE);
>  
> -        float energy = ff_dot_productf(excitation, excitation,
> +        float energy = p->celpm_ctx.dot_productf(excitation, excitation,
>                                         AMR_SUBFRAME_SIZE);
> @@ -799,7 +811,8 @@ static int synthesis(AMRContext *p, float *lpc,
>  
> -    ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE,
> +    p->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation,
> +                                 AMR_SUBFRAME_SIZE,
>                                   LP_FILTER_ORDER);

Indentation is now off, more below.

> --- a/libavcodec/amrwbdec.c
> +++ b/libavcodec/amrwbdec.c
> @@ -579,15 +591,17 @@ static void pitch_sharpening(AMRWBContext *ctx, float 
> *fixed_vector)
>   *
>   * @param[in] p_vector, f_vector   Pitch and fixed excitation vectors
>   * @param[in] p_gain, f_gain       Pitch and fixed gains
> + * @param[in] ctx                  The context
>   */
>  // XXX: There is something wrong with the precision here! The magnitudes
>  // of the energies are not correct. Please check the reference code carefully
>  static float voice_factor(float *p_vector, float p_gain,
> -                          float *f_vector, float f_gain)
> +                          float *f_vector, float f_gain,
> +                          CELPMContext *ctx)

The context usually is the first function parameter.
Above you added it as first function parameter...

> @@ -1023,6 +1039,8 @@ static void hb_synthesis(AMRWBContext *ctx, int 
> subframe, float *samples,
>   * @remark It is safe to pass the same array in in and out parameters
>   */
> +
> +#ifndef hb_fir_filter
>  static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
>                            float mem[HB_FIR_SIZE], const float *in)
> @@ -1040,6 +1058,7 @@ static void hb_fir_filter(float *out, const float 
> fir_coef[HB_FIR_SIZE + 1],
>      memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
>  }
> +#endif /* hb_fir_filter */
>  
> --- a/libavcodec/lsp.c
> +++ b/libavcodec/lsp.c
> @@ -158,6 +160,7 @@ void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, 
> const int16_t* lsp_2nd
>  
> +#ifndef ff_lsp2polyf
>  void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order)
> @@ -174,6 +177,7 @@ void ff_lsp2polyf(const double *lsp, double *f, int 
> lp_half_order)
>      }
>  }
> +#endif /* ff_lsp2polyf */

We usually do this via function pointer assignments instead of ifdefs.

> --- a/libavcodec/celp_filters.h
> +++ b/libavcodec/celp_filters.h
> @@ -25,6 +25,55 @@
>  
> +typedef struct CELPFContext {
> +
> +}CELPFContext;
> +
> +/**
> + * Initialize CELPFContext.
> + */
> +void ff_celp_filter_init(CELPFContext *c);
> +void ff_celp_filter_init_mips(CELPFContext *c);

see above

> --- a/libavcodec/celp_math.h
> +++ b/libavcodec/celp_math.h
> @@ -25,6 +25,25 @@
>  
>  #include <stdint.h>
>  
> +typedef struct CELPMContext {
> +    /**
> +     * Return the dot product.
> +     * @param a input data array
> +     * @param b input data array
> +     * @param length number of elements
> +     *
> +     * @return dot product = sum of elementwise products
> +     */
> +    float (*dot_productf)(const float* a, const float* b, int length);
> +
> +}CELPMContext;
> +
> +/**
> + * Initialize CELPMContext.
> + */
> +void ff_celp_math_init(CELPMContext *c);
> +void ff_celp_math_init_mips(CELPMContext *c);

ditto

> --- a/libavcodec/mips/Makefile
> +++ b/libavcodec/mips/Makefile
> @@ -1,3 +1,13 @@
>  MMI-OBJS += mips/dsputil_mmi.o                                          \
>              mips/idct_mmi.o                                             \
> -            mips/mpegvideo_mmi.o                                        \
> +            mips/mpegvideo_mmi.o

This is a stray change.

> --- /dev/null
> +++ b/libavcodec/mips/acelp_filters_mips.c
> @@ -0,0 +1,210 @@
> +
> +/**
> + * @file
> + * Reference: libavcodec/acelp_filters.c
> + */

I'm not sure what you mean by "reference" here.  The filenames have the
same basename, so it's clear what the optimization is derived from.

> +static void ff_acelp_interpolatef_mips(float *out, const float *in,
> +                           const float *filter_coeffs, int precision,
> +                           int frac_pos, int filter_length, int length)

Indentation is off, more below.

> --- /dev/null
> +++ b/libavcodec/mips/acelp_vectors_mips.c
> @@ -0,0 +1,96 @@
> +
> +static void ff_weighted_vector_sumf_mips(
> +                  float *out, const float *in_a, const float *in_b,
> +                  float weight_coeff_a, float weight_coeff_b, int length)
> +{

Please use consistent formatting, i.e. don't break the line after '('.

> --- /dev/null
> +++ b/libavcodec/mips/amrwbdec_mips.c
> @@ -0,0 +1,186 @@
> +void hb_fir_filter_mips(float *out, const float fir_coef[HB_FIR_SIZE + 1],
> +                          float mem[HB_FIR_SIZE], const float *in)

Indentation is off.

> +    memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));

sizeof(*variable) is preferable to sizeof(type)

> --- /dev/null
> +++ b/libavcodec/mips/lsp_mips.h
> @@ -0,0 +1,108 @@
> +
> +    for(i=2; i<=lp_half_order; i++)
> +    {

for (i = 2; i <= lp_half_order; i++) {

> --- /dev/null
> +++ b/libavutil/mips/libm_mips.h
> @@ -0,0 +1,73 @@
> +
> +#ifndef AVUTIL_LIBM_MIPS_H
> +#define AVUTIL_LIBM_MIPS_H
> +
> +static av_always_inline av_const long int lrintf_mips(float x)
> +{
> +    register int ret_int;
> +
> +    __asm__ __volatile__ (
> +        "cvt.w.s    %[x],       %[x]    \n\t"
> +        "mfc1       %[ret_int], %[x]    \n\t"
> +
> +        :[x]"+f"(x), [ret_int]"=r"(ret_int)
> +    );
> +    return ret_int;
> +}
> +
> +#undef lrintf
> +#define lrintf(x)   lrintf_mips(x)
> +
> +#define HAVE_LRINTF 1
> +#endif /* AVUTIL_LIBM_MIPS_H */

Why should this be in libav and not in libc, where all programs
will benefit?

Diego
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 1/3] mips: Optimization of AMR NB and WB decoders for MIPS

Reply via email to