On 2014-02-06 00:40:52 +0000, Christophe Gisquet wrote:
> The x86 runs short on registers because numerous elements are not static.
> In addition, splitting them allows more optimized code, at least for x86.

Can you expand on what gets optimized better if decifactor is constant?
I don't see any benefit for the ARM NEON versions.
> ---
>  libavcodec/arm/dcadsp_init_arm.c | 33 ++++++++++++++++++++++++++++++---
>  libavcodec/dcadec.c              | 10 +++++-----
>  libavcodec/dcadsp.c              | 20 +++++++++++++++++---
>  libavcodec/dcadsp.h              |  4 ++--
>  4 files changed, 54 insertions(+), 13 deletions(-)
> 
> diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
> index d49a176..905e4f9 100644
> --- a/libavcodec/arm/dcadsp_init_arm.c
> +++ b/libavcodec/arm/dcadsp_init_arm.c
> @@ -47,16 +47,43 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
>                                  float out[32], const float in[32],
>                                  float scale);
>  
> +static void lfe_fir0_vfp(float *out, const float *in, const float *coefs,
> +                         float scale)
> +{
> +    ff_dca_lfe_fir_vfp(out, in, coefs, 32, scale);
> +}
> +
> +static void lfe_fir1_vfp(float *out, const float *in, const float *coefs,
> +                         float scale)
> +{
> +    ff_dca_lfe_fir_vfp(out, in, coefs, 64, scale);
> +}
> +
> +static void lfe_fir0_neon(float *out, const float *in, const float *coefs,
> +                          float scale)
> +{
> +    ff_dca_lfe_fir_neon(out, in, coefs, 32, scale);
> +}
> +
> +static void lfe_fir1_neon(float *out, const float *in, const float *coefs,
> +                          float scale)
> +{
> +    ff_dca_lfe_fir_neon(out, in, coefs, 64, scale);
> +}
> +

I'll update the arm asm and send a patch you can fold into this one.

>  av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
>  {
>      int cpu_flags = av_get_cpu_flags();
>  
>      if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
> -        s->lfe_fir = ff_dca_lfe_fir_vfp;
> +        s->lfe_fir[0]      = lfe_fir0_vfp;
> +        s->lfe_fir[1]      = lfe_fir1_vfp;
>          s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
>      }
> -    if (have_neon(cpu_flags))
> -        s->lfe_fir = ff_dca_lfe_fir_neon;
> +    if (have_neon(cpu_flags)) {
> +        s->lfe_fir[0] = lfe_fir0_neon;
> +        s->lfe_fir[1] = lfe_fir1_neon;
> +    }
>  }
>  
>  av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
> diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
> index c6d9be8..f40a693 100644
> --- a/libavcodec/dcadec.c
> +++ b/libavcodec/dcadec.c
> @@ -957,23 +957,23 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
>       * samples_out: An array holding interpolated samples
>       */
>  
> -    int decifactor;
> +    int idx;
>      const float *prCoeff;
>      int deciindex;
>  
>      /* Select decimation filter */
>      if (decimation_select == 1) {
> -        decifactor = 64;
> +        idx = 1;
>          prCoeff = lfe_fir_128;
>      } else {
> -        decifactor = 32;
> +        idx = 0;
>          prCoeff = lfe_fir_64;
>      }
>      /* Interpolation */
>      for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
> -        s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, 
> scale);
> +        s->dcadsp.lfe_fir[idx](samples_out, samples_in, prCoeff, scale);
>          samples_in++;
> -        samples_out += 2 * decifactor;
> +        samples_out += 2 * 32 * (1 + idx);
>      }
>  }
>  
> diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
> index 148f6dd..8d242c5 100644
> --- a/libavcodec/dcadsp.c
> +++ b/libavcodec/dcadsp.c
> @@ -32,8 +32,9 @@ static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale)
>          dst[i] = src[i] * fscale;
>  }
>  
> -static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
> -                          int decifactor, float scale)
> +static inline void
> +dca_lfe_fir(float *out, const float *in, const float *coefs,
> +            int decifactor, float scale)
>  {
>      float *out2 = out + decifactor;
>      const float *cf0 = coefs;
> @@ -82,9 +83,22 @@ static void dca_qmf_32_subbands(float samples_in[32][8], int sb_act,
>      }
>  }
>  
> +static void dca_lfe_fir0_c(float *out, const float *in, const float *coefs,
> +                           float scale)
> +{
> +    dca_lfe_fir(out, in, coefs, 32, scale);
> +}
> +
> +static void dca_lfe_fir1_c(float *out, const float *in, const float *coefs,
> +                           float scale)
> +{
> +    dca_lfe_fir(out, in, coefs, 64, scale);
> +}
> +
>  av_cold void ff_dcadsp_init(DCADSPContext *s)
>  {
> -    s->lfe_fir = dca_lfe_fir_c;
> +    s->lfe_fir[0] = dca_lfe_fir0_c;
> +    s->lfe_fir[1] = dca_lfe_fir1_c;
>      s->qmf_32_subbands = dca_qmf_32_subbands;
>      s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
>      if (ARCH_ARM) ff_dcadsp_init_arm(s);
> diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
> index afe40c4..f344a0d 100644
> --- a/libavcodec/dcadsp.h
> +++ b/libavcodec/dcadsp.h
> @@ -23,8 +23,8 @@
>  #include "synth_filter.h"
>  
>  typedef struct DCADSPContext {
> -    void (*lfe_fir)(float *out, const float *in, const float *coefs,
> -                    int decifactor, float scale);
> +    void (*lfe_fir[2])(float *out, const float *in, const float *coefs,
> +                       float scale);
>      void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
>                              SynthFilterContext *synth, FFTContext *imdct,
>                              float synth_buf_ptr[512],

otherwise ok

Janne
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to