On Tue, Feb 28, 2012 at 9:26 PM, Alex Converse <[email protected]> wrote:
> On Tue, Feb 28, 2012 at 4:20 AM, Christophe Gisquet
> <[email protected]> wrote:
>> 2012/2/24 Christophe Gisquet <[email protected]>:
>>> this 19KB memcpy seems superfluous compared to swapping an index.
>>> Unless I'm missing something. This is more a patch to demonstrate the
>>> way, as I'm quite unsure of where to initialize said index, when to
>>> reset it, etc.
>>>
>>> Christophe
>>
>> ping?
>>
>> (note: it's not only cosmetical, I measured gains of up to 2% with that 
>> patch)
>
> LGTM
>
> For future readability someone may want to rename Y1 and Y0 to
> Y_current and Y_old or similar or not.

Actually this is a little bit more complicated than it needs to be.

> From 836460a4db1ee2e9466a2a9e254472d904b6ca38 Mon Sep 17 00:00:00 2001
> From: Christophe GISQUET <[email protected]>
> Date: Fri, 24 Feb 2012 00:16:53 +0100
> Subject: [PATCH 7/7] SBR DSP: use a swap index rather than copy buffers.
>
> ---
>  libavcodec/aacsbr.c |   35 +++++++++++++++++++++--------------
>  libavcodec/sbr.h    |    1 +
>  2 files changed, 22 insertions(+), 14 deletions(-)
>
> diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
> index 3a30fca..47e95f1 100644
> --- a/libavcodec/aacsbr.c
> +++ b/libavcodec/aacsbr.c
> @@ -134,6 +134,7 @@ av_cold void ff_aac_sbr_ctx_init(AACContext *ac, 
> SpectralBandReplication *sbr)
>      sbr->data[0].e_a[1] = sbr->data[1].e_a[1] = -1;
>      sbr->data[0].synthesis_filterbank_samples_offset = 
> SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
>      sbr->data[1].synthesis_filterbank_samples_offset = 
> SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
> +    sbr->data[0].Ypos = sbr->data[1].Ypos = 1;

Why are we initializing to 1? If we pick zero we get it for free as
part of av_mallocz().

>      /* SBR requires samples to be scaled to +/-32768.0 to work correctly.
>       * mdct scale factors are adjusted to scale up from +/-1.0 at analysis
>       * and scale back down at synthesis. */
> @@ -1349,8 +1350,8 @@ static int sbr_hf_gen(AACContext *ac, 
> SpectralBandReplication *sbr,
>
>  /// Generate the subband filtered lowband
>  static int sbr_x_gen(SpectralBandReplication *sbr, float X[2][38][64],
> -                     const float X_low[32][40][2], const float 
> Y[2][38][64][2],
> -                     int ch)
> +                     const float Y0[38][64][2], const float Y1[38][64][2],
> +                     const float X_low[32][40][2], int ch)
>  {
>      int k, i;
>      const int i_f = 32;
> @@ -1364,8 +1365,8 @@ static int sbr_x_gen(SpectralBandReplication *sbr, 
> float X[2][38][64],
>      }
>      for (; k < sbr->kx[0] + sbr->m[0]; k++) {
>          for (i = 0; i < i_Temp; i++) {
> -            X[0][i][k] = Y[0][i + i_f][k][0];
> -            X[1][i][k] = Y[0][i + i_f][k][1];
> +            X[0][i][k] = Y0[i + i_f][k][0];
> +            X[1][i][k] = Y0[i + i_f][k][1];
>          }
>      }
>
> @@ -1377,8 +1378,8 @@ static int sbr_x_gen(SpectralBandReplication *sbr, 
> float X[2][38][64],
>      }
>      for (; k < sbr->kx[1] + sbr->m[1]; k++) {
>          for (i = i_Temp; i < i_f; i++) {
> -            X[0][i][k] = Y[1][i][k][0];
> -            X[1][i][k] = Y[1][i][k][1];
> +            X[0][i][k] = Y1[i][k][0];
> +            X[1][i][k] = Y1[i][k][1];
>          }
>      }
>      return 0;
> @@ -1537,7 +1538,8 @@ static void sbr_gain_calc(AACContext *ac, 
> SpectralBandReplication *sbr,
>  }
>
>  /// Assembling HF Signals (14496-3 sp04 p220)
> -static void sbr_hf_assemble(float Y[2][38][64][2], const float 
> X_high[64][40][2],
> +static void sbr_hf_assemble(float Y0[38][64][2], float Y1[38][64][2],

This function no longer uses Y0.

> +                            const float X_high[64][40][2],
>                              SpectralBandReplication *sbr, SBRData *ch_data,
>                              const int e_a[2])
>  {
> @@ -1559,7 +1561,6 @@ static void sbr_hf_assemble(float Y[2][38][64][2], 
> const float X_high[64][40][2]
>      float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
>      int indexnoise = ch_data->f_indexnoise;
>      int indexsine  = ch_data->f_indexsine;
> -    memcpy(Y[0], Y[1], sizeof(Y[0]));
>
>      if (sbr->reset) {
>          for (i = 0; i < h_SL; i++) {
> @@ -1602,18 +1603,18 @@ static void sbr_hf_assemble(float Y[2][38][64][2], 
> const float X_high[64][40][2]
>                  q_filt = q_temp[i];
>              }
>
> -            sbr->dsp.hf_g_filt(Y[1][i] + kx, X_high + kx, g_filt, m_max,
> +            sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
>                                 i + ENVELOPE_ADJUSTMENT_OFFSET);
>
>              if (e != e_a[0] && e != e_a[1]) {
> -                sbr->dsp.hf_apply_noise[indexsine](Y[1][i] + kx, sbr->s_m[e],
> +                sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
>                                                     q_filt, indexnoise,
>                                                     kx, m_max);
>              } else {
>                  for (m = 0; m < m_max; m++) {
> -                    Y[1][i][m + kx][0] +=
> +                    Y1[i][m + kx][0] +=
>                          sbr->s_m[e][m] * phi[0][indexsine];
> -                    Y[1][i][m + kx][1] +=
> +                    Y1[i][m + kx][1] +=
>                          sbr->s_m[e][m] * (phi[1][indexsine] * phi_sign);
>                      phi_sign = -phi_sign;
>                  }
> @@ -1653,12 +1654,18 @@ void ff_sbr_apply(AACContext *ac, 
> SpectralBandReplication *sbr, int id_aac,
>              sbr_mapping(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
>              sbr_env_estimate(sbr->e_curr, sbr->X_high, sbr, &sbr->data[ch]);
>              sbr_gain_calc(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
> -            sbr_hf_assemble(sbr->data[ch].Y, sbr->X_high, sbr, 
> &sbr->data[ch],
> +            sbr->data[ch].Ypos ^= 1;
> +            sbr_hf_assemble(sbr->data[ch].Y[1-sbr->data[ch].Ypos],
> +                            sbr->data[ch].Y[  sbr->data[ch].Ypos],
> +                            sbr->X_high, sbr, &sbr->data[ch],
>                              sbr->data[ch].e_a);
>          }
>
>          /* synthesis */
> -        sbr_x_gen(sbr, sbr->X[ch], sbr->X_low, sbr->data[ch].Y, ch);
> +        sbr_x_gen(sbr, sbr->X[ch],
> +                  sbr->data[ch].Y[1-sbr->data[ch].Ypos],
> +                  sbr->data[ch].Y[  sbr->data[ch].Ypos],
> +                  sbr->X_low, ch);
>      }
>
>      if (ac->m4ac.ps == 1) {
> diff --git a/libavcodec/sbr.h b/libavcodec/sbr.h
> index 7d06fad..8da8e57 100644
> --- a/libavcodec/sbr.h
> +++ b/libavcodec/sbr.h
> @@ -88,6 +88,7 @@ typedef struct {
>      ///QMF values of the original signal
>      float              W[2][32][32][2];
>      ///QMF output of the HF adjustor
> +    int                Ypos;

This is a scalar between large round sized arrays. Maybe it can be
placed better?

>      DECLARE_ALIGNED(16, float, Y)[2][38][64][2];
>      DECLARE_ALIGNED(16, float, g_temp)[42][48];
>      float              q_temp[42][48];
> --
> 1.7.7.msysgit.0
>
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to