On Tue, Feb 28, 2012 at 9:26 PM, Alex Converse <[email protected]> wrote: > On Tue, Feb 28, 2012 at 4:20 AM, Christophe Gisquet > <[email protected]> wrote: >> 2012/2/24 Christophe Gisquet <[email protected]>: >>> this 19KB memcpy seems superfluous compared to swapping an index. >>> Unless I'm missing something. This is more a patch to demonstrate the >>> way, as I'm quite unsure of where to initialize said index, when to >>> reset it, etc. >>> >>> Christophe >> >> ping? >> >> (note: it's not only cosmetical, I measured gains of up to 2% with that >> patch) > > LGTM > > For future readability someone may want to rename Y1 and Y0 to > Y_current and Y_old or similar or not.
Actually this is a little bit more complicated than it needs to be. > From 836460a4db1ee2e9466a2a9e254472d904b6ca38 Mon Sep 17 00:00:00 2001 > From: Christophe GISQUET <[email protected]> > Date: Fri, 24 Feb 2012 00:16:53 +0100 > Subject: [PATCH 7/7] SBR DSP: use a swap index rather than copy buffers. > > --- > libavcodec/aacsbr.c | 35 +++++++++++++++++++++-------------- > libavcodec/sbr.h | 1 + > 2 files changed, 22 insertions(+), 14 deletions(-) > > diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c > index 3a30fca..47e95f1 100644 > --- a/libavcodec/aacsbr.c > +++ b/libavcodec/aacsbr.c > @@ -134,6 +134,7 @@ av_cold void ff_aac_sbr_ctx_init(AACContext *ac, > SpectralBandReplication *sbr) > sbr->data[0].e_a[1] = sbr->data[1].e_a[1] = -1; > sbr->data[0].synthesis_filterbank_samples_offset = > SBR_SYNTHESIS_BUF_SIZE - (1280 - 128); > sbr->data[1].synthesis_filterbank_samples_offset = > SBR_SYNTHESIS_BUF_SIZE - (1280 - 128); > + sbr->data[0].Ypos = sbr->data[1].Ypos = 1; Why are we initializing to 1? If we pick zero we get it for free as part of av_mallocz(). > /* SBR requires samples to be scaled to +/-32768.0 to work correctly. > * mdct scale factors are adjusted to scale up from +/-1.0 at analysis > * and scale back down at synthesis. */ > @@ -1349,8 +1350,8 @@ static int sbr_hf_gen(AACContext *ac, > SpectralBandReplication *sbr, > > /// Generate the subband filtered lowband > static int sbr_x_gen(SpectralBandReplication *sbr, float X[2][38][64], > - const float X_low[32][40][2], const float > Y[2][38][64][2], > - int ch) > + const float Y0[38][64][2], const float Y1[38][64][2], > + const float X_low[32][40][2], int ch) > { > int k, i; > const int i_f = 32; > @@ -1364,8 +1365,8 @@ static int sbr_x_gen(SpectralBandReplication *sbr, > float X[2][38][64], > } > for (; k < sbr->kx[0] + sbr->m[0]; k++) { > for (i = 0; i < i_Temp; i++) { > - X[0][i][k] = Y[0][i + i_f][k][0]; > - X[1][i][k] = Y[0][i + i_f][k][1]; > + X[0][i][k] = Y0[i + i_f][k][0]; > + X[1][i][k] = Y0[i + i_f][k][1]; > } > } > > @@ -1377,8 +1378,8 @@ static int sbr_x_gen(SpectralBandReplication *sbr, > float X[2][38][64], > } > for (; k < sbr->kx[1] + sbr->m[1]; k++) { > for (i = i_Temp; i < i_f; i++) { > - X[0][i][k] = Y[1][i][k][0]; > - X[1][i][k] = Y[1][i][k][1]; > + X[0][i][k] = Y1[i][k][0]; > + X[1][i][k] = Y1[i][k][1]; > } > } > return 0; > @@ -1537,7 +1538,8 @@ static void sbr_gain_calc(AACContext *ac, > SpectralBandReplication *sbr, > } > > /// Assembling HF Signals (14496-3 sp04 p220) > -static void sbr_hf_assemble(float Y[2][38][64][2], const float > X_high[64][40][2], > +static void sbr_hf_assemble(float Y0[38][64][2], float Y1[38][64][2], This function no longer uses Y0. > + const float X_high[64][40][2], > SpectralBandReplication *sbr, SBRData *ch_data, > const int e_a[2]) > { > @@ -1559,7 +1561,6 @@ static void sbr_hf_assemble(float Y[2][38][64][2], > const float X_high[64][40][2] > float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp; > int indexnoise = ch_data->f_indexnoise; > int indexsine = ch_data->f_indexsine; > - memcpy(Y[0], Y[1], sizeof(Y[0])); > > if (sbr->reset) { > for (i = 0; i < h_SL; i++) { > @@ -1602,18 +1603,18 @@ static void sbr_hf_assemble(float Y[2][38][64][2], > const float X_high[64][40][2] > q_filt = q_temp[i]; > } > > - sbr->dsp.hf_g_filt(Y[1][i] + kx, X_high + kx, g_filt, m_max, > + sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max, > i + ENVELOPE_ADJUSTMENT_OFFSET); > > if (e != e_a[0] && e != e_a[1]) { > - sbr->dsp.hf_apply_noise[indexsine](Y[1][i] + kx, sbr->s_m[e], > + sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e], > q_filt, indexnoise, > kx, m_max); > } else { > for (m = 0; m < m_max; m++) { > - Y[1][i][m + kx][0] += > + Y1[i][m + kx][0] += > sbr->s_m[e][m] * phi[0][indexsine]; > - Y[1][i][m + kx][1] += > + Y1[i][m + kx][1] += > sbr->s_m[e][m] * (phi[1][indexsine] * phi_sign); > phi_sign = -phi_sign; > } > @@ -1653,12 +1654,18 @@ void ff_sbr_apply(AACContext *ac, > SpectralBandReplication *sbr, int id_aac, > sbr_mapping(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a); > sbr_env_estimate(sbr->e_curr, sbr->X_high, sbr, &sbr->data[ch]); > sbr_gain_calc(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a); > - sbr_hf_assemble(sbr->data[ch].Y, sbr->X_high, sbr, > &sbr->data[ch], > + sbr->data[ch].Ypos ^= 1; > + sbr_hf_assemble(sbr->data[ch].Y[1-sbr->data[ch].Ypos], > + sbr->data[ch].Y[ sbr->data[ch].Ypos], > + sbr->X_high, sbr, &sbr->data[ch], > sbr->data[ch].e_a); > } > > /* synthesis */ > - sbr_x_gen(sbr, sbr->X[ch], sbr->X_low, sbr->data[ch].Y, ch); > + sbr_x_gen(sbr, sbr->X[ch], > + sbr->data[ch].Y[1-sbr->data[ch].Ypos], > + sbr->data[ch].Y[ sbr->data[ch].Ypos], > + sbr->X_low, ch); > } > > if (ac->m4ac.ps == 1) { > diff --git a/libavcodec/sbr.h b/libavcodec/sbr.h > index 7d06fad..8da8e57 100644 > --- a/libavcodec/sbr.h > +++ b/libavcodec/sbr.h > @@ -88,6 +88,7 @@ typedef struct { > ///QMF values of the original signal > float W[2][32][32][2]; > ///QMF output of the HF adjustor > + int Ypos; This is a scalar between large round sized arrays. Maybe it can be placed better? > DECLARE_ALIGNED(16, float, Y)[2][38][64][2]; > DECLARE_ALIGNED(16, float, g_temp)[42][48]; > float q_temp[42][48]; > -- > 1.7.7.msysgit.0 > _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
