[FFmpeg-soc] [PATCH] AACENC: refactor quant for massive speedup

Alex Converse Tue, 16 Jun 2009 17:33:40 -0700

This patch set reduces encoding time by 56% for me.

I hope it's not premature.

From 537a41c2f95d98910daada74069d61f2eac8a3aa Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.conve...@gmail.com>
Date: Fri, 24 Apr 2009 00:18:35 -0400
Subject: [PATCH 01/15] Factor out the band quantizer
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------1"


This is a multi-part message in MIME format.
--------------1
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit

---
 libavcodec/aaccoder.c |   55 +++++++++++++++++++++++++-----------------------
 libavcodec/aacenc.c   |    2 +-
 libavcodec/aacenc.h   |    3 +-
 3 files changed, 32 insertions(+), 28 deletions(-)


--------------1
Content-Type: text/x-patch; name="0001-Factor-out-the-band-quantizer.patch"
Content-Transfer-Encoding: 8bit
Content-Disposition: attachment; filename="0001-Factor-out-the-band-quantizer.patch"

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index e05ae4f..c5d6670 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -66,6 +66,21 @@ static av_always_inline int quant(float coef, const float Q)
 
 #if 1
 
+static void quantize_bands(int (*out)[2], const float* in, int size, float Q, int is_signed, int maxval)
+{
+    int i;
+    double qc;
+    for (i = 0; i < size; i++) {
+        qc = pow(fabsf(in[i]) * Q, 0.75);
+        out[i][0] = (int)FFMIN((int)qc, maxval);
+        out[i][1] = (int)FFMIN((int)(qc + 0.4054), maxval);
+        if (is_signed && in[i] < 0.0f) {
+            out[i][0] = -out[i][0];
+            out[i][1] = -out[i][1];
+        }
+    }
+}
+
 static av_always_inline int quant2(float coef, const float Q)
 {
     return pow(coef * Q, 0.75);
@@ -79,7 +94,7 @@ static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16}
  *
  * @return quantization distortion
  */
-static float quantize_band_cost(const float *in, int size, int scale_idx, int cb,
+static float quantize_band_cost(struct AACEncContext *s, const float *in, int size, int scale_idx, int cb,
                                  const float lambda, const float uplim, int *bits)
 {
     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
@@ -101,20 +116,14 @@ static float quantize_band_cost(const float *in, int size, int scale_idx, int cb
     offs[0] = 1;
     for(i = 1; i < dim; i++)
         offs[i] = offs[i-1]*range;
+    quantize_bands(s->qcoefs, in, size, Q, !IS_CODEBOOK_UNSIGNED(cb), maxval);
     for(i = 0; i < size; i += dim){
         float mincost;
         int minidx = 0;
         int minbits = 0;
-        int quants[4][2];
+        int (*quants)[2] = &s->qcoefs[i];
         mincost = 0.0f;
         for(j = 0; j < dim; j++){
-            quants[j][0] = quant2(fabsf(in[i+j]), Q);
-            quants[j][1] = quant (fabsf(in[i+j]), Q);
-            for(k = 0; k < 2; k++){
-                quants[j][k] = FFMIN(quants[j][k], maxval);
-                if(!IS_CODEBOOK_UNSIGNED(cb) && in[i+j] < 0.0f)
-                    quants[j][k] = -quants[j][k];
-            }
             mincost += in[i+j]*in[i+j]*lambda;
         }
         minidx = IS_CODEBOOK_UNSIGNED(cb) ? 0 : 40;
@@ -187,7 +196,7 @@ static float quantize_band_cost(const float *in, int size, int scale_idx, int cb
     return cost;
 }
 
-static void quantize_and_encode_band(PutBitContext *pb, const float *in, int size,
+static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb, const float *in, int size,
                                      int scale_idx, int cb, const float lambda)
 {
     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
@@ -205,20 +214,14 @@ static void quantize_and_encode_band(PutBitContext *pb, const float *in, int siz
     offs[0] = 1;
     for(i = 1; i < dim; i++)
         offs[i] = offs[i-1]*range;
+    quantize_bands(s->qcoefs, in, size, Q, !IS_CODEBOOK_UNSIGNED(cb), maxval);
     for(i = 0; i < size; i += dim){
         float mincost;
         int minidx = 0;
         int minbits = 0;
-        int quants[4][2];
+        int (*quants)[2] = &s->qcoefs[i];
         mincost = 0.0f;
         for(j = 0; j < dim; j++){
-            quants[j][0] = quant2(fabsf(in[i+j]), Q);
-            quants[j][1] = quant (fabsf(in[i+j]), Q);
-            for(k = 0; k < 2; k++){
-                quants[j][k] = FFMIN(quants[j][k], maxval);
-                if(!IS_CODEBOOK_UNSIGNED(cb) && in[i+j] < 0.0f)
-                    quants[j][k] = -quants[j][k];
-            }
             mincost += in[i+j]*in[i+j]*lambda;
         }
         minidx = IS_CODEBOOK_UNSIGNED(cb) ? 0 : 40;
@@ -516,7 +519,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
                 float rd = 0.0f;
                 for(w = 0; w < group_len; w++){
                     FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
-                    rd += quantize_band_cost(sce->coeffs + start + w*128, size,
+                    rd += quantize_band_cost(s, sce->coeffs + start + w*128, size,
                                              sce->sf_idx[(win+w)*16+swb], cb,
                                              lambda / band->threshold, INFINITY, NULL);
                 }
@@ -654,7 +657,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                         FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
                         int cb;
                         for(cb = 0; cb <= ESC_BT; cb++){
-                            dists[cb] += quantize_band_cost(coefs + w2*128, sce->ics.swb_sizes[g],
+                            dists[cb] += quantize_band_cost(s, coefs + w2*128, sce->ics.swb_sizes[g],
                                                             q, cb, lambda / band->threshold, INFINITY, NULL);
                         }
                     }
@@ -808,7 +811,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *
                         int bb = 0;
                         for(w2 = 0; w2 < sce->ics.group_len[w]; w2++){
                             int b;
-                            dist += quantize_band_cost(coefs + w2*128,
+                            dist += quantize_band_cost(s, coefs + w2*128,
                                                        sce->ics.swb_sizes[g],
                                                        sce->sf_idx[w*16+g],
                                                        ESC_BT,
@@ -977,7 +980,7 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
 
                 for(w2 = 0; w2 < sce->ics.group_len[w]; w2++){
                     int b;
-                    dist += quantize_band_cost(coefs + w2*128,
+                    dist += quantize_band_cost(s, coefs + w2*128,
                                                sce->ics.swb_sizes[g],
                                                scf,
                                                ESC_BT,
@@ -1086,22 +1089,22 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, const float lam
                         S[i] =  sce0->coeffs[start+w2*128+i]
                               - sce1->coeffs[start+w2*128+i];
                     }
-                    dist1 += quantize_band_cost(sce0->coeffs + start + w2*128,
+                    dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
                                                 sce0->ics.swb_sizes[g],
                                                 sce0->sf_idx[(w+w2)*16+g],
                                                 sce0->band_type[(w+w2)*16+g],
                                                 lambda / band0->threshold, INFINITY, NULL);
-                    dist1 += quantize_band_cost(sce1->coeffs + start + w2*128,
+                    dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
                                                 sce1->ics.swb_sizes[g],
                                                 sce1->sf_idx[(w+w2)*16+g],
                                                 sce1->band_type[(w+w2)*16+g],
                                                 lambda / band1->threshold, INFINITY, NULL);
-                    dist2 += quantize_band_cost(M,
+                    dist2 += quantize_band_cost(s, M,
                                                 sce0->ics.swb_sizes[g],
                                                 sce0->sf_idx[(w+w2)*16+g],
                                                 sce0->band_type[(w+w2)*16+g],
                                                 lambda / maxthr, INFINITY, NULL);
-                    dist2 += quantize_band_cost(S,
+                    dist2 += quantize_band_cost(s, S,
                                                 sce1->ics.swb_sizes[g],
                                                 sce1->sf_idx[(w+w2)*16+g],
                                                 sce1->band_type[(w+w2)*16+g],
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 7ff24bb..430b800 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -419,7 +419,7 @@ static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
                 continue;
             }
             for(w2 = w; w2 < w + sce->ics.group_len[w]; w2++){
-                s->coder->quantize_and_encode_band(&s->pb, sce->coeffs + start + w2*128,
+                s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
                                          sce->ics.swb_sizes[i],
                                          sce->sf_idx[w*16 + i],
                                          sce->band_type[w*16 + i],
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 9dc1c78..940f5a9 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -37,7 +37,7 @@ typedef struct AACCoefficientsEncoder{
                                   SingleChannelElement *sce, const float lambda);
     void (*encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce,
                                      int win, int group_len, const float lambda);
-    void (*quantize_and_encode_band)(PutBitContext *pb, const float *in, int size,
+    void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, int size,
                                      int scale_idx, int cb, const float lambda);
     void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda);
 }AACCoefficientsEncoder;
@@ -64,6 +64,7 @@ typedef struct AACEncContext {
     int cur_channel;
     int last_frame;
     float lambda;
+    DECLARE_ALIGNED_16(int,   qcoefs[96][2]);    ///< quantized coefficients
 } AACEncContext;
 
 #endif /* AVCODEC_AACENC_H */

--------------1--

From 1bc96020add2ee2cb2aa9375c194317a871bb976 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.conve...@gmail.com>
Date: Tue, 16 Jun 2009 18:08:07 -0400
Subject: [PATCH 02/15] Split quantize_bands in two
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------1"

This is a multi-part message in MIME format.
--------------1
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit

---
 libavcodec/aaccoder.c |   22 ++++++++++++++++++----
 libavcodec/aacenc.h   |    1 +
 2 files changed, 19 insertions(+), 4 deletions(-)


--------------1
Content-Type: text/x-patch; name="0002-Split-quantize_bands-in-two.patch"
Content-Transfer-Encoding: 8bit
Content-Disposition: attachment; filename="0002-Split-quantize_bands-in-two.patch"

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index c5d6670..dfa2c39 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -66,12 +66,12 @@ static av_always_inline int quant(float coef, const float Q)
 
 #if 1
 
-static void quantize_bands(int (*out)[2], const float* in, int size, float Q, int is_signed, int maxval)
+static void quantize_bands(int (*out)[2], const float *in, const float *scaled, int size, float Q34, int is_signed, int maxval)
 {
     int i;
     double qc;
     for (i = 0; i < size; i++) {
-        qc = pow(fabsf(in[i]) * Q, 0.75);
+        qc = scaled[i] * Q34;
         out[i][0] = (int)FFMIN((int)qc, maxval);
         out[i][1] = (int)FFMIN((int)(qc + 0.4054), maxval);
         if (is_signed && in[i] < 0.0f) {
@@ -81,6 +81,14 @@ static void quantize_bands(int (*out)[2], const float* in, int size, float Q, in
     }
 }
 
+static void abs_pow34_v(float *out, const float* in, const int size)
+{
+    int i;
+    for (i = 0; i < size; i++) {
+        out[i] = pow(fabsf(in[i]), 0.75);
+    }
+}
+
 static av_always_inline int quant2(float coef, const float Q)
 {
     return pow(coef * Q, 0.75);
@@ -99,6 +107,7 @@ static float quantize_band_cost(struct AACEncContext *s, const float *in, int si
 {
     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     const float  Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    const float  Q34 = pow(Q, 0.75);
     const float CLIPPED_ESCAPE = 165140.0f*IQ;
     int i, j, k;
     float cost = 0;
@@ -107,6 +116,7 @@ static float quantize_band_cost(struct AACEncContext *s, const float *in, int si
     const int range = aac_cb_range[cb];
     const int maxval = aac_cb_maxval[cb];
     int offs[4];
+    float *scaled = s->scoefs;
 
     if(!cb){
         for(i = 0; i < size; i++)
@@ -116,7 +126,8 @@ static float quantize_band_cost(struct AACEncContext *s, const float *in, int si
     offs[0] = 1;
     for(i = 1; i < dim; i++)
         offs[i] = offs[i-1]*range;
-    quantize_bands(s->qcoefs, in, size, Q, !IS_CODEBOOK_UNSIGNED(cb), maxval);
+    abs_pow34_v(scaled, in, size);
+    quantize_bands(s->qcoefs, in, scaled, size, Q34, !IS_CODEBOOK_UNSIGNED(cb), maxval);
     for(i = 0; i < size; i += dim){
         float mincost;
         int minidx = 0;
@@ -201,12 +212,14 @@ static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
 {
     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     const float  Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    const float  Q34 = pow(Q, 0.75);
     const float CLIPPED_ESCAPE = 165140.0f*IQ;
     const int range = aac_cb_range[cb];
     const int maxval = aac_cb_maxval[cb];
     const int dim = (cb < FIRST_PAIR_BT) ? 4 : 2;
     int i, j, k;
     int offs[4];
+    float *scaled = s->scoefs;
 
     if(!cb)
         return;
@@ -214,7 +227,8 @@ static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
     offs[0] = 1;
     for(i = 1; i < dim; i++)
         offs[i] = offs[i-1]*range;
-    quantize_bands(s->qcoefs, in, size, Q, !IS_CODEBOOK_UNSIGNED(cb), maxval);
+    abs_pow34_v(scaled, in, size);
+    quantize_bands(s->qcoefs, in, scaled, size, Q34, !IS_CODEBOOK_UNSIGNED(cb), maxval);
     for(i = 0; i < size; i += dim){
         float mincost;
         int minidx = 0;
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 940f5a9..458d3d5 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -65,6 +65,7 @@ typedef struct AACEncContext {
     int last_frame;
     float lambda;
     DECLARE_ALIGNED_16(int,   qcoefs[96][2]);    ///< quantized coefficients
+    DECLARE_ALIGNED_16(float, scoefs[1024]);     ///< scaled coefficients
 } AACEncContext;
 
 #endif /* AVCODEC_AACENC_H */

--------------1--

From 73b6ce55a24ae0dae0fd4b33528b0fa8332ea800 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.conve...@gmail.com>
Date: Tue, 16 Jun 2009 18:08:31 -0400
Subject: [PATCH 03/15] Hoist abs_pow34_v
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------1"

This is a multi-part message in MIME format.
--------------1
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit

---
 libavcodec/aaccoder.c |   26 +++++++++++++++++++++-----
 1 files changed, 21 insertions(+), 5 deletions(-)


--------------1
Content-Type: text/x-patch; name="0003-Hoist-abs_pow34_v.patch"
Content-Transfer-Encoding: 8bit
Content-Disposition: attachment; filename="0003-Hoist-abs_pow34_v.patch"

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index dfa2c39..5d45497 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -102,7 +102,7 @@ static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16}
  *
  * @return quantization distortion
  */
-static float quantize_band_cost(struct AACEncContext *s, const float *in, int size, int scale_idx, int cb,
+static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb,
                                  const float lambda, const float uplim, int *bits)
 {
     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
@@ -116,7 +116,6 @@ static float quantize_band_cost(struct AACEncContext *s, const float *in, int si
     const int range = aac_cb_range[cb];
     const int maxval = aac_cb_maxval[cb];
     int offs[4];
-    float *scaled = s->scoefs;
 
     if(!cb){
         for(i = 0; i < size; i++)
@@ -126,7 +125,6 @@ static float quantize_band_cost(struct AACEncContext *s, const float *in, int si
     offs[0] = 1;
     for(i = 1; i < dim; i++)
         offs[i] = offs[i-1]*range;
-    abs_pow34_v(scaled, in, size);
     quantize_bands(s->qcoefs, in, scaled, size, Q34, !IS_CODEBOOK_UNSIGNED(cb), maxval);
     for(i = 0; i < size; i += dim){
         float mincost;
@@ -508,6 +506,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
     float next_minrd = INFINITY;
     int next_mincb = 0;
 
+    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
     start = win*128;
     for(cb = 0; cb < 12; cb++){
         path[0][cb].cost = 0.0f;
@@ -533,7 +532,8 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
                 float rd = 0.0f;
                 for(w = 0; w < group_len; w++){
                     FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
-                    rd += quantize_band_cost(s, sce->coeffs + start + w*128, size,
+                    rd += quantize_band_cost(s, sce->coeffs + start + w*128,
+                                             s->scoefs + start + w*128, size,
                                              sce->sf_idx[(win+w)*16+swb], cb,
                                              lambda / band->threshold, INFINITY, NULL);
                 }
@@ -633,6 +633,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
         paths[i].max_val = 0;
     }
     idx = 256;
+    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
     for(w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]){
         start = w*128;
         for(g = 0; g < sce->ics.num_swb; g++){
@@ -671,7 +672,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                         FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
                         int cb;
                         for(cb = 0; cb <= ESC_BT; cb++){
-                            dists[cb] += quantize_band_cost(s, coefs + w2*128, sce->ics.swb_sizes[g],
+                            dists[cb] += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
                                                             q, cb, lambda / band->threshold, INFINITY, NULL);
                         }
                     }
@@ -797,6 +798,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *
 
     if(!allz)
         return;
+    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
     //perform two-loop search
     //outer loop - improve quality
     do{
@@ -812,6 +814,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *
                 start = w*128;
                 for(g = 0;  g < sce->ics.num_swb; g++){
                     const float *coefs = sce->coeffs + start;
+                    const float *scaled = s->scoefs + start;
                     int bits = 0;
                     int cb;
                     float mindist = INFINITY;
@@ -826,6 +829,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *
                         for(w2 = 0; w2 < sce->ics.group_len[w]; w2++){
                             int b;
                             dist += quantize_band_cost(s, coefs + w2*128,
+                                                       scaled + w2*128,
                                                        sce->ics.swb_sizes[g],
                                                        sce->sf_idx[w*16+g],
                                                        ESC_BT,
@@ -972,10 +976,12 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
         }
     }
     memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
+    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
     for(w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]){
         start = w*128;
         for(g = 0;  g < sce->ics.num_swb; g++){
             const float *coefs = sce->coeffs + start;
+            const float *scaled = s->scoefs + start;
             const int size = sce->ics.swb_sizes[g];
             int scf, prev_scf, step;
             int min_scf = 0, max_scf = 255;
@@ -995,6 +1001,7 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
                 for(w2 = 0; w2 < sce->ics.group_len[w]; w2++){
                     int b;
                     dist += quantize_band_cost(s, coefs + w2*128,
+                                               scaled + w2*128,
                                                sce->ics.swb_sizes[g],
                                                scf,
                                                ESC_BT,
@@ -1084,6 +1091,7 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, const float lam
 {
     int start = 0, i, w, w2, g;
     float M[128], S[128];
+    float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
     SingleChannelElement *sce0 = &cpe->ch[0];
     SingleChannelElement *sce1 = &cpe->ch[1];
     if(!cpe->common_window)
@@ -1103,22 +1111,30 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, const float lam
                         S[i] =  sce0->coeffs[start+w2*128+i]
                               - sce1->coeffs[start+w2*128+i];
                     }
+                    abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
+                    abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
+                    abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
+                    abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
                     dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
+                                                L34,
                                                 sce0->ics.swb_sizes[g],
                                                 sce0->sf_idx[(w+w2)*16+g],
                                                 sce0->band_type[(w+w2)*16+g],
                                                 lambda / band0->threshold, INFINITY, NULL);
                     dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
+                                                R34,
                                                 sce1->ics.swb_sizes[g],
                                                 sce1->sf_idx[(w+w2)*16+g],
                                                 sce1->band_type[(w+w2)*16+g],
                                                 lambda / band1->threshold, INFINITY, NULL);
                     dist2 += quantize_band_cost(s, M,
+                                                M34,
                                                 sce0->ics.swb_sizes[g],
                                                 sce0->sf_idx[(w+w2)*16+g],
                                                 sce0->band_type[(w+w2)*16+g],
                                                 lambda / maxthr, INFINITY, NULL);
                     dist2 += quantize_band_cost(s, S,
+                                                S34,
                                                 sce1->ics.swb_sizes[g],
                                                 sce1->sf_idx[(w+w2)*16+g],
                                                 sce1->band_type[(w+w2)*16+g],

--------------1--

From 9a3ef241972bd8cdb00c3aa633e1a395f68d4940 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.conve...@gmail.com>
Date: Thu, 23 Apr 2009 23:21:15 -0400
Subject: [PATCH 04/15] Make quantizer^0.75 a table
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------1"

This is a multi-part message in MIME format.
--------------1
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit

---
 libavcodec/aaccoder.c |    4 ++--
 libavcodec/aacenc.c   |    5 +++++
 libavcodec/aactab.h   |    1 +
 3 files changed, 8 insertions(+), 2 deletions(-)


--------------1
Content-Type: text/x-patch; name="0004-Make-quantizer-0.75-a-table.patch"
Content-Transfer-Encoding: 8bit
Content-Disposition: attachment; filename="0004-Make-quantizer-0.75-a-table.patch"

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 5d45497..7007c73 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -107,7 +107,7 @@ static float quantize_band_cost(struct AACEncContext *s, const float *in, const
 {
     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     const float  Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
-    const float  Q34 = pow(Q, 0.75);
+    const float  Q34 = ff_aac_pow2sf_34_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float CLIPPED_ESCAPE = 165140.0f*IQ;
     int i, j, k;
     float cost = 0;
@@ -210,7 +210,7 @@ static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
 {
     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     const float  Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
-    const float  Q34 = pow(Q, 0.75);
+    const float  Q34 = ff_aac_pow2sf_34_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float CLIPPED_ESCAPE = 165140.0f*IQ;
     const int range = aac_cb_range[cb];
     const int maxval = aac_cb_maxval[cb];
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 430b800..e12b22b 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -120,6 +120,8 @@ static const uint8_t *swb_size_128[] = {
     swb_size_128_16, swb_size_128_16, swb_size_128_8
 };
 
+float ff_aac_pow2sf_34_tab[428];
+
 /** default channel configurations */
 static const uint8_t aac_chan_configs[6][5] = {
  {1, TYPE_SCE},                               // 1 channel  - single channel element
@@ -198,7 +200,10 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     s->lambda = avctx->global_quality ? avctx->global_quality : 120;
 #if !CONFIG_HARDCODED_TABLES
     for (i = 0; i < 428; i++)
+    {
         ff_aac_pow2sf_tab[i] = pow(2, (i - 200)/4.);
+        ff_aac_pow2sf_34_tab[i] = pow(ff_aac_pow2sf_tab[i], 0.75);
+    }
 #endif /* CONFIG_HARDCODED_TABLES */
 
     return 0;
diff --git a/libavcodec/aactab.h b/libavcodec/aactab.h
index fd0929c..e681be4 100644
--- a/libavcodec/aactab.h
+++ b/libavcodec/aactab.h
@@ -75,6 +75,7 @@ extern const uint8_t ff_tns_max_bands_128 [13];
 extern const float ff_aac_pow2sf_tab[428];
 #else
 extern       float ff_aac_pow2sf_tab[428];
+extern       float ff_aac_pow2sf_34_tab[428];
 #endif /* CONFIG_HARDCODED_TABLES */
 
 #endif /* AVCODEC_AACTAB_H */

--------------1--

From 3583c05b2e69ac69e3c5143158077afe848e4054 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.conve...@gmail.com>
Date: Tue, 16 Jun 2009 17:20:21 -0400
Subject: [PATCH 05/15] Cleanup some clipping operations
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------1"

This is a multi-part message in MIME format.
--------------1
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit

---
 libavcodec/aaccoder.c |   14 ++++++++------
 1 files changed, 8 insertions(+), 6 deletions(-)


--------------1
Content-Type: text/x-patch; name="0005-Cleanup-some-clipping-operations.patch"
Content-Transfer-Encoding: 8bit
Content-Disposition: attachment; filename="0005-Cleanup-some-clipping-operations.patch"

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 7007c73..0b65e82 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -106,7 +106,6 @@ static float quantize_band_cost(struct AACEncContext *s, const float *in, const
                                  const float lambda, const float uplim, int *bits)
 {
     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
-    const float  Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float  Q34 = ff_aac_pow2sf_34_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float CLIPPED_ESCAPE = 165140.0f*IQ;
     int i, j, k;
@@ -170,7 +169,8 @@ static float quantize_band_cost(struct AACEncContext *s, const float *in, const
                             di = t - CLIPPED_ESCAPE;
                             curbits += 21;
                         }else{
-                            int c = av_clip(quant(t, Q), 0, 8191);
+                            int c = scaled[i+k]*Q34 + 0.4054;
+                            c = FFMIN(c, 8191);
                             di = t - c*cbrt(c)*IQ;
                             curbits += av_log2(c)*2 - 4 + 1;
                         }
@@ -209,7 +209,6 @@ static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
                                      int scale_idx, int cb, const float lambda)
 {
     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
-    const float  Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float  Q34 = ff_aac_pow2sf_34_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float CLIPPED_ESCAPE = 165140.0f*IQ;
     const int range = aac_cb_range[cb];
@@ -271,7 +270,8 @@ static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
                             di = t - CLIPPED_ESCAPE;
                             curbits += 21;
                         }else{
-                            int c = av_clip(quant(t, Q), 0, 8191);
+                            int c = scaled[i+k] * Q34 + 0.4054;
+                            c = FFMIN(c, 8191);
                             di = t - c*cbrt(c)*IQ;
                             curbits += av_log2(c)*2 - 4 + 1;
                         }
@@ -303,8 +303,10 @@ static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
         if(cb == ESC_BT){
             for(j = 0; j < 2; j++){
                 if(ff_aac_codebook_vectors[cb-1][minidx*2+j] == 64.0f){
-                    int coef = av_clip(quant(fabsf(in[i+j]), Q), 0, 8191);
-                    int len = av_log2(coef);
+                    int len;
+                    int coef = scaled[i+j] * Q34 + 0.4054;
+                    coef = FFMIN(coef, 8191);
+                    len = av_log2(coef);
 
                     put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
                     put_bits(pb, len, coef & ((1 << len) - 1));

--------------1--

_______________________________________________
FFmpeg-soc mailing list
FFmpeg-soc@mplayerhq.hu
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc

[FFmpeg-soc] [PATCH] AACENC: refactor quant for massive speedup

Reply via email to