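Move the FFT/MDCT tables and the temporary buffers used by the transform
into a new AC3MDCTContext, which is allocated in mdct_init() and freed in
mdct_end().
Rename mdct512() to calc_mdct(), since it can work for transform sizes
other than 512, and rename fft() to calc_fft().
calc_mdct() is also 6% faster with this change.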
---
 libavcodec/ac3enc.c |  121 ++++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 90 insertions(+), 31 deletions(-)

diff --git libavcodec/ac3enc.c libavcodec/ac3enc.c
index 50340fe..cf0a563 100644
--- libavcodec/ac3enc.c
+++ libavcodec/ac3enc.c
@@ -36,6 +36,21 @@
 #include "ac3.h"
 #include "audioconvert.h"
 
+typedef struct IComplex {
+    int16_t re,im;
+} IComplex;
+
+typedef struct AC3MDCTContext {
+    AVCodecContext *avctx;                  ///< parent context for av_log()
+    int nbits;                              ///< log2(transform size)
+    int16_t  *costab;                       ///< FFT cos table
+    int16_t  *sintab;                       ///< FFT sin table
+    int16_t  *xcos1;                        ///< MDCT cos table
+    int16_t  *xsin1;                        ///< MDCT sin table
+    int16_t  *rot_tmp;                      ///< temp buffer for pre-rotated samples
+    IComplex *cplx_tmp;                     ///< temp buffer for complex pre-rotated samples
+} AC3MDCTContext;
+
 typedef struct AC3Block {
     uint8_t **bap;                          ///< bap for each channel in this block
     int32_t **mdct_coef;                    ///< MDCT coefficients for each channel in this block
@@ -49,6 +64,7 @@ typedef struct AC3Block {
 typedef struct AC3EncodeContext {
     AVCodecContext *avctx;                  ///< parent context
     PutBitContext pb;                       ///< bitstream writer
+    AC3MDCTContext mdct;                    ///< MDCT context
 
     AC3Block blocks[AC3_MAX_BLOCKS];        ///< per-block info
 
@@ -108,45 +124,83 @@ typedef struct AC3EncodeContext {
  */
 int exponent_group_tab[3][256];
 
-static int16_t costab[64];
-static int16_t sintab[64];
-static int16_t xcos1[128];
-static int16_t xsin1[128];
-
 #define MDCT_NBITS 9
 #define MDCT_SAMPLES (1 << MDCT_NBITS)
 
 /** convert float in range [-1..1] to int16_t in range [-32768..32767] */
 #define FIX15(a) (av_clip_int16(lrintf(a * (float)(1 << 15))))
 
-typedef struct IComplex {
-    int16_t re,im;
-} IComplex;
+static av_cold void mdct_end(AC3MDCTContext *mdct)
+{
+    mdct->nbits  = 0;
+    av_freep(&mdct->costab);
+    av_freep(&mdct->sintab);
+    av_freep(&mdct->xcos1);
+    av_freep(&mdct->xsin1);
+    av_freep(&mdct->rot_tmp);
+    av_freep(&mdct->cplx_tmp);
+}
 
-static av_cold void fft_init(int ln)
+static av_cold int fft_init(AC3MDCTContext *mdct, int ln)
 {
-    int i, n;
+    int i, n, n2;
     float alpha;
 
     n = 1 << ln;
+    n2 = n >> 1;
 
-    for (i = 0; i < n/2; i++) {
+    FF_ALLOC_OR_GOTO(mdct->avctx, mdct->costab, n2 * sizeof(*mdct->costab),
+                     fft_alloc_fail);
+    FF_ALLOC_OR_GOTO(mdct->avctx, mdct->sintab, n2 * sizeof(*mdct->sintab),
+                     fft_alloc_fail);
+
+    for (i = 0; i < n2; i++) {
         alpha = 2 * M_PI * (float)i / (float)n;
-        costab[i] = FIX15(cos(alpha));
-        sintab[i] = FIX15(sin(alpha));
+        mdct->costab[i] = FIX15(cos(alpha));
+        mdct->sintab[i] = FIX15(sin(alpha));
     }
+
+    return 0;
+
+fft_alloc_fail:
+    mdct_end(mdct);
+    return AVERROR(ENOMEM);
 }
 
-static av_cold void mdct_init(void)
+static av_cold int mdct_init(AC3MDCTContext *mdct, int nbits)
 {
-    int i;
+    int i, n, n4;
+    int ret;
+
+    mdct->nbits = nbits;
+
+    ret = fft_init(mdct, nbits - 2);
+    if (ret)
+        return ret;
+
+    n  = 1 << nbits;
+    n4 = n >> 2;
+
+    FF_ALLOC_OR_GOTO(mdct->avctx, mdct->xcos1,    n4 * sizeof(*mdct->xcos1),
+                     mdct_alloc_fail);
+    FF_ALLOC_OR_GOTO(mdct->avctx, mdct->xsin1 ,   n4 * sizeof(*mdct->xsin1),
+                     mdct_alloc_fail);
+    FF_ALLOC_OR_GOTO(mdct->avctx, mdct->rot_tmp,  n  * sizeof(*mdct->rot_tmp),
+                     mdct_alloc_fail);
+    FF_ALLOC_OR_GOTO(mdct->avctx, mdct->cplx_tmp, n4 * sizeof(*mdct->cplx_tmp),
+                     mdct_alloc_fail);
 
-    fft_init(MDCT_NBITS - 2);
-    for (i = 0; i < MDCT_SAMPLES/4; i++) {
-        float alpha = 2 * M_PI * (i + 1.0 / 8.0) / (float)MDCT_SAMPLES;
-        xcos1[i] = FIX15(-cos(alpha));
-        xsin1[i] = FIX15(-sin(alpha));
+    for (i = 0; i < n4; i++) {
+        float alpha = 2.0 * M_PI * (i + 1.0 / 8.0) / n;
+        mdct->xcos1[i] = FIX15(-cos(alpha));
+        mdct->xsin1[i] = FIX15(-sin(alpha));
     }
+
+    return 0;
+
+mdct_alloc_fail:
+    mdct_end(mdct);
+    return AVERROR(ENOMEM);
 }
 
 /* butter fly op */
@@ -171,7 +225,7 @@ static av_cold void mdct_init(void)
 
 
 /* do a 2^n point complex fft on 2^ln points. */
-static void fft(IComplex *z, int ln)
+static void calc_fft(AC3MDCTContext *mdct, IComplex *z, int ln)
 {
     int j, l, np, np2;
     int nblocks, nloops;
@@ -224,7 +278,7 @@ static void fft(IComplex *z, int ln)
             p++;
             q++;
             for (l = nblocks; l < np2; l += nblocks) {
-                CMUL(tmp_re, tmp_im, costab[l], -sintab[l], q->re, q->im);
+                CMUL(tmp_re, tmp_im, mdct->costab[l], -mdct->sintab[l], q->re, q->im);
                 BF(p->re, p->im, q->re, q->im,
                    p->re, p->im, tmp_re, tmp_im);
                 p++;
@@ -239,12 +293,12 @@ static void fft(IComplex *z, int ln)
 }
 
 /* do a 512 point mdct */
-static void mdct512(int32_t *out, int16_t *in)
+static void calc_mdct(AC3MDCTContext *mdct, int32_t *out, int16_t *in)
 {
     int i, re, im;
     int n, n2, n4;
-    int16_t rot[MDCT_SAMPLES];
-    IComplex x[MDCT_SAMPLES/4];
+    int16_t *rot = mdct->rot_tmp;
+    IComplex *x  = mdct->cplx_tmp;
 
     n  = MDCT_SAMPLES;
     n2 = n >> 1;
@@ -259,16 +313,16 @@ static void mdct512(int32_t *out, int16_t *in)
     for (i = 0; i < n4; i++) {
         re =  ((int)rot[   2*i] - (int)rot[ n-1-2*i]) >> 1;
         im = -((int)rot[n2+2*i] - (int)rot[n2-1-2*i]) >> 1;
-        CMUL(x[i].re, x[i].im, re, im, -xcos1[i], xsin1[i]);
+        CMUL(x[i].re, x[i].im, re, im, -mdct->xcos1[i], mdct->xsin1[i]);
     }
 
-    fft(x, MDCT_NBITS - 2);
+    calc_fft(mdct, x, MDCT_NBITS - 2);
 
     /* post rotation */
     for (i = 0; i < n4; i++) {
         re = x[i].re;
         im = x[i].im;
-        CMUL(out[n2-1-2*i], out[2*i], re, im, xsin1[i], xcos1[i]);
+        CMUL(out[n2-1-2*i], out[2*i], re, im, mdct->xsin1[i], mdct->xcos1[i]);
     }
 }
 
@@ -321,7 +375,7 @@ static void apply_mdct(AC3EncodeContext *s)
             lshift_tab(s->windowed_samples, AC3_BLOCK_SIZE*2, v);
 
             /* do the MDCT */
-            mdct512(s->blocks[blk].mdct_coef[ch], s->windowed_samples);
+            calc_mdct(&s->mdct, s->blocks[blk].mdct_coef[ch], s->windowed_samples);
         }
     }
 }
@@ -897,9 +951,10 @@ static av_cold int AC3_encode_init(AVCodecContext *avctx)
     int bitrate = avctx->bit_rate;
     AC3EncodeContext *s = avctx->priv_data;
     int i, j, blk, ch;
-    int bw_code, frame_size_58;
+    int bw_code, frame_size_58, ret;
 
     s->avctx = avctx;
+    s->mdct.avctx = avctx;
 
     avctx->frame_size = AC3_FRAME_SIZE;
 
@@ -986,7 +1041,9 @@ static av_cold int AC3_encode_init(AVCodecContext *avctx)
 
     count_frame_bits_fixed(s);
 
-    mdct_init();
+    ret = mdct_init(&s->mdct, MDCT_NBITS);
+    if (ret)
+        return ret;
 
     for (j = 0; j < 256; j++) {
         exponent_group_tab[0][j] = (j - 1) /  3;
@@ -1539,6 +1596,8 @@ static av_cold int AC3_encode_close(AVCodecContext *avctx)
         av_freep(&s->blocks[blk].mask);
     }
 
+    mdct_end(&s->mdct);
+
     av_freep(&avctx->coded_frame);
     return 0;
 }
_______________________________________________
FFmpeg-soc mailing list
[email protected]
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc

Reply via email to