Renamed mdct512() to calc_mdct() since it can work for transform sizes other
than 512.
This change also makes calc_mdct() about 6% faster.
---
libavcodec/ac3enc.c | 121 ++++++++++++++++++++++++++++++++++++++-------------
1 files changed, 90 insertions(+), 31 deletions(-)
diff --git libavcodec/ac3enc.c libavcodec/ac3enc.c
index 50340fe..cf0a563 100644
--- libavcodec/ac3enc.c
+++ libavcodec/ac3enc.c
@@ -36,6 +36,21 @@
#include "ac3.h"
#include "audioconvert.h"
+typedef struct IComplex {
+ int16_t re,im;
+} IComplex;
+
+typedef struct AC3MDCTContext {
+ AVCodecContext *avctx; ///< parent context for av_log()
+ int nbits; ///< log2(transform size)
+ int16_t *costab; ///< FFT cos table
+ int16_t *sintab; ///< FFT sin table
+ int16_t *xcos1; ///< MDCT cos table
+ int16_t *xsin1; ///< MDCT sin table
+ int16_t *rot_tmp; ///< temp buffer for pre-rotated samples
+ IComplex *cplx_tmp; ///< temp buffer for complex pre-rotated samples
+} AC3MDCTContext;
+
typedef struct AC3Block {
uint8_t **bap; ///< bap for each channel in this block
int32_t **mdct_coef; ///< MDCT coefficients for each channel in this block
@@ -49,6 +64,7 @@ typedef struct AC3Block {
typedef struct AC3EncodeContext {
AVCodecContext *avctx; ///< parent context
PutBitContext pb; ///< bitstream writer
+ AC3MDCTContext mdct; ///< MDCT context
AC3Block blocks[AC3_MAX_BLOCKS]; ///< per-block info
@@ -108,45 +124,83 @@ typedef struct AC3EncodeContext {
*/
int exponent_group_tab[3][256];
-static int16_t costab[64];
-static int16_t sintab[64];
-static int16_t xcos1[128];
-static int16_t xsin1[128];
-
#define MDCT_NBITS 9
#define MDCT_SAMPLES (1 << MDCT_NBITS)
/** convert float in range [-1..1] to int16_t in range [-32768..32767] */
#define FIX15(a) (av_clip_int16(lrintf(a * (float)(1 << 15))))
-typedef struct IComplex {
- int16_t re,im;
-} IComplex;
+static av_cold void mdct_end(AC3MDCTContext *mdct)
+{
+ mdct->nbits = 0;
+ av_freep(&mdct->costab);
+ av_freep(&mdct->sintab);
+ av_freep(&mdct->xcos1);
+ av_freep(&mdct->xsin1);
+ av_freep(&mdct->rot_tmp);
+ av_freep(&mdct->cplx_tmp);
+}
-static av_cold void fft_init(int ln)
+static av_cold int fft_init(AC3MDCTContext *mdct, int ln)
{
- int i, n;
+ int i, n, n2;
float alpha;
n = 1 << ln;
+ n2 = n >> 1;
- for (i = 0; i < n/2; i++) {
+ FF_ALLOC_OR_GOTO(mdct->avctx, mdct->costab, n2 * sizeof(*mdct->costab),
+ fft_alloc_fail);
+ FF_ALLOC_OR_GOTO(mdct->avctx, mdct->sintab, n2 * sizeof(*mdct->sintab),
+ fft_alloc_fail);
+
+ for (i = 0; i < n2; i++) {
alpha = 2 * M_PI * (float)i / (float)n;
- costab[i] = FIX15(cos(alpha));
- sintab[i] = FIX15(sin(alpha));
+ mdct->costab[i] = FIX15(cos(alpha));
+ mdct->sintab[i] = FIX15(sin(alpha));
}
+
+ return 0;
+
+fft_alloc_fail:
+ mdct_end(mdct);
+ return AVERROR(ENOMEM);
}
-static av_cold void mdct_init(void)
+static av_cold int mdct_init(AC3MDCTContext *mdct, int nbits)
{
- int i;
+ int i, n, n4;
+ int ret;
+
+ mdct->nbits = nbits;
+
+ ret = fft_init(mdct, nbits - 2);
+ if (ret)
+ return ret;
+
+ n = 1 << nbits;
+ n4 = n >> 2;
+
+ FF_ALLOC_OR_GOTO(mdct->avctx, mdct->xcos1, n4 * sizeof(*mdct->xcos1),
+ mdct_alloc_fail);
+ FF_ALLOC_OR_GOTO(mdct->avctx, mdct->xsin1 , n4 * sizeof(*mdct->xsin1),
+ mdct_alloc_fail);
+ FF_ALLOC_OR_GOTO(mdct->avctx, mdct->rot_tmp, n * sizeof(*mdct->rot_tmp),
+ mdct_alloc_fail);
+ FF_ALLOC_OR_GOTO(mdct->avctx, mdct->cplx_tmp, n4 * sizeof(*mdct->cplx_tmp),
+ mdct_alloc_fail);
- fft_init(MDCT_NBITS - 2);
- for (i = 0; i < MDCT_SAMPLES/4; i++) {
- float alpha = 2 * M_PI * (i + 1.0 / 8.0) / (float)MDCT_SAMPLES;
- xcos1[i] = FIX15(-cos(alpha));
- xsin1[i] = FIX15(-sin(alpha));
+ for (i = 0; i < n4; i++) {
+ float alpha = 2.0 * M_PI * (i + 1.0 / 8.0) / n;
+ mdct->xcos1[i] = FIX15(-cos(alpha));
+ mdct->xsin1[i] = FIX15(-sin(alpha));
}
+
+ return 0;
+
+mdct_alloc_fail:
+ mdct_end(mdct);
+ return AVERROR(ENOMEM);
}
/* butter fly op */
@@ -171,7 +225,7 @@ static av_cold void mdct_init(void)
/* do a 2^n point complex fft on 2^ln points. */
-static void fft(IComplex *z, int ln)
+static void calc_fft(AC3MDCTContext *mdct, IComplex *z, int ln)
{
int j, l, np, np2;
int nblocks, nloops;
@@ -224,7 +278,7 @@ static void fft(IComplex *z, int ln)
p++;
q++;
for (l = nblocks; l < np2; l += nblocks) {
- CMUL(tmp_re, tmp_im, costab[l], -sintab[l], q->re, q->im);
+ CMUL(tmp_re, tmp_im, mdct->costab[l], -mdct->sintab[l], q->re, q->im);
BF(p->re, p->im, q->re, q->im,
p->re, p->im, tmp_re, tmp_im);
p++;
@@ -239,12 +293,12 @@ static void fft(IComplex *z, int ln)
}
/* do a 512 point mdct */
-static void mdct512(int32_t *out, int16_t *in)
+static void calc_mdct(AC3MDCTContext *mdct, int32_t *out, int16_t *in)
{
int i, re, im;
int n, n2, n4;
- int16_t rot[MDCT_SAMPLES];
- IComplex x[MDCT_SAMPLES/4];
+ int16_t *rot = mdct->rot_tmp;
+ IComplex *x = mdct->cplx_tmp;
n = MDCT_SAMPLES;
n2 = n >> 1;
@@ -259,16 +313,16 @@ static void mdct512(int32_t *out, int16_t *in)
for (i = 0; i < n4; i++) {
re = ((int)rot[ 2*i] - (int)rot[ n-1-2*i]) >> 1;
im = -((int)rot[n2+2*i] - (int)rot[n2-1-2*i]) >> 1;
- CMUL(x[i].re, x[i].im, re, im, -xcos1[i], xsin1[i]);
+ CMUL(x[i].re, x[i].im, re, im, -mdct->xcos1[i], mdct->xsin1[i]);
}
- fft(x, MDCT_NBITS - 2);
+ calc_fft(mdct, x, MDCT_NBITS - 2);
/* post rotation */
for (i = 0; i < n4; i++) {
re = x[i].re;
im = x[i].im;
- CMUL(out[n2-1-2*i], out[2*i], re, im, xsin1[i], xcos1[i]);
+ CMUL(out[n2-1-2*i], out[2*i], re, im, mdct->xsin1[i], mdct->xcos1[i]);
}
}
@@ -321,7 +375,7 @@ static void apply_mdct(AC3EncodeContext *s)
lshift_tab(s->windowed_samples, AC3_BLOCK_SIZE*2, v);
/* do the MDCT */
- mdct512(s->blocks[blk].mdct_coef[ch], s->windowed_samples);
+ calc_mdct(&s->mdct, s->blocks[blk].mdct_coef[ch], s->windowed_samples);
}
}
}
@@ -897,9 +951,10 @@ static av_cold int AC3_encode_init(AVCodecContext *avctx)
int bitrate = avctx->bit_rate;
AC3EncodeContext *s = avctx->priv_data;
int i, j, blk, ch;
- int bw_code, frame_size_58;
+ int bw_code, frame_size_58, ret;
s->avctx = avctx;
+ s->mdct.avctx = avctx;
avctx->frame_size = AC3_FRAME_SIZE;
@@ -986,7 +1041,9 @@ static av_cold int AC3_encode_init(AVCodecContext *avctx)
count_frame_bits_fixed(s);
- mdct_init();
+ ret = mdct_init(&s->mdct, MDCT_NBITS);
+ if (ret)
+ return ret;
for (j = 0; j < 256; j++) {
exponent_group_tab[0][j] = (j - 1) / 3;
@@ -1539,6 +1596,8 @@ static av_cold int AC3_encode_close(AVCodecContext *avctx)
av_freep(&s->blocks[blk].mask);
}
+ mdct_end(&s->mdct);
+
av_freep(&avctx->coded_frame);
return 0;
}
_______________________________________________
FFmpeg-soc mailing list
[email protected]
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc