Allocating with av_malloc() reduces memory usage and aligns the start of each
channel bap array for future SIMD. Moving the 2 bap arrays to AC3EncodeContext
allows FFSWAP() instead of memcpy() and speeds up compute_bit_allocation() by
about 1%.
---
libavcodec/ac3enc.c | 75 ++++++++++++++++++++++++++++++++++++--------------
1 files changed, 54 insertions(+), 21 deletions(-)
diff --git libavcodec/ac3enc.c libavcodec/ac3enc.c
index 8c35220..8cc69c6 100644
--- libavcodec/ac3enc.c
+++ libavcodec/ac3enc.c
@@ -34,10 +34,16 @@
#include "ac3.h"
#include "audioconvert.h"
+typedef struct AC3Block {
+ uint8_t **bap; ///< per-channel bap pointers for this block; each entry points into bap_buffer
+} AC3Block;
+
typedef struct AC3EncodeContext {
AVCodecContext *avctx; ///< parent context
PutBitContext pb; ///< bitstream writer
+ AC3Block blocks[AC3_MAX_BLOCKS]; ///< per-block info
+
int bitstream_id; ///< bitstream id (bsid)
int bitstream_mode; ///< bitstream mode (bsmod)
@@ -78,6 +84,8 @@ typedef struct AC3EncodeContext {
int16_t *windowed_samples; ///< windowed samples for the current block
int16_t *planar_samples[AC3_MAX_CHANNELS]; ///< full input frame, deinterleaved
+ uint8_t *bap_buffer; ///< bap storage for all blocks and channels, referenced by the per-block bap pointers
+ uint8_t *bap1_buffer; ///< second bap buffer of the same size, exchanged with bap_buffer via FFSWAP() during bit allocation
} AC3EncodeContext;
/**
@@ -603,7 +611,6 @@ static void bit_alloc_masking(AC3EncodeContext *s,
static int bit_alloc(AC3EncodeContext *s,
int16_t mask[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][50],
int16_t psd[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS],
- uint8_t bap[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS],
int snr_offset)
{
int blk, ch;
@@ -613,15 +620,17 @@ static int bit_alloc(AC3EncodeContext *s,
/* compute size */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ AC3Block *block = &s->blocks[blk];
s->mant1_cnt = 0;
s->mant2_cnt = 0;
s->mant4_cnt = 0;
for (ch = 0; ch < s->channels; ch++) {
+ block->bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)];
ff_ac3_bit_alloc_calc_bap(mask[blk][ch], psd[blk][ch], 0,
s->nb_coefs[ch], snr_offset,
s->bit_alloc.floor, ff_ac3_bap_tab,
- bap[blk][ch]);
- mant_bits += compute_mantissa_size(s, bap[blk][ch], s->nb_coefs[ch]);
+ block->bap[ch]);
+ mant_bits += compute_mantissa_size(s, block->bap[ch], s->nb_coefs[ch]);
}
}
#ifdef DEBUG_BITALLOC
@@ -690,14 +699,23 @@ static void count_frame_bits(AC3EncodeContext *s,
s->frame_bits = frame_bits;
}
+/**
+ * Set every block's per-channel bap pointer to its AC3_MAX_COEFS-sized slice of the current bap_buffer (e.g. after a buffer swap).
+ */
+static void reset_block_bap(AC3EncodeContext *s)
+{
+ int blk, ch;
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+ for (ch = 0; ch < s->channels; ch++)
+ s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)];
+}
+
static int csnr_bit_alloc(AC3EncodeContext *s,
int16_t mask[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][50],
- int16_t psd[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS],
- uint8_t bap[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS])
+ int16_t psd[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS])
{
int ch;
int snr_offset, snr_incr, bits_left;
- uint8_t bap1[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS];
bits_left = 16 * s->frame_size - (s->frame_bits + s->exp_bits);
@@ -706,19 +724,22 @@ static int csnr_bit_alloc(AC3EncodeContext *s,
snr_offset = 1023;
while (snr_offset >= 0 &&
- bit_alloc(s, mask, psd, bap, snr_offset) > bits_left)
+ bit_alloc(s, mask, psd, snr_offset) > bits_left)
snr_offset -= 64;
if (snr_offset < 0) {
return -1;
}
+ FFSWAP(uint8_t *, s->bap_buffer, s->bap1_buffer);
for (snr_incr = 64; snr_incr > 0; snr_incr >>= 2) {
while (snr_offset + snr_incr <= 1023 &&
- bit_alloc(s, mask, psd, bap1, snr_offset + snr_incr) <= bits_left) {
+ bit_alloc(s, mask, psd, snr_offset + snr_incr) <= bits_left) {
snr_offset += snr_incr;
- memcpy(bap, bap1, sizeof(bap1));
+ FFSWAP(uint8_t *, s->bap_buffer, s->bap1_buffer);
}
}
+ FFSWAP(uint8_t *, s->bap_buffer, s->bap1_buffer);
+ reset_block_bap(s);
s->coarse_snr_offset = snr_offset >> 4;
for(ch=0;ch<s->channels;ch++)
@@ -728,7 +749,6 @@ static int csnr_bit_alloc(AC3EncodeContext *s,
}
static int compute_bit_allocation(AC3EncodeContext *s,
- uint8_t bap[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS],
uint8_t encoded_exp[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS],
uint8_t exp_strategy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS])
{
@@ -743,7 +763,7 @@ static int compute_bit_allocation(AC3EncodeContext *s,
bit_alloc_masking(s, encoded_exp, exp_strategy, psd, mask);
- return csnr_bit_alloc(s, mask, psd, bap);
+ return csnr_bit_alloc(s, mask, psd);
}
static av_cold int set_channel_info(AC3EncodeContext *s, int channels,
@@ -795,7 +815,7 @@ static av_cold int AC3_encode_init(AVCodecContext *avctx)
int freq = avctx->sample_rate;
int bitrate = avctx->bit_rate;
AC3EncodeContext *s = avctx->priv_data;
- int i, j, ch;
+ int i, j, blk, ch;
int bw_code;
s->avctx = avctx;
@@ -897,6 +917,16 @@ static av_cold int AC3_encode_init(AVCodecContext *avctx)
(AC3_FRAME_SIZE+AC3_BLOCK_SIZE) * sizeof(*s->planar_samples[0]),
alloc_fail)
}
+ FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, AC3_MAX_BLOCKS * s->channels *
+ AC3_MAX_COEFS * sizeof(*s->bap_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * s->channels *
+ AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail);
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ AC3Block *block = &s->blocks[blk];
+ FF_ALLOCZ_OR_GOTO(avctx, block->bap, s->channels * sizeof(*block->bap),
+ alloc_fail);
+ }
+ reset_block_bap(s);
avctx->coded_frame = avcodec_alloc_frame();
avctx->coded_frame->key_frame= 1;
@@ -978,7 +1008,6 @@ static inline int asym_quant(int c, int e, int qbits)
static void output_audio_block(AC3EncodeContext *s,
uint8_t exp_strategy[AC3_MAX_CHANNELS],
uint8_t encoded_exp[AC3_MAX_CHANNELS][AC3_MAX_COEFS],
- uint8_t bap[AC3_MAX_CHANNELS][AC3_MAX_COEFS],
int32_t mdct_coefs[AC3_MAX_CHANNELS][AC3_MAX_COEFS],
int8_t exp_shift[AC3_MAX_CHANNELS],
int block_num)
@@ -1106,7 +1135,7 @@ static void output_audio_block(AC3EncodeContext *s,
for (i = 0; i < s->nb_coefs[ch]; i++) {
c = mdct_coefs[ch][i];
e = encoded_exp[ch][i] - exp_shift[ch];
- b = bap[ch][i];
+ b = s->blocks[block_num].bap[ch][i];
switch(b) {
case 0:
v = 0;
@@ -1192,7 +1221,7 @@ static void output_audio_block(AC3EncodeContext *s,
for (i = 0; i < s->nb_coefs[ch]; i++) {
q = qmant[ch][i];
- b = bap[ch][i];
+ b = s->blocks[block_num].bap[ch][i];
switch(b) {
case 0: break;
case 1: if (q != 128) put_bits(&s->pb, 5, q); break;
@@ -1372,7 +1401,6 @@ static int AC3_encode_frame(AVCodecContext *avctx,
int32_t mdct_coef[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS];
uint8_t exp[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS];
uint8_t exp_strategy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS];
- uint8_t bap[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS];
int8_t exp_shift[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS];
int frame_size, err;
@@ -1384,20 +1412,20 @@ static int AC3_encode_frame(AVCodecContext *avctx,
process_exponents(s, mdct_coef, exp, exp_strategy, exp_shift);
- err = compute_bit_allocation(s, bap, exp, exp_strategy);
+ err = compute_bit_allocation(s, exp, exp_strategy);
while (err) {
/* fallback 1: downgrade exponents */
if (downgrade_exponents(s, exp_strategy)) {
calculate_exponents(s, mdct_coef, exp, exp_shift);
encode_exponents(s, exp, exp_strategy);
- err = compute_bit_allocation(s, bap, exp, exp_strategy);
+ err = compute_bit_allocation(s, exp, exp_strategy);
continue;
}
/* fallback 2: reduce bandwidth code down to 0 */
if (reduce_bandwidth(s, 0)) {
process_exponents(s, mdct_coef, exp, exp_strategy, exp_shift);
- err = compute_bit_allocation(s, bap, exp, exp_strategy);
+ err = compute_bit_allocation(s, exp, exp_strategy);
continue;
}
@@ -1408,7 +1436,7 @@ static int AC3_encode_frame(AVCodecContext *avctx,
output_frame_header(s, frame);
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
- output_audio_block(s, exp_strategy[blk], exp[blk], bap[blk],
+ output_audio_block(s, exp_strategy[blk], exp[blk],
mdct_coef[blk], exp_shift[blk], blk);
}
frame_size = output_frame_end(s);
@@ -1422,12 +1450,17 @@ static int AC3_encode_frame(AVCodecContext *avctx,
static av_cold int AC3_encode_close(AVCodecContext *avctx)
{
AC3EncodeContext *s = avctx->priv_data;
- int ch;
+ int blk, ch;
av_freep(&s->windowed_samples);
+ av_freep(&s->bap_buffer);
+ av_freep(&s->bap1_buffer);
for (ch = 0; ch < s->channels; ch++) {
av_freep(&s->planar_samples[ch]);
}
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ av_freep(&s->blocks[blk].bap);
+ }
av_freep(&avctx->coded_frame);
return 0;
_______________________________________________
FFmpeg-soc mailing list
[email protected]
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc