Allocating the bap buffers with av_malloc() reduces memory usage and aligns the
start of each channel bap array for future SIMD. Moving the two bap arrays into
AC3EncodeContext allows FFSWAP() instead of memcpy() and speeds up
compute_bit_allocation() by about 1%.
---
 libavcodec/ac3enc.c |   75 ++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 54 insertions(+), 21 deletions(-)

diff --git libavcodec/ac3enc.c libavcodec/ac3enc.c
index 8c35220..8cc69c6 100644
--- libavcodec/ac3enc.c
+++ libavcodec/ac3enc.c
@@ -34,10 +34,16 @@
 #include "ac3.h"
 #include "audioconvert.h"
 
+typedef struct AC3Block {
+    uint8_t **bap;                          ///< one bap array pointer per channel in this block (each points into the context's bap_buffer)
+} AC3Block;
+
 typedef struct AC3EncodeContext {
     AVCodecContext *avctx;                  ///< parent context
     PutBitContext pb;                       ///< bitstream writer
 
+    AC3Block blocks[AC3_MAX_BLOCKS];        ///< per-block info
+
     int bitstream_id;                       ///< bitstream id                           (bsid)
     int bitstream_mode;                     ///< bitstream mode                         (bsmod)
 
@@ -78,6 +84,8 @@ typedef struct AC3EncodeContext {
 
     int16_t *windowed_samples;                  ///< windowed samples for the current block
     int16_t *planar_samples[AC3_MAX_CHANNELS];  ///< full input frame, deinterleaved
+    uint8_t *bap_buffer;
+    uint8_t *bap1_buffer;
 } AC3EncodeContext;
 
 /**
@@ -603,7 +611,6 @@ static void bit_alloc_masking(AC3EncodeContext *s,
 static int bit_alloc(AC3EncodeContext *s,
                      int16_t mask[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][50],
                      int16_t psd[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS],
-                     uint8_t bap[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS],
                      int snr_offset)
 {
     int blk, ch;
@@ -613,15 +620,17 @@ static int bit_alloc(AC3EncodeContext *s,
 
     /* compute size */
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
         s->mant1_cnt = 0;
         s->mant2_cnt = 0;
         s->mant4_cnt = 0;
         for (ch = 0; ch < s->channels; ch++) {
+            block->bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)];
             ff_ac3_bit_alloc_calc_bap(mask[blk][ch], psd[blk][ch], 0,
                                       s->nb_coefs[ch], snr_offset,
                                       s->bit_alloc.floor, ff_ac3_bap_tab,
-                                      bap[blk][ch]);
-            mant_bits += compute_mantissa_size(s, bap[blk][ch], s->nb_coefs[ch]);
+                                      block->bap[ch]);
+            mant_bits += compute_mantissa_size(s, block->bap[ch], s->nb_coefs[ch]);
         }
     }
 #ifdef DEBUG_BITALLOC
@@ -690,14 +699,23 @@ static void count_frame_bits(AC3EncodeContext *s,
     s->frame_bits = frame_bits;
 }
 
+/**
+ * Point every block's per-channel bap pointer at its AC3_MAX_COEFS-sized slice of the current s->bap_buffer.
+ */
+static void reset_block_bap(AC3EncodeContext *s)
+{
+    int blk, ch;
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+        for (ch = 0; ch < s->channels; ch++)
+            s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)];
+}
+
 static int csnr_bit_alloc(AC3EncodeContext *s,
                           int16_t mask[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][50],
-                          int16_t psd[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS],
-                          uint8_t bap[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS])
+                          int16_t psd[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS])
 {
     int ch;
     int snr_offset, snr_incr, bits_left;
-    uint8_t bap1[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS];
 
     bits_left = 16 * s->frame_size - (s->frame_bits + s->exp_bits);
 
@@ -706,19 +724,22 @@ static int csnr_bit_alloc(AC3EncodeContext *s,
         snr_offset = 1023;
 
     while (snr_offset >= 0 &&
-           bit_alloc(s, mask, psd, bap, snr_offset) > bits_left)
+           bit_alloc(s, mask, psd, snr_offset) > bits_left)
         snr_offset -= 64;
     if (snr_offset < 0) {
         return -1;
     }
 
+    FFSWAP(uint8_t *, s->bap_buffer, s->bap1_buffer);
     for (snr_incr = 64; snr_incr > 0; snr_incr >>= 2) {
         while (snr_offset + snr_incr <= 1023 &&
-               bit_alloc(s, mask, psd, bap1, snr_offset + snr_incr) <= bits_left) {
+               bit_alloc(s, mask, psd, snr_offset + snr_incr) <= bits_left) {
             snr_offset += snr_incr;
-            memcpy(bap, bap1, sizeof(bap1));
+            FFSWAP(uint8_t *, s->bap_buffer, s->bap1_buffer);
         }
     }
+    FFSWAP(uint8_t *, s->bap_buffer, s->bap1_buffer);
+    reset_block_bap(s);
 
     s->coarse_snr_offset = snr_offset >> 4;
     for(ch=0;ch<s->channels;ch++)
@@ -728,7 +749,6 @@ static int csnr_bit_alloc(AC3EncodeContext *s,
 }
 
 static int compute_bit_allocation(AC3EncodeContext *s,
-                                  uint8_t bap[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS],
                                   uint8_t encoded_exp[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS],
                                   uint8_t exp_strategy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS])
 {
@@ -743,7 +763,7 @@ static int compute_bit_allocation(AC3EncodeContext *s,
 
     bit_alloc_masking(s, encoded_exp, exp_strategy, psd, mask);
 
-    return csnr_bit_alloc(s, mask, psd, bap);
+    return csnr_bit_alloc(s, mask, psd);
 }
 
 static av_cold int set_channel_info(AC3EncodeContext *s, int channels,
@@ -795,7 +815,7 @@ static av_cold int AC3_encode_init(AVCodecContext *avctx)
     int freq = avctx->sample_rate;
     int bitrate = avctx->bit_rate;
     AC3EncodeContext *s = avctx->priv_data;
-    int i, j, ch;
+    int i, j, blk, ch;
     int bw_code;
 
     s->avctx = avctx;
@@ -897,6 +917,16 @@ static av_cold int AC3_encode_init(AVCodecContext *avctx)
                           (AC3_FRAME_SIZE+AC3_BLOCK_SIZE) * sizeof(*s->planar_samples[0]),
                           alloc_fail)
     }
+    FF_ALLOC_OR_GOTO(avctx, s->bap_buffer,  AC3_MAX_BLOCKS * s->channels *
+                     AC3_MAX_COEFS * sizeof(*s->bap_buffer),  alloc_fail);
+    FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * s->channels *
+                     AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail);
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        AC3Block *block = &s->blocks[blk];
+        FF_ALLOCZ_OR_GOTO(avctx, block->bap, s->channels * sizeof(*block->bap),
+                          alloc_fail);
+    }
+    reset_block_bap(s);
 
     avctx->coded_frame = avcodec_alloc_frame();
     avctx->coded_frame->key_frame= 1;
@@ -978,7 +1008,6 @@ static inline int asym_quant(int c, int e, int qbits)
 static void output_audio_block(AC3EncodeContext *s,
                                uint8_t exp_strategy[AC3_MAX_CHANNELS],
                                uint8_t encoded_exp[AC3_MAX_CHANNELS][AC3_MAX_COEFS],
-                               uint8_t bap[AC3_MAX_CHANNELS][AC3_MAX_COEFS],
                                int32_t mdct_coefs[AC3_MAX_CHANNELS][AC3_MAX_COEFS],
                                int8_t exp_shift[AC3_MAX_CHANNELS],
                                int block_num)
@@ -1106,7 +1135,7 @@ static void output_audio_block(AC3EncodeContext *s,
         for (i = 0; i < s->nb_coefs[ch]; i++) {
             c = mdct_coefs[ch][i];
             e = encoded_exp[ch][i] - exp_shift[ch];
-            b = bap[ch][i];
+            b = s->blocks[block_num].bap[ch][i];
             switch(b) {
             case 0:
                 v = 0;
@@ -1192,7 +1221,7 @@ static void output_audio_block(AC3EncodeContext *s,
 
         for (i = 0; i < s->nb_coefs[ch]; i++) {
             q = qmant[ch][i];
-            b = bap[ch][i];
+            b = s->blocks[block_num].bap[ch][i];
             switch(b) {
             case  0:                                         break;
             case  1: if (q != 128) put_bits(&s->pb,  5,  q); break;
@@ -1372,7 +1401,6 @@ static int AC3_encode_frame(AVCodecContext *avctx,
     int32_t mdct_coef[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS];
     uint8_t exp[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS];
     uint8_t exp_strategy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS];
-    uint8_t bap[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][AC3_MAX_COEFS];
     int8_t exp_shift[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS];
     int frame_size, err;
 
@@ -1384,20 +1412,20 @@ static int AC3_encode_frame(AVCodecContext *avctx,
 
     process_exponents(s, mdct_coef, exp, exp_strategy, exp_shift);
 
-    err = compute_bit_allocation(s, bap, exp, exp_strategy);
+    err = compute_bit_allocation(s, exp, exp_strategy);
     while (err) {
         /* fallback 1: downgrade exponents */
         if (downgrade_exponents(s, exp_strategy)) {
             calculate_exponents(s, mdct_coef, exp, exp_shift);
             encode_exponents(s, exp, exp_strategy);
-            err = compute_bit_allocation(s, bap, exp, exp_strategy);
+            err = compute_bit_allocation(s, exp, exp_strategy);
             continue;
         }
 
         /* fallback 2: reduce bandwidth code down to 0 */
         if (reduce_bandwidth(s, 0)) {
             process_exponents(s, mdct_coef, exp, exp_strategy, exp_shift);
-            err = compute_bit_allocation(s, bap, exp, exp_strategy);
+            err = compute_bit_allocation(s, exp, exp_strategy);
             continue;
         }
 
@@ -1408,7 +1436,7 @@ static int AC3_encode_frame(AVCodecContext *avctx,
     output_frame_header(s, frame);
 
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-        output_audio_block(s, exp_strategy[blk], exp[blk], bap[blk],
+        output_audio_block(s, exp_strategy[blk], exp[blk],
                            mdct_coef[blk], exp_shift[blk], blk);
     }
     frame_size = output_frame_end(s);
@@ -1422,12 +1450,17 @@ static int AC3_encode_frame(AVCodecContext *avctx,
 static av_cold int AC3_encode_close(AVCodecContext *avctx)
 {
     AC3EncodeContext *s = avctx->priv_data;
-    int ch;
+    int blk, ch;
 
     av_freep(&s->windowed_samples);
+    av_freep(&s->bap_buffer);
+    av_freep(&s->bap1_buffer);
     for (ch = 0; ch < s->channels; ch++) {
         av_freep(&s->planar_samples[ch]);
     }
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        av_freep(&s->blocks[blk].bap);
+    }
 
     av_freep(&avctx->coded_frame);
     return 0;
_______________________________________________
FFmpeg-soc mailing list
[email protected]
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc

Reply via email to