[FFmpeg-cvslog] AAC: Fix M/S stereo encoding

Claudio Freire Tue, 03 Mar 2015 05:09:15 -0800

ffmpeg | branch: master | Claudio Freire <klaussfre...@gmail.com> | Tue Mar  3 
03:43:06 2015 -0300| [6394acaf36da3106f4793bda32730f8ff6b0ddb1] | committer: 
Michael Niedermayer


AAC: Fix M/S stereo encoding

This patch fixes a pointer arithmetic bug in adjust_frame_information that 
resulted in heavily corrupted audio when using M/S encoding. Also, a backup 
copy of untransformed coefficients has to be kept around or attempts at 
re-processing the frame (which happens when heavily overspending bits during 
transients) will result in re-encoding of the coefficients and subsequent 
corruption of the resulting stream.

A/B testing shows the bug as corrected, but still cannot prove that M/S coding 
is a win at least in numbers. Limited listening tests do show improvement on 
M/S encoded samples in lower bitrates, but they're hidden among the other 
artifacts that remain to be corrected in the encoder.

Some of the regressions flagged in the report do show a poor stereo image 
(though not a buggy one), so M/S encoding is clearly not yet good enough for 
stereo_mode to default to auto.

In numbers, Patched against Unpatched, stereo_mode auto:

  Files: 114
  Bitrates: 6
  Tests: 683

  Serious Regressions: 0 (0%)
  Regressions: 0 (0%)
  Improvements: 227 (33%)
  Big improvements: 92 (13%)
  Worst regression - mybloodrusts.wv - 256k
    - StdDev: 28.61       pSNR: -0.43     maxdiff: 1372.00
  Best improvement - 60.wv - 384k
    - StdDev: -369.57     pSNR: 45.02     maxdiff: -13322.00
  Average          - StdDev: -80.56       pSNR: 2.49      maxdiff: -8858.00

Patched against Unpatched stereo_mode ms_off shows no difference.

Patched stereo_mode auto vs Unpatched stereo_mode ms_off shows a small average 
improvement, just not too significant:

  Serious Regressions: 0 (0%)
  Regressions: 10 (1%)
  Improvements: 45 (6%)
  Big improvements: 2 (0%)
  Worst regression - Illinois.wv - 256k
    - StdDev: 33.20       pSNR: -2.03     maxdiff: 477.00
  Best improvement - song_of_circomstances.flac - 384k
    - StdDev: -3.97       pSNR: 7.61      maxdiff: -826.00
  Average          - StdDev: -10.25       pSNR: 0.20      maxdiff: -281.00

Signed-off-by: Michael Niedermayer <michae...@gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6394acaf36da3106f4793bda32730f8ff6b0ddb1
---

 libavcodec/aac.h      |    3 ++-
 libavcodec/aaccoder.c |    6 +++---
 libavcodec/aacenc.c   |   38 ++++++++++++++++++++++++++------------
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index e9c373f..b25b40c 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -233,7 +233,8 @@ typedef struct SingleChannelElement {
     float sf[120];                                  ///< scalefactors
     int sf_idx[128];                                ///< scalefactor indices 
(used by encoder)
     uint8_t zeroes[128];                            ///< band is not coded 
(used by encoder)
-    DECLARE_ALIGNED(32, float,   coeffs)[1024];     ///< coefficients for IMDCT
+    DECLARE_ALIGNED(32, float,   pcoeffs)[1024];    ///< coefficients for 
IMDCT, pristine
+    DECLARE_ALIGNED(32, float,   coeffs)[1024];     ///< coefficients for 
IMDCT, maybe processed
     DECLARE_ALIGNED(32, float,   saved)[1536];      ///< overlap
     DECLARE_ALIGNED(32, float,   ret_buf)[2048];    ///< PCM output buffer
     DECLARE_ALIGNED(16, float,   ltp_state)[3072];  ///< time signal for LTP
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index b4d2009..64eee32 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -1069,10 +1069,10 @@ static void search_for_ms(AACEncContext *s, 
ChannelElement *cpe,
                     float minthr = FFMIN(band0->threshold, band1->threshold);
                     float maxthr = FFMAX(band0->threshold, band1->threshold);
                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
-                        M[i] = (sce0->coeffs[start+w2*128+i]
-                              + sce1->coeffs[start+w2*128+i]) * 0.5;
+                        M[i] = (sce0->pcoeffs[start+w2*128+i]
+                              + sce1->pcoeffs[start+w2*128+i]) * 0.5;
                         S[i] =  M[i]
-                              - sce1->coeffs[start+w2*128+i];
+                              - sce1->pcoeffs[start+w2*128+i];
                     }
                     abs_pow34_v(L34, sce0->coeffs+start+w2*128, 
sce0->ics.swb_sizes[g]);
                     abs_pow34_v(R34, sce1->coeffs+start+w2*128, 
sce0->ics.swb_sizes[g]);
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 9c910b7..7c286aa 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -260,6 +260,7 @@ static void apply_window_and_mdct(AACEncContext *s, 
SingleChannelElement *sce,
         for (i = 0; i < 1024; i += 128)
             s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
+    memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
 }
 
 /**
@@ -311,20 +312,23 @@ static void adjust_frame_information(ChannelElement *cpe, 
int chans)
         start = 0;
         maxsfb = 0;
         cpe->ch[ch].pulse.num_pulse = 0;
-        for (w = 0; w < ics->num_windows*16; w += 16) {
-            for (g = 0; g < ics->num_swb; g++) {
-                //apply M/S
-                if (cpe->common_window && !ch && cpe->ms_mask[w + g]) {
-                    for (i = 0; i < ics->swb_sizes[g]; i++) {
-                        cpe->ch[0].coeffs[start+i] = 
(cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
-                        cpe->ch[1].coeffs[start+i] =  
cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
+        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
+            for (w2 = 0; w2 < ics->group_len[w]; w2++) {
+                start = (w+w2) * 128;
+                for (g = 0; g < ics->num_swb; g++) {
+                    //apply M/S
+                    if (cpe->common_window && !ch && cpe->ms_mask[w*16 + g]) {
+                        for (i = 0; i < ics->swb_sizes[g]; i++) {
+                            cpe->ch[0].coeffs[start+i] = 
(cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
+                            cpe->ch[1].coeffs[start+i] = 
cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
+                        }
                     }
+                    start += ics->swb_sizes[g];
                 }
-                start += ics->swb_sizes[g];
+                for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && 
cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
+                    ;
+                maxsfb = FFMAX(maxsfb, cmaxsfb);
             }
-            for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && 
cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--)
-                ;
-            maxsfb = FFMAX(maxsfb, cmaxsfb);
         }
         ics->max_sfb = maxsfb;
 
@@ -507,7 +511,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket 
*avpkt,
     AACEncContext *s = avctx->priv_data;
     float **samples = s->planar_samples, *samples2, *la, *overlap;
     ChannelElement *cpe;
-    int i, ch, w, g, chans, tag, start_ch, ret;
+    int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
     int chan_el_counter[4];
     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
 
@@ -630,6 +634,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket 
*avpkt,
                 if (cpe->common_window) {
                     put_ics_info(s, &cpe->ch[0].ics);
                     encode_ms_info(&s->pb, cpe);
+                    if (cpe->ms_mode) ms_mode = 1;
                 }
             }
             for (ch = 0; ch < chans; ch++) {
@@ -644,6 +649,15 @@ static int aac_encode_frame(AVCodecContext *avctx, 
AVPacket *avpkt,
             s->psy.bitres.bits = frame_bits / s->channels;
             break;
         }
+        if (ms_mode) {
+            for (i = 0; i < s->chan_map[0]; i++) {
+                // Must restore coeffs
+                chans = tag == TYPE_CPE ? 2 : 1;
+                cpe = &s->cpe[i];
+                for (ch = 0; ch < chans; ch++)
+                    memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, 
sizeof(cpe->ch[ch].coeffs));
+            }
+        }
 
         s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / 
frame_bits;
 

_______________________________________________
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] AAC: Fix M/S stereo encoding

Reply via email to