PR #20815 opened by Agent45
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20815
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20815.patch

- Fixes indexing errors in attack detection logic and introduces a state flag 
(next_attack0_zero) to stabilize attack[0] prediction across frames. This 
reduces vertical line artifacts in periodic signals such as trumpet.
- Changes PSY_LAME_NUM_SUBBLOCKS from 3 to 2 to ensure full coverage of all 
1024 MDCT samples, with each subblock containing exactly 64 samples 
(1024 / (8 * 2)), matching LAME’s empirical design. Also adjusts the attack 
threshold presets. Together, these changes improve the handling of periodic 
signals, especially under low bitrate conditions.
- Disables PNS (perceptual noise substitution) when the per-channel bitrate is 
64 kbps or higher. This avoids unnecessary noise substitution in high-bitrate 
scenarios where it may degrade quality.
This resolves issue #20200.


From 458a942481151ede27478e6ac6d9d2866d438b84 Mon Sep 17 00:00:00 2001
From: Agent45 <[email protected]>
Date: Sat, 1 Nov 2025 19:49:05 +0000
Subject: [PATCH 1/2] avcodec/aacenc: add bitrate threshold for PNS

---
 libavcodec/aaccoder.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 7f1c4cdcc1..ddebdfd53d 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -58,6 +58,9 @@
  * replace low energy non zero bands */
 #define NOISE_LAMBDA_REPLACE 1.948f
 
+/* Bitrate threshold (in bits/sec/channel) at or above which PNS is disabled. */
+#define PNS_BITRATE_LIMIT 64000.0f
+
 #include "libavcodec/aaccoder_trellis.h"
 
 typedef float (*quantize_and_encode_band_func)(struct AACEncContext *s, 
PutBitContext *pb,
@@ -513,6 +516,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext 
*avctx, SingleChanne
         ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
         : (avctx->bit_rate / avctx->ch_layout.nb_channels);
 
+       int pns_at_low_bitrate = frame_bit_rate < PNS_BITRATE_LIMIT;
     frame_bit_rate *= 1.15f;
 
     if (avctx->cutoff > 0) {
@@ -536,7 +540,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext 
*avctx, SingleChanne
             const int start = wstart+sce->ics.swb_offset[g];
             const float freq = (start-wstart)*freq_mult;
             const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
-            if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) {
+            if (!pns_at_low_bitrate || freq < NOISE_LOW_LIMIT || 
(start-wstart) >= cutoff) {
                 if (!sce->zeroes[w*16+g])
                     prev_sf = sce->sf_idx[w*16+g];
                 continue;
@@ -649,6 +653,7 @@ static void mark_pns(AACEncContext *s, AVCodecContext 
*avctx, SingleChannelEleme
         ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
         : (avctx->bit_rate / avctx->ch_layout.nb_channels);
 
+       int pns_at_low_bitrate = frame_bit_rate < PNS_BITRATE_LIMIT;
     frame_bit_rate *= 1.15f;
 
     if (avctx->cutoff > 0) {
@@ -667,7 +672,7 @@ static void mark_pns(AACEncContext *s, AVCodecContext 
*avctx, SingleChannelEleme
             const int start = sce->ics.swb_offset[g];
             const float freq = start*freq_mult;
             const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
-            if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
+            if (!pns_at_low_bitrate || freq < NOISE_LOW_LIMIT || start >= 
cutoff) {
                 sce->can_pns[w*16+g] = 0;
                 continue;
             }
-- 
2.49.1


From 81a985d6a92eb411ba495fb05a3a962d181649ea Mon Sep 17 00:00:00 2001
From: Agent45 <[email protected]>
Date: Sat, 1 Nov 2025 20:41:24 +0000
Subject: [PATCH 2/2] avcodec/aacpsy: fix attack detection logic and subblock
 indexing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix several indexing errors in attack detection logic and refine transient 
handling in the AAC psychoacoustic model.

- Change PSY_LAME_NUM_SUBBLOCKS from 3 to 2 to ensure full coverage of all 1024 
MDCT samples, with each subblock containing exactly 1024 / (8 * 2) = 64 
samples—matching LAME’s empirical design.
- Introduce next_attack0_zero state flag to stabilize attack[0] prediction 
across frames.
- Adjust attack threshold presets.

These changes improve the handling of periodic signals such as trumpet, 
especially under low bitrate conditions.
---
 libavcodec/aacpsy.c | 64 +++++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 31 deletions(-)

diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c
index ed03cb68ac..f91ba45a52 100644
--- a/libavcodec/aacpsy.c
+++ b/libavcodec/aacpsy.c
@@ -97,7 +97,7 @@ enum {
 #define AAC_BLOCK_SIZE_LONG 1024    ///< long block size
 #define AAC_BLOCK_SIZE_SHORT 128    ///< short block size
 #define AAC_NUM_BLOCKS_SHORT 8      ///< number of blocks in a short sequence
-#define PSY_LAME_NUM_SUBBLOCKS 3    ///< Number of sub-blocks in each short 
block
+#define PSY_LAME_NUM_SUBBLOCKS 2    ///< Number of sub-blocks in each short 
block
 
 /**
  * @}
@@ -133,6 +133,7 @@ typedef struct AacPsyChannel{
     float attack_threshold;              ///< attack threshold for this channel
     float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS];
     int   prev_attack;                   ///< attack value for the last short 
block in the previous sequence
+    int   next_attack0_zero;          ///< whether attack[0] of the next frame 
is zero
 }AacPsyChannel;
 
 /**
@@ -181,19 +182,19 @@ typedef struct PsyLamePreset {
 static const PsyLamePreset psy_abr_map[] = {
 /* TODO: Tuning. These were taken from LAME. */
 /* kbps/ch st_lrm   */
-    {  8,  6.60},
-    { 16,  6.60},
-    { 24,  6.60},
-    { 32,  6.60},
-    { 40,  6.60},
-    { 48,  6.60},
-    { 56,  6.60},
-    { 64,  6.40},
-    { 80,  6.00},
-    { 96,  5.60},
-    {112,  5.20},
-    {128,  5.20},
-    {160,  5.20}
+    {  8,  7.60},
+    { 16,  7.60},
+    { 24,  7.60},
+    { 32,  7.60},
+    { 40,  7.60},
+    { 48,  7.60},
+    { 56,  7.60},
+    { 64,  7.40},
+    { 80,  7.00},
+    { 96,  6.60},
+    {112,  6.20},
+    {128,  6.20},
+    {160,  6.20}
 };
 
 /**
@@ -900,8 +901,8 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, 
const float *audio,
         /* Calculate the energies of each sub-shortblock */
         for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
             energy_subshort[i] = pch->prev_energy_subshort[i + 
((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
-            assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * 
PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
-            attack_intensity[i] = energy_subshort[i] / 
pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * 
PSY_LAME_NUM_SUBBLOCKS + 1)];
+            assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * 
PSY_LAME_NUM_SUBBLOCKS - 2)] > 0);
+            attack_intensity[i] = energy_subshort[i] / 
pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * 
PSY_LAME_NUM_SUBBLOCKS - 2)];
             energy_short[0] += energy_subshort[i];
         }
 
@@ -912,17 +913,12 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, 
const float *audio,
                 p = FFMAX(p, fabsf(*pf));
             pch->prev_energy_subshort[i] = energy_subshort[i + 
PSY_LAME_NUM_SUBBLOCKS] = p;
             energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
-            /* NOTE: The indexes below are [i + 3 - 2] in the LAME source.
-             *       Obviously the 3 and 2 have some significance, or this 
would be just [i + 1]
-             *       (which is what we use here). What the 3 stands for is 
ambiguous, as it is both
-             *       number of short blocks, and the number of sub-short 
blocks.
-             *       It seems that LAME is comparing each sub-block to 
sub-block + 1 in the
-             *       previous block.
-             */
-            if (p > energy_subshort[i + 1])
-                p = p / energy_subshort[i + 1];
-            else if (energy_subshort[i + 1] > p * 10.0f)
-                p = energy_subshort[i + 1] / (p * 10.0f);
+            
+            /* NOTE: The indexes below are [i + 3 - 2] in the LAME source. 
Compare each sub-block to sub-block - 2 */
+            if (p > energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2])
+                p = p / energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2];
+            else if (energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2] > p * 
10.0f)
+                p = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2] / (p * 
10.0f);
             else
                 p = 0.0;
             attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
@@ -943,7 +939,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, 
const float *audio,
             const float v = energy_short[i];
             const float m = FFMAX(u, v);
             if (m < 40000) {                          /* (2) */
-                if (u < 1.7f * v && v < 1.7f * u) {   /* (1) */
+                if (u < 2.3f * v && v < 2.3f * u) {   /* (1) */
                     if (i == 1 && attacks[0] < attacks[i])
                         attacks[0] = 0;
                     attacks[i] = 0;
@@ -951,13 +947,19 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, 
const float *audio,
             }
             att_sum += attacks[i];
         }
+               if (pch->next_attack0_zero)
+            attacks[0] = 0;
+               if (attacks[AAC_NUM_BLOCKS_SHORT] == 0)
+            pch->next_attack0_zero = 1;
+        else
+                       pch->next_attack0_zero = 0;
 
         if (attacks[0] <= pch->prev_attack)
             attacks[0] = 0;
 
         att_sum += attacks[0];
-        /* 3 below indicates the previous attack happened in the last 
sub-block of the previous sequence */
-        if (pch->prev_attack == 3 || att_sum) {
+               /* If the previous attack happened in the last sub-block of the 
previous sequence, or if there's a new attack, use short window */
+        if (pch->prev_attack == PSY_LAME_NUM_SUBBLOCKS || att_sum) {
             uselongblock = 0;
 
             for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++)
@@ -1007,7 +1009,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, 
const float *audio,
     }
     pch->next_grouping = window_grouping[grouping];
 
-    pch->prev_attack = attacks[8];
+    pch->prev_attack = attacks[AAC_NUM_BLOCKS_SHORT - 1];
 
     return wi;
 }
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to