PR #20815 opened by Agent45 URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20815 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20815.patch
- Fixes indexing errors in attack detection logic and introduces a state flag (next_attack0_zero) to stabilize attack[0] prediction across frames. This reduces vertical line artifacts in periodic signals such as trumpet. - Changes PSY_LAME_NUM_SUBBLOCKS from 3 to 2 to ensure full coverage of all 1024 MDCT samples, with each subblock containing exactly 64 samples—matching LAME’s empirical design. Also adjusts the attack threshold presets. These changes improve the handling of periodic signals, especially under low bitrate conditions. - Disables PNS when the per-channel bitrate exceeds 64 kbps. This avoids unnecessary noise substitution in high-bitrate scenarios where it may degrade quality. This resolves issue #20200. From 458a942481151ede27478e6ac6d9d2866d438b84 Mon Sep 17 00:00:00 2001 From: Agent45 <[email protected]> Date: Sat, 1 Nov 2025 19:49:05 +0000 Subject: [PATCH 1/2] avcodec/aacenc: add bitrate threshold for PNS --- libavcodec/aaccoder.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index 7f1c4cdcc1..ddebdfd53d 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -58,6 +58,9 @@ * replace low energy non zero bands */ #define NOISE_LAMBDA_REPLACE 1.948f +/* Bitrate threshold (in bits/sec/channel) above which PNS is disabled. */ +#define PNS_BITRATE_LIMIT 64000.0f + #include "libavcodec/aaccoder_trellis.h" typedef float (*quantize_and_encode_band_func)(struct AACEncContext *s, PutBitContext *pb, @@ -513,6 +516,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne ? 
(refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) : (avctx->bit_rate / avctx->ch_layout.nb_channels); + int pns_at_low_bitrate = frame_bit_rate < PNS_BITRATE_LIMIT; frame_bit_rate *= 1.15f; if (avctx->cutoff > 0) { @@ -536,7 +540,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne const int start = wstart+sce->ics.swb_offset[g]; const float freq = (start-wstart)*freq_mult; const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); - if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) { + if (!pns_at_low_bitrate || freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) { if (!sce->zeroes[w*16+g]) prev_sf = sce->sf_idx[w*16+g]; continue; @@ -649,6 +653,7 @@ static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelEleme ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) : (avctx->bit_rate / avctx->ch_layout.nb_channels); + int pns_at_low_bitrate = frame_bit_rate < PNS_BITRATE_LIMIT; frame_bit_rate *= 1.15f; if (avctx->cutoff > 0) { @@ -667,7 +672,7 @@ static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelEleme const int start = sce->ics.swb_offset[g]; const float freq = start*freq_mult; const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); - if (freq < NOISE_LOW_LIMIT || start >= cutoff) { + if (!pns_at_low_bitrate || freq < NOISE_LOW_LIMIT || start >= cutoff) { sce->can_pns[w*16+g] = 0; continue; } -- 2.49.1 From 81a985d6a92eb411ba495fb05a3a962d181649ea Mon Sep 17 00:00:00 2001 From: Agent45 <[email protected]> Date: Sat, 1 Nov 2025 20:41:24 +0000 Subject: [PATCH 2/2] avcodec/aacpsy: fix attack detection logic and subblock indexing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix several indexing errors in attack detection logic and refine transient handling in the AAC psychoacoustic model. 
- Change PSY_LAME_NUM_SUBBLOCKS from 3 to 2 to ensure full coverage of all 1024 MDCT samples, with each subblock containing exactly 1024 / (8 * 2) = 64 samples—matching LAME’s empirical design. - Introduce next_attack0_zero state flag to stabilize attack[0] prediction across frames. - Adjust attack threshold presets. These changes improve the handling of periodic signals such as trumpet, especially under low bitrate conditions. --- libavcodec/aacpsy.c | 64 +++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c index ed03cb68ac..f91ba45a52 100644 --- a/libavcodec/aacpsy.c +++ b/libavcodec/aacpsy.c @@ -97,7 +97,7 @@ enum { #define AAC_BLOCK_SIZE_LONG 1024 ///< long block size #define AAC_BLOCK_SIZE_SHORT 128 ///< short block size #define AAC_NUM_BLOCKS_SHORT 8 ///< number of blocks in a short sequence -#define PSY_LAME_NUM_SUBBLOCKS 3 ///< Number of sub-blocks in each short block +#define PSY_LAME_NUM_SUBBLOCKS 2 ///< Number of sub-blocks in each short block /** * @} @@ -133,6 +133,7 @@ typedef struct AacPsyChannel{ float attack_threshold; ///< attack threshold for this channel float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS]; int prev_attack; ///< attack value for the last short block in the previous sequence + int next_attack0_zero; ///< whether attack[0] of the next frame is zero }AacPsyChannel; /** @@ -181,19 +182,19 @@ typedef struct PsyLamePreset { static const PsyLamePreset psy_abr_map[] = { /* TODO: Tuning. These were taken from LAME. 
*/ /* kbps/ch st_lrm */ - { 8, 6.60}, - { 16, 6.60}, - { 24, 6.60}, - { 32, 6.60}, - { 40, 6.60}, - { 48, 6.60}, - { 56, 6.60}, - { 64, 6.40}, - { 80, 6.00}, - { 96, 5.60}, - {112, 5.20}, - {128, 5.20}, - {160, 5.20} + { 8, 7.60}, + { 16, 7.60}, + { 24, 7.60}, + { 32, 7.60}, + { 40, 7.60}, + { 48, 7.60}, + { 56, 7.60}, + { 64, 7.40}, + { 80, 7.00}, + { 96, 6.60}, + {112, 6.20}, + {128, 6.20}, + {160, 6.20} }; /** @@ -900,8 +901,8 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, /* Calculate the energies of each sub-shortblock */ for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) { energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)]; - assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0); - attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)]; + assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS - 2)] > 0); + attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS - 2)]; energy_short[0] += energy_subshort[i]; } @@ -912,17 +913,12 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, p = FFMAX(p, fabsf(*pf)); pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p; energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p; - /* NOTE: The indexes below are [i + 3 - 2] in the LAME source. - * Obviously the 3 and 2 have some significance, or this would be just [i + 1] - * (which is what we use here). What the 3 stands for is ambiguous, as it is both - * number of short blocks, and the number of sub-short blocks. - * It seems that LAME is comparing each sub-block to sub-block + 1 in the - * previous block. 
- */ - if (p > energy_subshort[i + 1]) - p = p / energy_subshort[i + 1]; - else if (energy_subshort[i + 1] > p * 10.0f) - p = energy_subshort[i + 1] / (p * 10.0f); + + /* NOTE: The indexes below are [i + 3 - 2] in the LAME source. Compare each sub-block to sub-block - 2 */ + if (p > energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2]) + p = p / energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2]; + else if (energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2] > p * 10.0f) + p = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2] / (p * 10.0f); else p = 0.0; attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p; @@ -943,7 +939,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, const float v = energy_short[i]; const float m = FFMAX(u, v); if (m < 40000) { /* (2) */ - if (u < 1.7f * v && v < 1.7f * u) { /* (1) */ + if (u < 2.3f * v && v < 2.3f * u) { /* (1) */ if (i == 1 && attacks[0] < attacks[i]) attacks[0] = 0; attacks[i] = 0; @@ -951,13 +947,19 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, } att_sum += attacks[i]; } + if (pch->next_attack0_zero) + attacks[0] = 0; + if (attacks[AAC_NUM_BLOCKS_SHORT] == 0) + pch->next_attack0_zero = 1; + else + pch->next_attack0_zero = 0; if (attacks[0] <= pch->prev_attack) attacks[0] = 0; att_sum += attacks[0]; - /* 3 below indicates the previous attack happened in the last sub-block of the previous sequence */ - if (pch->prev_attack == 3 || att_sum) { + /* If the previous attack happened in the last sub-block of the previous sequence, or if there's a new attack, use short window */ + if (pch->prev_attack == PSY_LAME_NUM_SUBBLOCKS || att_sum) { uselongblock = 0; for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) @@ -1007,7 +1009,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, } pch->next_grouping = window_grouping[grouping]; - pch->prev_attack = attacks[8]; + pch->prev_attack = attacks[AAC_NUM_BLOCKS_SHORT - 1]; return wi; } -- 2.49.1 
_______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
