PR #23430 opened by Lynne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23430
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23430.patch

Matches Apple on Zimtohrli and ViSQOL. Beats fdk-aac conclusively.
Exact benchmarks in a bit.


>From 6ecaa5b91ef19ecf986cddb624c7287fec051c9d Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Wed, 10 Jun 2026 01:44:49 +0900
Subject: [PATCH 1/2] avcodec/aacenc_tns: relax the gain gate on short blocks

TNS shapes quantization noise in time, so it is most useful on transients, 
which are coded as short blocks.
The stock upper LPC-gain bound the strong temporal structure where TNS pays 
off, leaving audible pre-echo on those frames.
So just relax the upper bound.
---
 libavcodec/aacenc_tns.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/libavcodec/aacenc_tns.c b/libavcodec/aacenc_tns.c
index 1e71c658c4..8bc77af904 100644
--- a/libavcodec/aacenc_tns.c
+++ b/libavcodec/aacenc_tns.c
@@ -44,6 +44,10 @@
 /* TNS will only be used if the LPC gain is within these margins */
 #define TNS_GAIN_THRESHOLD_LOW      1.4f
 #define TNS_GAIN_THRESHOLD_HIGH     1.16f*TNS_GAIN_THRESHOLD_LOW
+/* Short blocks (transients) relax the upper bound: TNS shapes quantization 
noise in
+ * time, so it pays off most on transients (coded as short blocks). 3.0x the 
low
+ * threshold, tuned by ear on transient material -- see ff_aac_search_for_tns. 
*/
+#define TNS_GAIN_THRESHOLD_SHORT    3.0f*TNS_GAIN_THRESHOLD_LOW
 
 static inline int compress_coeffs(int *coef, int order, int c_bits)
 {
@@ -64,8 +68,9 @@ static inline int compress_coeffs(int *coef, int order, int 
c_bits)
 
 /**
  * Encode TNS data.
- * Coefficient compression is simply not lossless as it should be
- * on any decoder tested and as such is not active.
+ * Coefficient compression (TNS_ENABLE_COEF_COMPRESSION) is active: 
compress_coeffs()
+ * only shifts a filter's indices when that shift is reversible, so it is 
lossless
+ * where applied, is signalled per filter by the coef_compress bit, and saves 
bits.
  */
 void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
 {
@@ -211,7 +216,12 @@ void ff_aac_search_for_tns(AACEncContext *s, 
SingleChannelElement *sce)
         gain = ff_lpc_calc_ref_coefs_f(&s->lpc, &sce->coeffs[w*128 + 
coef_start],
                                        coef_len, order, coefs);
 
-        if (!order || !isfinite(gain) || gain < TNS_GAIN_THRESHOLD_LOW || gain 
> TNS_GAIN_THRESHOLD_HIGH)
+        /* The stock upper gain bound rejects exactly the strong temporal 
structure where
+         * TNS pays off; relax it for short blocks (transients) so they get 
TNS (perceptual
+         * win on pre-echo, confirmed by PEAQ) while long blocks (steady / 
tonal music)
+         * stay conservative. See TNS_GAIN_THRESHOLD_SHORT. */
+        const float gain_high = is8 ? TNS_GAIN_THRESHOLD_SHORT : 
TNS_GAIN_THRESHOLD_HIGH;
+        if (!order || !isfinite(gain) || gain < TNS_GAIN_THRESHOLD_LOW || gain 
> gain_high)
             continue;
 
         tns->n_filt[w] = n_filt;
-- 
2.52.0


>From 20eadd5ae15f11e1ac90168b3d8abfac6ce6bdb1 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Wed, 10 Jun 2026 14:46:00 +0900
Subject: [PATCH 2/2] aaccoder: add NMR-based coder

---
 libavcodec/aaccoder.c     |  26 +++
 libavcodec/aaccoder_nmr.h | 426 ++++++++++++++++++++++++++++++++++++++
 libavcodec/aacenc.c       |   9 +
 libavcodec/aacenc.h       |  14 ++
 libavcodec/aacencdsp.c    |  32 ++-
 libavcodec/aacencdsp.h    |   6 +
 6 files changed, 511 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/aaccoder_nmr.h

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 7f1c4cdcc1..eb4c0ca4b7 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -59,6 +59,7 @@
 #define NOISE_LAMBDA_REPLACE 1.948f
 
 #include "libavcodec/aaccoder_trellis.h"
+#include "libavcodec/aaccoder_nmr.h"
 
 typedef float (*quantize_and_encode_band_func)(struct AACEncContext *s, 
PutBitContext *pb,
                                                const float *in, float *quant, 
const float *scaled,
@@ -840,6 +841,18 @@ static void search_for_ms(AACEncContext *s, ChannelElement 
*cpe)
     }
 }
 
+static void search_for_is_nmr(AACEncContext *s, AVCodecContext *avctx, 
ChannelElement *cpe)
+{
+    /* IS bitrate ceiling (bits/sample/channel), below it the shared 
lambda-driven
+     * search runs,above it IS is skipped. 1.5 keeps it for < ~144kbps stereo
+     * gates it out after the >= =192kpbs */
+    const float maxbps = 1.5f;
+    float bps = (avctx->bit_rate > 0 && avctx->sample_rate && 
avctx->ch_layout.nb_channels) ?
+        (float)avctx->bit_rate / avctx->sample_rate / 
avctx->ch_layout.nb_channels : 0.0f;
+    if (bps == 0.f || bps < maxbps)   /* bps 0 = VBR, rely on the search's own 
lambda gate */
+        ff_aac_search_for_is(s, avctx, cpe);
+}
+
 const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
     [AAC_CODER_TWOLOOP] = {
         search_for_quantizers_twoloop,
@@ -867,4 +880,17 @@ const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = 
{
         search_for_ms,
         ff_aac_search_for_is,
     },
+    [AAC_CODER_NMR] = {
+        search_for_quantizers_nmr,
+        codebook_trellis_rate,
+        quantize_and_encode_band,
+        ff_aac_encode_tns_info,
+        ff_aac_apply_tns,
+        set_special_band_scalefactors,
+        NULL,                    /* PNS decided in the trellis 
(search_for_quantizers_nmr) */
+        mark_pns,
+        ff_aac_search_for_tns,
+        search_for_ms,
+        search_for_is_nmr,
+    },
 };
diff --git a/libavcodec/aaccoder_nmr.h b/libavcodec/aaccoder_nmr.h
new file mode 100644
index 0000000000..7e6aef2aaa
--- /dev/null
+++ b/libavcodec/aaccoder_nmr.h
@@ -0,0 +1,426 @@
+/*
+ * AAC encoder NMR (noise-to-mask ratio) scalefactor coder
+ * Copyright (c) 2026 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * AAC encoder NMR scalefactor coder.
+ *
+ * Optimizes the same noise-to-mask objective as the two-loop coder, but with 
an
+ * optimal Viterbi search over scalefactors instead of a heuristic loop. For 
each
+ * coded band the per-scalefactor distortion/bits curve is precomputed, then a
+ * trellis over the (window-group, band) coding sequence minimizes
+ *   sum_g = dist_g(sf_g)/threshold_g +
+ *           lambda * (spectral_bits_g(sf_g) + scalefactor_differential_bits)
+ * with |sf_g - sf_{g-1}| <= SCALE_MAX_DIFF as a constraint, and lambda
+ * binary-searched so the coded size meets the per-frame bit budget
+ *
+ * Perceptual noise substitution (PNS) is integrated into the same objective: 
once
+ * the trellis settles on its operating lambda, each noise-like band (flagged 
by
+ * mark_pns) is offered a terminal "code as noise" candidate whose cost is
+ * nmr_pns + lambda*NMR_PNS_BITS. Because NMR_PNS_BITS is far below a band's 
spectral bit
+ * count, this candidate only wins when lambda is large, i.e. when the encoder 
is
+ * struggling to hold the bitrate. The bits freed by the chosen PNS bands are
+ * then re-spent by a second trellis pass over the remaining bands.
+ */
+
+#ifndef AVCODEC_AACCODER_NMR_H
+#define AVCODEC_AACCODER_NMR_H
+
+#include <float.h>
+#include <string.h>
+#include "libavutil/mathematics.h"
+#include "mathops.h"
+#include "avcodec.h"
+#include "put_bits.h"
+#include "aac.h"
+#include "aacenc.h"
+#include "aactab.h"
+#include "aacenctab.h"
+
+/* differential scalefactor coding cost, clamped to the legal delta range */
+#define NMR_SFBITS(d) ff_aac_scalefactor_bits[av_clip((d) + SCALE_DIFF_ZERO, 
0, 2*SCALE_MAX_DIFF)]
+
+#define NMR_ITERS  14 /* lambda binary-search iters */
+#define NMR_IFINE    9 /* fine-pass lambda iters */
+#define NMR_CITERS   7 /* coarse-pass lambda iters */
+#define NMR_COARSE   8 /* two-pass coarse->fine grid step, cuts the Viterbi 
ncand^2 with no
+                        * quality loss, 0 disables it (single full-resolution 
pass) */
+#define NMR_STEP     1 /* fine-pass scalefactor candidate granularity */
+
+#define NMR_PNS_BITS 9 /* approx cost in bits of signalling PNS */
+
+/* only bands coded well above the masking floor (NMR > 4, genuine rate 
struggle/near-holes)
+ * become noise, tuned so noise-like content gains while tonal/music content 
sees no Zim/ViS
+ * regression (lower gates help noise more but harm music) */
+#define NMR_PNS_NDGATE 4.0f
+
+/* frame bitrate ceiling (bits/sample/channel) above which PNS is disabled,
+ * noise substitution only helps under rate distress, near transparency (~2.0 
bits/sample,
+ * 96kbps mono) PNS is just worse. 1.5 keeps it to below 64k mono/144k stereo 
*/
+#define NMR_PNS_MAXBPS 1.5f
+
+/**
+ * Viterbi over the coding sequence act[0..nact-1] (indices into the per-band
+ * curves nd/nb), with lambda binary-searched so the coded size ~ destbits.
+ * Fills chosen[band] for every band referenced by act. Returns the operating
+ * lambda. node cost = dist/threshold + lambda*spectral_bits;
+ * edge cost = lambda*sf_differential_bits; |delta sf| <= SCALE_MAX_DIFF hard.
+ */
+static float nmr_solve(AACEncContext *s,
+                       const float (*nd)[NMR_NCAND], const int 
(*nb)[NMR_NCAND],
+                       const int *blo, const int *bnc, int step,
+                       const int *act, int nact, int destbits, int *chosen,
+                       float lo_l, float hi_l, int iters)
+{
+    float dp[NMR_NCAND], dpp[NMR_NCAND], node[NMR_NCAND];
+    float lamsf[2*SCALE_MAX_DIFF + 1];   /* lam*sfdiff bit cost, per lambda */
+    uint8_t bp[128][NMR_NCAND];
+    float lam = 1.0f;
+
+    if (nact <= 0)
+        return lam;
+
+    for (int it = 0; it < iters; it++) {
+        lam = sqrtf(lo_l * hi_l);
+        for (int i = 0; i <= 2*SCALE_MAX_DIFF; i++)
+            lamsf[i] = lam * ff_aac_scalefactor_bits[i];   /* edge cost for 
this lambda */
+
+        int b0 = act[0];
+        for (int o = 0; o < bnc[b0]; o++)
+            dp[o] = nd[b0][o] + lam * nb[b0][o];   /* anchor band node cost */
+
+        for (int k = 1; k < nact; k++) {
+            int b = act[k], pb = act[k-1];
+            memcpy(dpp, dp, sizeof(dp));
+            for (int o = 0; o < bnc[b]; o++)
+                node[o] = nd[b][o] + lam * nb[b][o];
+            /* dp[o] = node[o] + min_op(dpp[op] + edge cost) */
+            s->aacdsp.nmr_trellis_step(dp, bp[k], dpp, node, lamsf,
+                                       bnc[b], bnc[pb], blo[b] - blo[pb], step,
+                                       SCALE_MAX_DIFF);
+        }
+
+        /* backtrack */
+        int beo = 0, b = act[nact-1];
+        float bec = FLT_MAX;
+        for (int o = 0; o < bnc[b]; o++)
+            if (dp[o] < bec) { bec = dp[o]; beo = o; }
+        chosen[b] = beo;
+        for (int k = nact-1; k > 0; k--)
+            chosen[act[k-1]] = bp[k][chosen[act[k]]];
+
+        /* calc cost */
+        int total = 0;
+        for (int k = 0; k < nact; k++)
+            total += nb[act[k]][chosen[act[k]]];
+        for (int k = 1; k < nact; k++)
+            total += NMR_SFBITS((blo[act[k]]+chosen[act[k]]*step) - 
(blo[act[k-1]]+chosen[act[k-1]]*step));
+
+        if (it == iters - 1)
+            break;
+
+        /* check if we went over budget, go coarser if we did */
+        if (total > destbits)
+            lo_l = lam;
+        else
+            hi_l = lam;
+    }
+    return lam;
+}
+
+/* Build one coded band's (dist/threshold, bits) cost curve, candidates sf = 
lo + o*step
+ * for o in [0,maxn), stopping when the band would drop (cb <= 0). Returns the 
bit count. */
+static int nmr_band_curve(AACEncContext *s, SingleChannelElement *sce, int w, 
int g,
+                          int start, int lo, int step, int maxn, float invthr,
+                          float maxval, float *nd_row, int *nb_row)
+{
+    int ncand = 0;
+    for (int o = 0; o < maxn && lo + o*step <= SCALE_MAX_POS; o++) {
+        int sf = lo + o*step, btot = 0, cb = find_min_book(maxval, sf);
+        float dist = 0.0f;
+        if (cb <= 0)
+            break;
+        for (int w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+            int bb;
+            dist += quantize_band_cost_cached(s, w + w2, g, sce->coeffs + 
start + w2*128,
+                                              s->scoefs + start + w2*128, 
sce->ics.swb_sizes[g],
+                                              sf, cb, 1.0f, INFINITY, &bb, 
NULL, 0);
+            btot += bb;
+        }
+        nd_row[ncand] = (dist - btot) * invthr;
+        nb_row[ncand] = btot;
+        ncand++;
+    }
+    return ncand;
+}
+
+static void search_for_quantizers_nmr(AVCodecContext *avctx,
+                                      AACEncContext *s,
+                                      SingleChannelElement *sce,
+                                      const float lambda)
+{
+    int bch = ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : 
avctx->ch_layout.nb_channels);
+    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / bch * 
(lambda / 120.f);
+    int allz = 0, cutoff = 1024, nbnd = 0;
+
+    float thr[128];
+    float pener[128];               /* band energy (for PNS noise target)  */
+    float pspread[128];             /* band tonality spread (1 = noise)     */
+    int   minsf[128];
+    float maxvals[128];
+
+    /* coded-band trellis state (indexed 0..nbnd-1) */
+    int bidx[128];                  /* sce band index (w*16+g) */
+    int bw[128], bg[128], bst[128]; /* window group, swb, coef start per coded 
band */
+    int blo[128];                   /* finest candidate scalefactor */
+    int bnc[128];                   /* number of candidates */
+    int chosen[128];
+    int act[128];                   /* active (non-PNS) band coding order */
+    uint8_t is_pns[128];            /* trellis band coded as noise */
+
+    float (*nd)[NMR_NCAND] = s->nmr->nd; /* dist / threshold per candidate 
(heap) */
+    int   (*nb)[NMR_NCAND] = s->nmr->nb; /* spectral bits per candidate (heap) 
   */
+
+    /* two-pass coarse->fine grid step (see NMR_COARSE), the lambda search 
runs on
+     * the cheap coarse grid, PASS 2 refines the winner at NMR_STEP 
granularity */
+    const int cstep = NMR_COARSE > 0 ? NMR_COARSE : NMR_STEP;
+
+    /* per-frame PNS enable: -aac_pns on and below the bitrate ceiling (CBR 
only;
+     * VBR has bit_rate 0 -> bps 0 -> the per-band nd gate governs). */
+    float pns_thresh = (avctx->bit_rate > 0 && avctx->sample_rate && 
avctx->ch_layout.nb_channels) ?
+                       (float)avctx->bit_rate / avctx->sample_rate / 
avctx->ch_layout.nb_channels : 0.f;
+    int pns_apply = s->options.pns && (pns_thresh < NMR_PNS_MAXBPS);
+
+    if (s->psy.bitres.alloc >= 0)
+        destbits = s->psy.bitres.alloc *
+                   (lambda / (avctx->global_quality ? avctx->global_quality : 
120));
+    destbits = FFMIN(destbits, 5800);
+
+    /* cutoff frequency (kept in sync with twoloop's selection) */
+    int wlen = 1024 / sce->ics.num_windows;
+    int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE) ?
+                         (destbits * 1.5f * avctx->sample_rate / 1024) :
+                         (avctx->bit_rate / avctx->ch_layout.nb_channels);
+    if (s->options.pns || s->options.intensity_stereo)
+        frame_bit_rate *= 1.15f;
+
+    int bandwidth;
+    if (avctx->cutoff > 0)
+        bandwidth = avctx->cutoff;
+    else
+        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, 
avctx->sample_rate));
+    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
+
+    /* zero low-energy / out-of-band bands, accumulate per-band masking 
threshold,
+     * energy and tonality spread (the latter two drive the PNS decision) */
+    for (int w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        int start = 0;
+        for (int g = 0; g < sce->ics.num_swb; start += 
sce->ics.swb_sizes[g++]) {
+            float uplim = 0.0f, ener = 0.0f, spread = 2.0f;
+            int nz = 0;
+            for (int w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                FFPsyBand *band = 
&s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+                ener   += band->energy;
+                spread  = FFMIN(spread, band->spread);
+                if (start >= cutoff || band->energy <= band->threshold || 
band->threshold == 0.0f) {
+                    sce->zeroes[(w+w2)*16+g] = 1;
+                    continue;
+                }
+                uplim += band->threshold;
+                nz = 1;
+            }
+            sce->zeroes[w*16+g] = !nz;
+            thr[w*16+g]     = uplim;
+            pener[w*16+g]   = ener;
+            pspread[w*16+g] = spread;
+            allz |= nz;
+        }
+    }
+    if (!allz)
+        return;
+
+    s->aacdsp.abs_pow34(s->scoefs, sce->coeffs, 1024);
+    ff_quantize_band_cost_cache_init(s);
+
+    /* finest codeable scalefactor and max value per band */
+    for (int w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        int start = w*128;
+        for (int g = 0; g < sce->ics.num_swb; g++) {
+            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], 
sce->ics.swb_sizes[g], s->scoefs + start);
+            minsf[w*16+g]   = maxvals[w*16+g] > 0 ? 
coef2minsf(maxvals[w*16+g]) : 0;
+            start += sce->ics.swb_sizes[g];
+        }
+    }
+
+    /* PASS 1:
+     * precompute each coded band's cost curve at the coarse candidate step
+     * (the lambda search runs on this cheap grid, PASS 2 refines the winner) 
*/
+    {
+        for (int w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+            int start = w*128;
+            for (int g = 0; g < sce->ics.num_swb; g++) {
+                if (!sce->zeroes[w*16+g] && maxvals[w*16+g] > 0 && nbnd < 128) 
{
+                    int lo = av_clip(minsf[w*16+g], 0, SCALE_MAX_POS);
+                    float invthr = 1.0f / FFMAX(thr[w*16+g], 1e-9f);
+                    int ncand = nmr_band_curve(s, sce, w, g, start, lo, cstep, 
NMR_NCAND,
+                                               invthr, maxvals[w*16+g], 
nd[nbnd], nb[nbnd]);
+                    if (ncand == 0) {
+                        sce->zeroes[w*16+g] = 1;   /* nothing codeable -> 
leave to cleanup */
+                    } else {
+                        bidx[nbnd] = w*16+g;
+                        bw[nbnd] = w;
+                        bg[nbnd] = g;
+                        bst[nbnd] = start;
+                        blo[nbnd] = lo;
+                        bnc[nbnd] = ncand;
+                        nbnd++;
+                    }
+                }
+                start += sce->ics.swb_sizes[g];
+            }
+        }
+    }
+    if (!nbnd)
+        return;
+
+    /* solve the trellis over all coded bands, then offer PNS at the operating
+     * lambda and re-solve over the survivors with the freed budget */
+    {
+        int nact = nbnd, pns_count = 0;
+        float lam;
+
+        for (int b = 0; b < nbnd; b++) {
+            act[b] = b;
+            is_pns[b] = 0;
+        }
+        lam = nmr_solve(s, nd, nb, blo, bnc, cstep, act, nact, destbits, 
chosen,
+                        1e-9f, 1e4f, NMR_COARSE > 0 ? NMR_CITERS : NMR_ITERS);
+
+        /* PASS 2:
+         * refine each band at full granularity (NMR_STEP) in a +/-cstep window
+         * around the coarse pick, then re-solve. Recovers single-pass quality 
while the
+         * lambda search stayed cheap on the coarse grid. */
+        if (NMR_COARSE > 0) {
+            /* nmr_speed, 0 = slowest/best, higher = faster. It narrows the 
fine
+             * refine +/-window (scalefactors) below NMR_COARSE: at speed 0 
the window
+             * spans the whole coarse-grid gap, so the two-pass result matches 
the
+             * exhaustive single-pass search.
+             * Each speed level shaves one sf off the window.
+             * At @64k mono (Zim / xRT): speed 0 -> 0.00095/15x,
+             * 2 -> 0.00096/18x, 3 -> 0.00100/20x, 4 -> 0.00103/22x */
+            int win = NMR_COARSE - av_clip(s->options.nmr_speed, 0, 4);
+            for (int b = 0; b < nbnd; b++) {
+                int center = blo[b] + chosen[b]*cstep;
+                int flo    = av_clip(center - win, av_clip(minsf[bidx[b]], 0, 
SCALE_MAX_POS), SCALE_MAX_POS);
+                int maxn   = FFMIN(NMR_NCAND, 2*win/NMR_STEP + 1);
+                float invthr = 1.0f / FFMAX(thr[bidx[b]], 1e-9f);
+                int ncand  = nmr_band_curve(s, sce, bw[b], bg[b], bst[b], flo, 
NMR_STEP, maxn,
+                                            invthr, maxvals[bidx[b]], nd[b], 
nb[b]);
+                blo[b] = flo;
+                bnc[b] = FFMAX(1, ncand);
+            }
+            /* fine pass: seed a narrow lambda bracket around the coarse 
solution */
+            lam = nmr_solve(s, nd, nb, blo, bnc, NMR_STEP, act, nact, 
destbits, chosen,
+                            lam/16.0f, lam*16.0f, NMR_IFINE);
+        }
+
+        if (pns_apply) {
+            /* band 0 (lowest freq) is kept as the global-gain / sf-chain 
anchor */
+            for (int b = 1; b < nbnd; b++) {
+                int bi = bidx[b];
+                float spread = pspread[bi];
+                float nmr_pns, cost_keep, cost_pns;
+                if (!sce->can_pns[bi])
+                    continue;
+                /* Only replace a band that is being coded audibly badly: its 
coded
+                 * noise-to-mask ratio must exceed the masking floor. 
Well-coded bands
+                 * (low NMR with plenty of bits) keep their real content, this 
is what
+                 * confines PNS to only rate distress and avoids high-bitrate 
harm */
+                if (nd[b][chosen[b]] <= NMR_PNS_NDGATE)
+                    continue;
+                /* perceptual cost of replacing the band with energy-matched 
noise:
+                 * the non-noise-like fraction of its energy, in 
dist/threshold units */
+                nmr_pns = FFMAX(0.0f, pener[bi] * (1.0f - spread*spread))
+                          / FFMAX(thr[bi], 1e-9f);
+                cost_keep = nd[b][chosen[b]] + lam * nb[b][chosen[b]];
+                cost_pns  = nmr_pns + lam * NMR_PNS_BITS;
+                if (cost_pns < cost_keep) {
+                    is_pns[b] = 1;
+                    pns_count++;
+                }
+            }
+            if (pns_count) {
+                int budget2 = destbits - pns_count * NMR_PNS_BITS;
+                nact = 0;
+                for (int b = 0; b < nbnd; b++)
+                    if (!is_pns[b])
+                        act[nact++] = b;
+                nmr_solve(s, nd, nb, blo, bnc, NMR_STEP, act, nact, budget2, 
chosen, 1e-9f, 1e4f, NMR_ITERS);
+            }
+        }
+        for (int b = 0; b < nbnd; b++) {
+            int bi = bidx[b];
+            if (is_pns[b]) {
+                sce->band_type[bi] = NOISE_BT;
+                sce->zeroes[bi]    = 0;
+                sce->pns_ener[bi]  = pener[bi] * FFMIN(1.0f, 
pspread[bi]*pspread[bi]);
+            } else {
+                sce->sf_idx[bi] = av_clip(blo[b] + chosen[b]*NMR_STEP, 0, 
SCALE_MAX_POS);
+            }
+        }
+    }
+
+    /* SCALE_MAX_DIFF condition:
+     * re-clamp, codebook fixup, drop uncodeable, set global gain
+     * NOISE_BT bands keep their own scalefactor chain via 
set_special_band_scalefactors) */
+    {
+        uint8_t nextband[128];
+        int prev = -1;
+        ff_init_nextband_map(sce, nextband);
+        for (int w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+            for (int g = 0; g < sce->ics.num_swb; g++) {
+                if (sce->zeroes[w*16+g]) {
+                    sce->band_type[w*16+g] = 0;
+                    continue;
+                }
+                if (sce->band_type[w*16+g] == NOISE_BT)
+                    continue;
+
+                if (prev != -1)
+                    sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - 
SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
+                sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], 
sce->sf_idx[w*16+g]);
+                if (sce->band_type[w*16+g] <= 0) {
+                    if (!ff_sfdelta_can_remove_band(sce, nextband, prev, 
w*16+g)) {
+                        sce->band_type[w*16+g] = 1;
+                    } else {
+                        sce->zeroes[w*16+g] = 1;
+                        sce->band_type[w*16+g] = 0;
+                        continue;
+                    }
+                }
+                if (prev == -1)
+                    sce->sf_idx[0] = sce->sf_idx[w*16+g];   /* global gain */
+                prev = sce->sf_idx[w*16+g];
+            }
+        }
+    }
+}
+
+#endif /* AVCODEC_AACCODER_NMR_H */
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index f221d79ed3..6ac8bbb7f6 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -1114,6 +1114,7 @@ static av_cold int aac_encode_end(AVCodecContext *avctx)
     av_freep(&s->buffer.samples);
     av_freep(&s->cpe);
     av_freep(&s->fdsp);
+    av_freep(&s->nmr);
     ff_af_queue_close(&s->afq);
     return 0;
 }
@@ -1147,6 +1148,12 @@ static av_cold int alloc_buffers(AVCodecContext *avctx, 
AACEncContext *s)
     for(ch = 0; ch < s->channels; ch++)
         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
 
+    if (s->options.coder == AAC_CODER_NMR) {
+        s->nmr = av_mallocz(sizeof(*s->nmr));
+        if (!s->nmr)
+            return AVERROR(ENOMEM);
+    }
+
     return 0;
 }
 
@@ -1279,11 +1286,13 @@ static const AVOption aacenc_options[] = {
     {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), 
AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, 
.unit = "coder"},
         {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 
= AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, .unit = "coder"},
         {"fast",     "Fast search",               0, AV_OPT_TYPE_CONST, {.i64 
= AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, .unit = "coder"},
+        {"nmr",      "Noise-to-mask ratio scalefactor trellis", 0, 
AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_NMR}, INT_MIN, INT_MAX, AACENC_FLAGS, 
.unit = "coder"},
     {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, 
options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
     {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, 
options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
     {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, 
options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
     {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, 
options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
     {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, 
options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
+    {"aac_nmr_speed", "NMR coder speed level: 0 = slowest/best, higher trades 
quality for speed", offsetof(AACEncContext, options.nmr_speed), 
AV_OPT_TYPE_INT, {.i64 = 0}, 0, 4, AACENC_FLAGS},
     FF_AAC_PROFILE_OPTS
     {NULL}
 };
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 61a9e6102b..4e20a3892e 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -44,6 +44,7 @@
 typedef enum AACCoder {
     AAC_CODER_TWOLOOP,
     AAC_CODER_FAST,
+    AAC_CODER_NMR,
 
     AAC_CODER_NB,
 }AACCoder;
@@ -69,6 +70,7 @@ typedef struct AACEncOptions {
     int pce;
     int mid_side;
     int intensity_stereo;
+    int nmr_speed;          ///< NMR coder speed level: 0 = slowest/best, 
higher is faster
 } AACEncOptions;
 
 /**
@@ -165,6 +167,17 @@ typedef struct AACQuantizeBandCostCacheEntry {
     uint16_t generation;
 } AACQuantizeBandCostCacheEntry;
 
+/** per-band scalefactor candidates above the finest codeable sf (NMR coder) */
+#define NMR_NCAND 96
+
+/**
+ * NMR coder per-band candidate cost curves, ~96 KiB
+ */
+typedef struct AACNMRCurves {
+    float nd[128][NMR_NCAND];                    ///< dist / threshold per 
candidate
+    int   nb[128][NMR_NCAND];                    ///< spectral bits per 
candidate
+} AACNMRCurves;
+
 typedef struct AACPCEInfo {
     AVChannelLayout layout;
     uint8_t num_ele[4];                          ///< front, side, back, lfe
@@ -216,6 +229,7 @@ typedef struct AACEncContext {
     AACQuantizeBandCostCacheEntry quantize_band_cost_cache[256][128]; ///< 
memoization area for quantize_band_cost
 
     AACEncDSPContext aacdsp;
+    AACNMRCurves *nmr;                            ///< NMR coder scratch (NULL 
unless coder == nmr)
 
     struct {
         float *samples;
diff --git a/libavcodec/aacencdsp.c b/libavcodec/aacencdsp.c
index fb809405f7..5ccc7e8fc8 100644
--- a/libavcodec/aacencdsp.c
+++ b/libavcodec/aacencdsp.c
@@ -16,6 +16,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <float.h>
 #include <math.h>
 
 #include "config.h"
@@ -45,10 +46,37 @@ static void quantize_bands(int *out, const float *in, const 
float *scaled,
     }
 }
 
+/* One NMR scalefactor-trellis Viterbi step, for each current-band candidate, 
find the
+ * previous-band candidate minimising dpp[op] + lamsf[d] then set
+ * dp[o] = node[o] + that cost and record the back-pointer bp[o] */
+static void nmr_trellis_step_c(float *dp, uint8_t *bp, const float *dpp,
+                               const float *node, const float *lamsf,
+                               int n_cur, int n_prev, int base, int step, int 
mdiff)
+{
+    for (int o = 0; o < n_cur; o++) {
+        int best = -1;
+        float bestc = FLT_MAX;
+        for (int op = 0; op < n_prev; op++) {
+            int d = base + (o - op) * step;
+            float c;
+            if (d < -mdiff || d > mdiff)
+                continue;
+            c = dpp[op] + lamsf[d + mdiff];
+            if (c < bestc) {
+                bestc = c;
+                best  = op;
+            }
+        }
+        bp[o] = best < 0 ? 0 : best;
+        dp[o] = best < 0 ? FLT_MAX : node[o] + bestc;
+    }
+}
+
 void ff_aacenc_dsp_init(AACEncDSPContext *s)
 {
-    s->abs_pow34   = abs_pow34_v;
-    s->quant_bands = quantize_bands;
+    s->abs_pow34        = abs_pow34_v;
+    s->quant_bands      = quantize_bands;
+    s->nmr_trellis_step = nmr_trellis_step_c;
 
 #if ARCH_RISCV
     ff_aacenc_dsp_init_riscv(s);
diff --git a/libavcodec/aacencdsp.h b/libavcodec/aacencdsp.h
index 6d9ae221d1..4ead54669d 100644
--- a/libavcodec/aacencdsp.h
+++ b/libavcodec/aacencdsp.h
@@ -19,11 +19,17 @@
 #ifndef AVCODEC_AACENCDSP_H
 #define AVCODEC_AACENCDSP_H
 
+#include <stdint.h>
+
 typedef struct AACEncDSPContext {
     void (*abs_pow34)(float *out, const float *in, const int size);
     void (*quant_bands)(int *out, const float *in, const float *scaled,
                         int size, int is_signed, int maxval, const float Q34,
                         const float rounding);
+
+    void (*nmr_trellis_step)(float *dp, uint8_t *bp, const float *dpp,
+                             const float *node, const float *lamsf,
+                             int n_cur, int n_prev, int base, int step, int 
mdiff);
 } AACEncDSPContext;
 
 void ff_aacenc_dsp_init(AACEncDSPContext *s);
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to