[FFmpeg-devel] [PR] fate/aac: add xHE-AAC decode and loudness normalization tests (PR #23443)

Lynne via ffmpeg-devel Wed, 10 Jun 2026 10:46:06 -0700

PR #23443 opened by Lynne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23443
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23443.patch


Tests target_level-based loudness normalization, compared against the reference 
decoder output of the ISO/IEC 23003-3 conformance sequences (at -16, -24 and 
-31 dB targets driven by loudnessInfoV1 metadata), plus an exhale-encoded 
stream carrying v0 loudnessInfo().
Samples: https://files.lynne.ee/xhe_samples.tar.gz


>From 7247181a79339a77b8aa3908ed94e4d9fcb14f2c Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Thu, 11 Jun 2026 02:25:13 +0900
Subject: [PATCH 1/2] aacdec_usac: parse loudnessInfoV1

Real-world xHE-AAC streams and the ISO/IEC 23003-3 conformance
sequences carry their loudness metadata exclusively as loudnessInfoV1()
inside loudnessInfoSetExtension(), which was previously rejected with
AVERROR_PATCHWELCOME, making such streams undecodable and loudness
normalization inoperative on them.

loudnessInfoV1() is identical to loudnessInfo() apart from an added
eqSetId field. Parse it, restrict measurement selection to
eqSetId == 0 (in line with the downmixId/drcSetId restrictions), and
skip unknown loudnessInfoSetExtension() payloads using their explicitly
coded size instead of erroring out.
---
 libavcodec/aac/aacdec.h      |  1 +
 libavcodec/aac/aacdec_usac.c | 64 +++++++++++++++++++++++++++++-------
 2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/libavcodec/aac/aacdec.h b/libavcodec/aac/aacdec.h
index 80a77289e6..18412fb97a 100644
--- a/libavcodec/aac/aacdec.h
+++ b/libavcodec/aac/aacdec.h
@@ -308,6 +308,7 @@ typedef struct ChannelElement {
 
 typedef struct AACUSACLoudnessInfo {
     uint8_t drc_set_id : 6;
+    uint8_t eq_set_id : 6; /* loudnessInfoV1() only, 0 otherwise */
     uint8_t downmix_id : 7;
     struct {
         uint16_t lvl : 12;
diff --git a/libavcodec/aac/aacdec_usac.c b/libavcodec/aac/aacdec_usac.c
index 6d48a5746a..00b4d4586d 100644
--- a/libavcodec/aac/aacdec_usac.c
+++ b/libavcodec/aac/aacdec_usac.c
@@ -95,10 +95,13 @@ static int methodvalue_width(int method_def)
     }
 }
 
+/* ISO/IEC 23003-4, Table 58/60: loudnessInfo(), loudnessInfoV1().
+ * The only difference in V1 is the added eqSetId field. */
 static int decode_loudness_info(AACDecContext *ac, AACUSACLoudnessInfo *info,
-                                GetBitContext *gb)
+                                GetBitContext *gb, int v1)
 {
     info->drc_set_id = get_bits(gb, 6);
+    info->eq_set_id = v1 ? get_bits(gb, 6) : 0;
     info->downmix_id = get_bits(gb, 7);
 
     if ((info->sample_peak.present = get_bits1(gb))) /* samplePeakLevelPresent 
*/
@@ -122,16 +125,46 @@ static int decode_loudness_info(AACDecContext *ac, 
AACUSACLoudnessInfo *info,
     return 0;
 }
 
+/* ISO/IEC 23003-4, Table 61: loudnessInfoSetExtension(), UNIDRCLOUDEXT_EQ */
+static int decode_loudness_set_v1(AACDecContext *ac, AACUSACConfig *usac,
+                                  GetBitContext *gb)
+{
+    int ret;
+    int nb_album = get_bits(gb, 6); /* loudnessInfoV1AlbumCount */
+    int nb_info = get_bits(gb, 6); /* loudnessInfoV1Count */
+
+    for (int i = 0; i < nb_album; i++) {
+        AACUSACLoudnessInfo tmp;
+        ret = decode_loudness_info(ac, &tmp, gb, 1);
+        if (ret < 0)
+            return ret;
+        if (usac->loudness.nb_album < 
FF_ARRAY_ELEMS(usac->loudness.album_info))
+            usac->loudness.album_info[usac->loudness.nb_album++] = tmp;
+    }
+
+    for (int i = 0; i < nb_info; i++) {
+        AACUSACLoudnessInfo tmp;
+        ret = decode_loudness_info(ac, &tmp, gb, 1);
+        if (ret < 0)
+            return ret;
+        if (usac->loudness.nb_info < FF_ARRAY_ELEMS(usac->loudness.info))
+            usac->loudness.info[usac->loudness.nb_info++] = tmp;
+    }
+
+    return 0;
+}
+
 /* Pick the bsMethodValue of a program- or anchor-loudness measurement.
- * Per ISO/IEC 23003-4 6.1.2.5, downmixId and drcSetId identify the signal a
- * loudnessInfo() applies to; only downmixId == 0 (base layout) together with
- * drcSetId == 0 (no DRC) describes the unprocessed signal we output, so
- * measurements for any other downmix/DRC set must not be used. */
+ * Per ISO/IEC 23003-4 6.1.2.5, downmixId, drcSetId and eqSetId identify the
+ * signal a loudnessInfo() applies to; only downmixId == 0 (base layout)
+ * together with drcSetId == 0 and eqSetId == 0 (no DRC/EQ) describes the
+ * unprocessed signal we output, so measurements for any other
+ * downmix/DRC/EQ set must not be used. */
 static int select_loudness_measurement(const AACUSACConfig *usac)
 {
     for (int i = 0; i < usac->loudness.nb_info; i++) {
         const AACUSACLoudnessInfo *info = &usac->loudness.info[i];
-        if (info->downmix_id != 0 || info->drc_set_id != 0)
+        if (info->downmix_id != 0 || info->drc_set_id != 0 || info->eq_set_id 
!= 0)
             continue;
         for (int j = 0; j < info->nb_measurements; j++) {
             int method = info->measurements[j].method_def;
@@ -151,13 +184,13 @@ static int decode_loudness_set(AACDecContext *ac, 
AACUSACConfig *usac,
     usac->loudness.nb_info = get_bits(gb, 6); /* loudnessInfoCount */
 
     for (int i = 0; i < usac->loudness.nb_album; i++) {
-        ret = decode_loudness_info(ac, &usac->loudness.album_info[i], gb);
+        ret = decode_loudness_info(ac, &usac->loudness.album_info[i], gb, 0);
         if (ret < 0)
             return ret;
     }
 
     for (int i = 0; i < usac->loudness.nb_info; i++) {
-        ret = decode_loudness_info(ac, &usac->loudness.info[i], gb);
+        ret = decode_loudness_info(ac, &usac->loudness.info[i], gb, 0);
         if (ret < 0)
             return ret;
     }
@@ -167,14 +200,23 @@ static int decode_loudness_set(AACDecContext *ac, 
AACUSACConfig *usac,
         while ((type = get_bits(gb, 4)) != UNIDRCLOUDEXT_TERM) {
             uint8_t size_bits = get_bits(gb, 4) + 4; /* bitSizeLen */
             uint32_t bit_size = get_bits_long(gb, size_bits) + 1; /* bitSize */
+            int start = get_bits_count(gb);
+            int skip;
             switch (type) {
             case UNIDRCLOUDEXT_EQ:
-                avpriv_report_missing_feature(ac->avctx, "loudnessInfoV1");
-                return AVERROR_PATCHWELCOME;
+                ret = decode_loudness_set_v1(ac, usac, gb);
+                if (ret < 0)
+                    return ret;
+                break;
             default:
-                skip_bits_long(gb, bit_size);
                 break;
             }
+            /* The extension size is explicit, so unparsed (or unknown)
+             * data can be skipped without desynchronizing. */
+            skip = bit_size - (get_bits_count(gb) - start);
+            if (skip < 0)
+                return AVERROR_INVALIDDATA;
+            skip_bits_long(gb, skip);
         }
     }
 
-- 
2.52.0


>From a04c16d54ef205ed555adadc45009108bd44cbb5 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Thu, 11 Jun 2026 02:26:47 +0900
Subject: [PATCH 2/2] fate/aac: add xHE-AAC decode and loudness normalization
 tests

Frequency-domain mono/stereo decoding at several sampling rates, M/S,
TNS, and target_level-based loudness normalization, compared against
the reference decoder output of the ISO/IEC 23003-3 conformance
sequences (at -16, -24 and -31 dB targets driven by loudnessInfoV1
metadata), plus an exhale-encoded stream carrying v0 loudnessInfo().
---
 tests/fate/aac.mak | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak
index 8a7f1da567..d0a1bc56a1 100644
--- a/tests/fate/aac.mak
+++ b/tests/fate/aac.mak
@@ -86,6 +86,52 @@ FATE_AAC += fate-aac-er_eld2100np_48_ep0
 fate-aac-er_eld2100np_48_ep0: CMD = pcm -i 
$(TARGET_SAMPLES)/aac/er_eld2100np_48_ep0.mp4
 fate-aac-er_eld2100np_48_ep0: REF = $(SAMPLES)/aac/er_eld2100np_48.s16
 
+# USAC (xHE-AAC); the samples and references are from the ISO/IEC 23003-3
+# conformance suite, except for xhe_target_level, which was encoded by exhale
+FATE_AAC_USAC += fate-aac-usac-fd_1_c1_0x03
+fate-aac-usac-fd_1_c1_0x03: CMD = pcm -i 
$(TARGET_SAMPLES)/aac/usac/Fd_1_c1_0x03.mp4
+fate-aac-usac-fd_1_c1_0x03: REF = $(SAMPLES)/aac/usac/Fd_1_c1_0x03.s16
+
+FATE_AAC_USAC += fate-aac-usac-fd_1_c1_0x04
+fate-aac-usac-fd_1_c1_0x04: CMD = pcm -i 
$(TARGET_SAMPLES)/aac/usac/Fd_1_c1_0x04.mp4
+fate-aac-usac-fd_1_c1_0x04: REF = $(SAMPLES)/aac/usac/Fd_1_c1_0x04.s16
+
+FATE_AAC_USAC += fate-aac-usac-fd_2_c1_0x03
+fate-aac-usac-fd_2_c1_0x03: CMD = pcm -i 
$(TARGET_SAMPLES)/aac/usac/Fd_2_c1_0x03.mp4
+fate-aac-usac-fd_2_c1_0x03: REF = $(SAMPLES)/aac/usac/Fd_2_c1_0x03.s16
+
+FATE_AAC_USAC += fate-aac-usac-fd_2_c1_0x05
+fate-aac-usac-fd_2_c1_0x05: CMD = pcm -i 
$(TARGET_SAMPLES)/aac/usac/Fd_2_c1_0x05.mp4
+fate-aac-usac-fd_2_c1_0x05: REF = $(SAMPLES)/aac/usac/Fd_2_c1_0x05.s16
+
+FATE_AAC_USAC += fate-aac-usac-fd_2_c1_ms_0x04
+fate-aac-usac-fd_2_c1_ms_0x04: CMD = pcm -i 
$(TARGET_SAMPLES)/aac/usac/Fd_2_c1_Ms_0x04.mp4
+fate-aac-usac-fd_2_c1_ms_0x04: REF = $(SAMPLES)/aac/usac/Fd_2_c1_Ms_0x04.s16
+
+FATE_AAC_USAC += fate-aac-usac-fd_2_c1_tns_0x04
+fate-aac-usac-fd_2_c1_tns_0x04: CMD = pcm -i 
$(TARGET_SAMPLES)/aac/usac/Fd_2_c1_Tns_0x04.mp4
+fate-aac-usac-fd_2_c1_tns_0x04: REF = $(SAMPLES)/aac/usac/Fd_2_c1_Tns_0x04.s16
+
+FATE_AAC_USAC += fate-aac-usac-ln-16
+fate-aac-usac-ln-16: CMD = pcm -target_level -16 -i 
$(TARGET_SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03.mp4
+fate-aac-usac-ln-16: REF = $(SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03__Lou-16.s16
+
+FATE_AAC_USAC += fate-aac-usac-ln-24
+fate-aac-usac-ln-24: CMD = pcm -target_level -24 -i 
$(TARGET_SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03.mp4
+fate-aac-usac-ln-24: REF = $(SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03__Lou-24.s16
+
+FATE_AAC_USAC += fate-aac-usac-ln-31
+fate-aac-usac-ln-31: CMD = pcm -target_level -31 -i 
$(TARGET_SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03.mp4
+fate-aac-usac-ln-31: REF = $(SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03__Lou-31.s16
+
+FATE_AAC_USAC += fate-aac-usac-target-level
+fate-aac-usac-target-level: CMD = pcm -target_level -24 -i 
$(TARGET_SAMPLES)/aac/usac/xhe_target_level.m4a
+fate-aac-usac-target-level: REF = $(SAMPLES)/aac/usac/xhe_target_level.s16
+
+# The decoder outputs slightly more trailing samples than the reference files
+$(FATE_AAC_USAC): SIZE_TOLERANCE = 3500
+FATE_AAC += $(FATE_AAC_USAC)
+
 FATE_AAC_FIXED += fate-aac-fixed-al04_44
 fate-aac-fixed-al04_44: CMD = pcm -c aac_fixed -i 
$(TARGET_SAMPLES)/aac/al04_44.mp4
 fate-aac-fixed-al04_44: REF = $(SAMPLES)/aac/al04_44.s16
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PR] fate/aac: add xHE-AAC decode and loudness normalization tests (PR #23443)

Reply via email to