PR #23443 opened by Lynne URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23443 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23443.patch
Tests target_level-based loudness normalization, compared against the reference decoder output of the ISO/IEC 23003-3 conformance sequences (at -16, -24 and -31 dB targets driven by loudnessInfoV1 metadata), plus an exhale-encoded stream carrying v0 loudnessInfo(). Samples: https://files.lynne.ee/xhe_samples.tar.gz >From 7247181a79339a77b8aa3908ed94e4d9fcb14f2c Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Thu, 11 Jun 2026 02:25:13 +0900 Subject: [PATCH 1/2] aacdec_usac: parse loudnessInfoV1 Real-world xHE-AAC streams and the ISO/IEC 23003-3 conformance sequences carry their loudness metadata exclusively as loudnessInfoV1() inside loudnessInfoSetExtension(), which was previously rejected with AVERROR_PATCHWELCOME, making such streams undecodable and loudness normalization inoperative on them. loudnessInfoV1() is identical to loudnessInfo() apart from an added eqSetId field. Parse it, restrict measurement selection to eqSetId == 0 (in line with the downmixId/drcSetId restrictions), and skip unknown loudnessInfoSetExtension() payloads using their explicitly coded size instead of erroring out. --- libavcodec/aac/aacdec.h | 1 + libavcodec/aac/aacdec_usac.c | 64 +++++++++++++++++++++++++++++------- 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/libavcodec/aac/aacdec.h b/libavcodec/aac/aacdec.h index 80a77289e6..18412fb97a 100644 --- a/libavcodec/aac/aacdec.h +++ b/libavcodec/aac/aacdec.h @@ -308,6 +308,7 @@ typedef struct ChannelElement { typedef struct AACUSACLoudnessInfo { uint8_t drc_set_id : 6; + uint8_t eq_set_id : 6; /* loudnessInfoV1() only, 0 otherwise */ uint8_t downmix_id : 7; struct { uint16_t lvl : 12; diff --git a/libavcodec/aac/aacdec_usac.c b/libavcodec/aac/aacdec_usac.c index 6d48a5746a..00b4d4586d 100644 --- a/libavcodec/aac/aacdec_usac.c +++ b/libavcodec/aac/aacdec_usac.c @@ -95,10 +95,13 @@ static int methodvalue_width(int method_def) } } +/* ISO/IEC 23003-4, Table 58/60: loudnessInfo(), loudnessInfoV1(). 
+ * The only difference in V1 is the added eqSetId field. */ static int decode_loudness_info(AACDecContext *ac, AACUSACLoudnessInfo *info, - GetBitContext *gb) + GetBitContext *gb, int v1) { info->drc_set_id = get_bits(gb, 6); + info->eq_set_id = v1 ? get_bits(gb, 6) : 0; info->downmix_id = get_bits(gb, 7); if ((info->sample_peak.present = get_bits1(gb))) /* samplePeakLevelPresent */ @@ -122,16 +125,46 @@ static int decode_loudness_info(AACDecContext *ac, AACUSACLoudnessInfo *info, return 0; } +/* ISO/IEC 23003-4, Table 61: loudnessInfoSetExtension(), UNIDRCLOUDEXT_EQ */ +static int decode_loudness_set_v1(AACDecContext *ac, AACUSACConfig *usac, + GetBitContext *gb) +{ + int ret; + int nb_album = get_bits(gb, 6); /* loudnessInfoV1AlbumCount */ + int nb_info = get_bits(gb, 6); /* loudnessInfoV1Count */ + + for (int i = 0; i < nb_album; i++) { + AACUSACLoudnessInfo tmp; + ret = decode_loudness_info(ac, &tmp, gb, 1); + if (ret < 0) + return ret; + if (usac->loudness.nb_album < FF_ARRAY_ELEMS(usac->loudness.album_info)) + usac->loudness.album_info[usac->loudness.nb_album++] = tmp; + } + + for (int i = 0; i < nb_info; i++) { + AACUSACLoudnessInfo tmp; + ret = decode_loudness_info(ac, &tmp, gb, 1); + if (ret < 0) + return ret; + if (usac->loudness.nb_info < FF_ARRAY_ELEMS(usac->loudness.info)) + usac->loudness.info[usac->loudness.nb_info++] = tmp; + } + + return 0; +} + /* Pick the bsMethodValue of a program- or anchor-loudness measurement. - * Per ISO/IEC 23003-4 6.1.2.5, downmixId and drcSetId identify the signal a - * loudnessInfo() applies to; only downmixId == 0 (base layout) together with - * drcSetId == 0 (no DRC) describes the unprocessed signal we output, so - * measurements for any other downmix/DRC set must not be used. 
*/ + * Per ISO/IEC 23003-4 6.1.2.5, downmixId, drcSetId and eqSetId identify the + * signal a loudnessInfo() applies to; only downmixId == 0 (base layout) + * together with drcSetId == 0 and eqSetId == 0 (no DRC/EQ) describes the + * unprocessed signal we output, so measurements for any other + * downmix/DRC/EQ set must not be used. */ static int select_loudness_measurement(const AACUSACConfig *usac) { for (int i = 0; i < usac->loudness.nb_info; i++) { const AACUSACLoudnessInfo *info = &usac->loudness.info[i]; - if (info->downmix_id != 0 || info->drc_set_id != 0) + if (info->downmix_id != 0 || info->drc_set_id != 0 || info->eq_set_id != 0) continue; for (int j = 0; j < info->nb_measurements; j++) { int method = info->measurements[j].method_def; @@ -151,13 +184,13 @@ static int decode_loudness_set(AACDecContext *ac, AACUSACConfig *usac, usac->loudness.nb_info = get_bits(gb, 6); /* loudnessInfoCount */ for (int i = 0; i < usac->loudness.nb_album; i++) { - ret = decode_loudness_info(ac, &usac->loudness.album_info[i], gb); + ret = decode_loudness_info(ac, &usac->loudness.album_info[i], gb, 0); if (ret < 0) return ret; } for (int i = 0; i < usac->loudness.nb_info; i++) { - ret = decode_loudness_info(ac, &usac->loudness.info[i], gb); + ret = decode_loudness_info(ac, &usac->loudness.info[i], gb, 0); if (ret < 0) return ret; } @@ -167,14 +200,23 @@ static int decode_loudness_set(AACDecContext *ac, AACUSACConfig *usac, while ((type = get_bits(gb, 4)) != UNIDRCLOUDEXT_TERM) { uint8_t size_bits = get_bits(gb, 4) + 4; /* bitSizeLen */ uint32_t bit_size = get_bits_long(gb, size_bits) + 1; /* bitSize */ + int start = get_bits_count(gb); + int skip; switch (type) { case UNIDRCLOUDEXT_EQ: - avpriv_report_missing_feature(ac->avctx, "loudnessInfoV1"); - return AVERROR_PATCHWELCOME; + ret = decode_loudness_set_v1(ac, usac, gb); + if (ret < 0) + return ret; + break; default: - skip_bits_long(gb, bit_size); break; } + /* The extension size is explicit, so unparsed (or unknown) + * data 
can be skipped without desynchronizing. */ + skip = bit_size - (get_bits_count(gb) - start); + if (skip < 0) + return AVERROR_INVALIDDATA; + skip_bits_long(gb, skip); } } -- 2.52.0 >From a04c16d54ef205ed555adadc45009108bd44cbb5 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Thu, 11 Jun 2026 02:26:47 +0900 Subject: [PATCH 2/2] fate/aac: add xHE-AAC decode and loudness normalization tests Frequency-domain mono/stereo decoding at several sampling rates, M/S, TNS, and target_level-based loudness normalization, compared against the reference decoder output of the ISO/IEC 23003-3 conformance sequences (at -16, -24 and -31 dB targets driven by loudnessInfoV1 metadata), plus an exhale-encoded stream carrying v0 loudnessInfo(). --- tests/fate/aac.mak | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak index 8a7f1da567..d0a1bc56a1 100644 --- a/tests/fate/aac.mak +++ b/tests/fate/aac.mak @@ -86,6 +86,52 @@ FATE_AAC += fate-aac-er_eld2100np_48_ep0 fate-aac-er_eld2100np_48_ep0: CMD = pcm -i $(TARGET_SAMPLES)/aac/er_eld2100np_48_ep0.mp4 fate-aac-er_eld2100np_48_ep0: REF = $(SAMPLES)/aac/er_eld2100np_48.s16 +# USAC (xHE-AAC); the samples and references are from the ISO/IEC 23003-3 +# conformance suite, except for xhe_target_level, which was encoded by exhale +FATE_AAC_USAC += fate-aac-usac-fd_1_c1_0x03 +fate-aac-usac-fd_1_c1_0x03: CMD = pcm -i $(TARGET_SAMPLES)/aac/usac/Fd_1_c1_0x03.mp4 +fate-aac-usac-fd_1_c1_0x03: REF = $(SAMPLES)/aac/usac/Fd_1_c1_0x03.s16 + +FATE_AAC_USAC += fate-aac-usac-fd_1_c1_0x04 +fate-aac-usac-fd_1_c1_0x04: CMD = pcm -i $(TARGET_SAMPLES)/aac/usac/Fd_1_c1_0x04.mp4 +fate-aac-usac-fd_1_c1_0x04: REF = $(SAMPLES)/aac/usac/Fd_1_c1_0x04.s16 + +FATE_AAC_USAC += fate-aac-usac-fd_2_c1_0x03 +fate-aac-usac-fd_2_c1_0x03: CMD = pcm -i $(TARGET_SAMPLES)/aac/usac/Fd_2_c1_0x03.mp4 +fate-aac-usac-fd_2_c1_0x03: REF = $(SAMPLES)/aac/usac/Fd_2_c1_0x03.s16 + +FATE_AAC_USAC += 
fate-aac-usac-fd_2_c1_0x05 +fate-aac-usac-fd_2_c1_0x05: CMD = pcm -i $(TARGET_SAMPLES)/aac/usac/Fd_2_c1_0x05.mp4 +fate-aac-usac-fd_2_c1_0x05: REF = $(SAMPLES)/aac/usac/Fd_2_c1_0x05.s16 + +FATE_AAC_USAC += fate-aac-usac-fd_2_c1_ms_0x04 +fate-aac-usac-fd_2_c1_ms_0x04: CMD = pcm -i $(TARGET_SAMPLES)/aac/usac/Fd_2_c1_Ms_0x04.mp4 +fate-aac-usac-fd_2_c1_ms_0x04: REF = $(SAMPLES)/aac/usac/Fd_2_c1_Ms_0x04.s16 + +FATE_AAC_USAC += fate-aac-usac-fd_2_c1_tns_0x04 +fate-aac-usac-fd_2_c1_tns_0x04: CMD = pcm -i $(TARGET_SAMPLES)/aac/usac/Fd_2_c1_Tns_0x04.mp4 +fate-aac-usac-fd_2_c1_tns_0x04: REF = $(SAMPLES)/aac/usac/Fd_2_c1_Tns_0x04.s16 + +FATE_AAC_USAC += fate-aac-usac-ln-16 +fate-aac-usac-ln-16: CMD = pcm -target_level -16 -i $(TARGET_SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03.mp4 +fate-aac-usac-ln-16: REF = $(SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03__Lou-16.s16 + +FATE_AAC_USAC += fate-aac-usac-ln-24 +fate-aac-usac-ln-24: CMD = pcm -target_level -24 -i $(TARGET_SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03.mp4 +fate-aac-usac-ln-24: REF = $(SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03__Lou-24.s16 + +FATE_AAC_USAC += fate-aac-usac-ln-31 +fate-aac-usac-ln-31: CMD = pcm -target_level -31 -i $(TARGET_SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03.mp4 +fate-aac-usac-ln-31: REF = $(SAMPLES)/aac/usac/Ext_2_c1_Ln_0x03__Lou-31.s16 + +FATE_AAC_USAC += fate-aac-usac-target-level +fate-aac-usac-target-level: CMD = pcm -target_level -24 -i $(TARGET_SAMPLES)/aac/usac/xhe_target_level.m4a +fate-aac-usac-target-level: REF = $(SAMPLES)/aac/usac/xhe_target_level.s16 + +# The decoder outputs slightly more trailing samples than the reference files +$(FATE_AAC_USAC): SIZE_TOLERANCE = 3500 +FATE_AAC += $(FATE_AAC_USAC) + FATE_AAC_FIXED += fate-aac-fixed-al04_44 fate-aac-fixed-al04_44: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al04_44.mp4 fate-aac-fixed-al04_44: REF = $(SAMPLES)/aac/al04_44.s16 -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email 
protected]
