Message-Id: <89817380bf2af23bedb4e61fa5ebea556befe5e3.1402646664.git.ni...@southpole.se> In-Reply-To: <6849fe55de4f2b999e480ea35f72029bbc09014e.1402646664.git.ni...@southpole.se> References: <6849fe55de4f2b999e480ea35f72029bbc09014e.1402646664.git.ni...@southpole.se> Subject: [PATCH 4/4] dcadec: Support for xll (lossless extension)
Builds on top of changes by Paul B Mahol <[email protected]>, copied from the ffmpeg branch at https://github.com/richardpl/FFmpeg/commits/xll. In particular, new xll-related state variables, and the current function dca_xll_decode_header is an expanded version of Paul's dca_xll_decode_frame. Signed-off-by: Niels Möller <[email protected]> --- libavcodec/dcadec.c | 1078 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 1050 insertions(+), 28 deletions(-) diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c index 0727857..cf183a3 100644 --- a/libavcodec/dcadec.c +++ b/libavcodec/dcadec.c @@ -4,6 +4,8 @@ * Copyright (C) 2004 Benjamin Zores * Copyright (C) 2006 Benjamin Larsson * Copyright (C) 2007 Konstantin Shishkov + * Copyright (C) 2012 Paul B Mahol + * Copyright (C) 2014 Niels Möller * * This file is part of Libav. * @@ -38,6 +40,7 @@ #include "fft.h" #include "get_bits.h" #include "put_bits.h" +#include "unary.h" #include "dcadata.h" #include "dcahuff.h" #include "dca.h" @@ -59,6 +62,14 @@ #define DCA_SUBFRAMES_MAX (16) #define DCA_BLOCKS_MAX (16) #define DCA_LFE_MAX (3) +#define DCA_XLL_FBANDS_MAX (4) +#define DCA_XLL_SEGMENTS_MAX (16) +#define DCA_XLL_CHSETS_MAX (16) +#define DCA_XLL_CHANNELS_MAX (16) +#define DCA_XLL_AORDER_MAX (15) + +/* Arbitrary limit; not sure what the maximum really is, but much larger. 
*/ +#define DCA_XLL_DMIX_NCOEFFS_MAX (18) enum DCAMode { DCA_MONO = 0, @@ -284,6 +295,61 @@ static av_always_inline int get_bitalloc(GetBitContext *gb, BitAlloc *ba, ba->offset; } +typedef struct XllChSetSubHeader { + int channels; ///< number of channels in channel set, at most 16 + int residual_encode; ///< residual channel encoding + int bit_resolution; ///< input sample bit-width + int bit_width; ///< original input sample bit-width + int sampling_frequency; ///< sampling frequency + int fs_interpolate; ///< sampling frequency interpolation multiplier + int replacement_set; ///< replacement channel set group + int active_replace_set; ///< current channel set is active channel set + int primary_ch_set; + int downmix_coeff_code_embedded; + int downmix_embedded; + int downmix_type; + int hier_chset; + int downmix_ncoeffs; + int downmix_coeffs[DCA_XLL_DMIX_NCOEFFS_MAX]; + int ch_mask_enabled; + int ch_mask; + int mapping_coeffs_present; + int num_freq_bands; + + /* m_nOrigChanOrder */ + uint8_t orig_chan_order[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX]; + uint8_t orig_chan_order_inv[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX]; + /* Coefficients for channel pairs (at most 8), m_anPWChPairsCoeffs */ + int8_t pw_ch_pairs_coeffs[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX/2]; + /* m_nCurrHighestLPCOrder */ + uint8_t adapt_order_max[DCA_XLL_FBANDS_MAX]; + /* m_pnAdaptPredOrder */ + uint8_t adapt_order[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX]; + /* m_pnFixedPredOrder */ + uint8_t fixed_order[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX]; + /* m_pnLPCReflCoeffsQInd, unsigned version */ + uint8_t lpc_refl_coeffs_q_ind[DCA_XLL_FBANDS_MAX] + [DCA_XLL_CHANNELS_MAX][DCA_XLL_AORDER_MAX]; + + int lsb_fsize[DCA_XLL_FBANDS_MAX]; + int8_t scalable_lsbs[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX]; + int8_t bit_width_adj_per_ch[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX]; +} XllChSetSubHeader; + +typedef struct XllNavi { + GetBitContext gb; // Context for parsing the data segments + unsigned 
band_size[DCA_XLL_FBANDS_MAX]; + unsigned segment_size[DCA_XLL_FBANDS_MAX][DCA_XLL_SEGMENTS_MAX]; + unsigned chset_size[DCA_XLL_FBANDS_MAX][DCA_XLL_SEGMENTS_MAX][DCA_XLL_CHSETS_MAX]; +} XllNavi; + +typedef struct QMF64_table { + float dct4_coeff[32][32]; + float dct2_coeff[32][32]; + float rcos[32]; + float rsin[32]; +} QMF64_table; + typedef struct { AVClass *class; ///< class for AVOptions AVCodecContext *avctx; @@ -356,8 +422,10 @@ typedef struct { /* Subband samples history (for ADPCM) */ DECLARE_ALIGNED(16, float, subband_samples_hist)[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4]; - DECLARE_ALIGNED(32, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512]; - DECLARE_ALIGNED(32, float, subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][32]; + /* Half size is sufficient for core decoding, but for 96 kHz data + * we need qmf with 64 subbands and 1024 samples. */ + DECLARE_ALIGNED(32, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][1024]; + DECLARE_ALIGNED(32, float, subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][64]; int hist_index[DCA_PRIM_CHANNELS_MAX]; DECLARE_ALIGNED(32, float, raXin)[32]; @@ -379,12 +447,30 @@ typedef struct { int current_subsubframe; int core_ext_mask; ///< present extensions in the core substream - + int exss_ext_mask; // Non-core extensions /* XCh extension information */ int xch_present; ///< XCh extension present and valid int xch_base_channel; ///< index of first (only) channel containing XCH data int xch_disable; ///< whether the XCh extension should be decoded or not + /* XLL extension information */ + int xll_disable; + int xll_nch_sets; ///< number of channel sets per frame + int xll_channels; // Total number of channels + int xll_residual_channels; // Number of residual channels. 
+ int xll_segments; ///< number of segments per frame + int xll_log_smpl_in_seg; // I guess this is "nBits4SamplLoci" + int xll_smpl_in_seg; ///< samples in segment per one frequency band for the first channel set + int xll_bits4seg_size; ///< number of bits used to read segment size + int xll_banddata_crcen; ///< presence of CRC16 within each frequency band + int xll_scalable_lsb; + int xll_bits4ch_mask; ///< channel position mask + int xll_fixed_lsb_width; + XllChSetSubHeader xll_chsets[DCA_XLL_CHSETS_MAX]; + XllNavi xll_navi; + int *xll_sample_buf; + unsigned int xll_sample_buf_size; + /* ExSS header parser */ int static_fields; ///< static fields present int mix_metadata; ///< mixing metadata present @@ -392,13 +478,17 @@ typedef struct { int mix_config_num_ch[4]; ///< number of channels in each mix out configuration int profile; + int one2one_map_chtospkr; int debug_flag; ///< used for suppressing repeated error messages output AVFloatDSPContext fdsp; FFTContext imdct; SynthFilterContext synth; DCADSPContext dcadsp; + QMF64_table *qmf64_table; FmtConvertContext fmt_conv; + unsigned frame_index; + unsigned sample_index; } DCAContext; static const uint16_t dca_vlc_offs[] = { @@ -941,8 +1031,85 @@ static void qmf_32_subbands(DCAContext *s, int chans, samples_out, s->raXin, scale); } -static void lfe_interpolation_fir(DCAContext *s, int decimation_select, - int num_deci_sample, float *samples_in, +static QMF64_table *qmf64_precompute(void) +{ + QMF64_table *table = av_malloc(sizeof(*table)); + unsigned i, j; + if (!table) + return NULL; + + for (i = 0; i < 32; i++) + for (j = 0; j < 32; j++) + table->dct4_coeff[i][j] = cos((2*i + 1) * (2*j + 1) * M_PI / 128); + for (i = 0; i < 32; i++) + for (j = 0; j < 32; j++) + table->dct2_coeff[i][j] = cos(j * (2*i + 1) * M_PI / 64); + + /* FIXME: Is the factor 0.125 = 1/8 right? 
*/ + for (i = 0; i < 32; i++) + table->rcos[i] = 0.125 / cos((2*i + 1) * M_PI / 256); + for (i = 0; i < 32; i++) + table->rsin[i] = -0.125 / sin((2*i + 1) * M_PI / 256); + + return table; +} + +/* FIXME: Totally un-optimized. Based on the reference code and + * http://multimedia.cx/mirror/dca-transform.pdf, with guessed tweaks + * for doubling the size. */ +static void qmf_64_subbands(DCAContext *s, int chans, + float samples_in[64][8], float *samples_out, + float scale) +{ + float raXin[64]; + float A[32], B[32]; + float *raX = s->subband_fir_hist[chans]; + float *raZ = s->subband_fir_noidea[chans]; + unsigned i, j, k, subindex; + + for (i = s->subband_activity[chans]; i < 64; i++) + raXin[i] = 0.0; + for (subindex = 0; subindex < 8; subindex++) { + for (i = 0; i < s->subband_activity[chans]; i++) + raXin[i] = samples_in[i][subindex]; + + for (k = 0; k < 32; k++) { + A[k] = 0.0; + for (i = 0; i < 32; i++) + A[k] += (raXin[2*i] + raXin[2*i + 1]) * s->qmf64_table->dct4_coeff[k][i]; + } + for (k = 0; k < 32; k++) { + B[k] = raXin[0] * s->qmf64_table->dct2_coeff[k][0]; + for (i = 1; i < 32; i++) + B[k] += (raXin[2*i] + raXin[2*i - 1]) * s->qmf64_table->dct2_coeff[k][i]; + } + for (k = 0; k < 32; k++) { + raX[k] = s->qmf64_table->rcos[k] * (A[k] + B[k]); + raX[63 - k] = s->qmf64_table->rsin[k] * (A[k] - B[k]); + } + + for (i = 0; i < 64; i++) { + float out = raZ[i]; + for (j = 0; j < 1024; j += 128) + out += fir_64bands[j + i] * (raX[j + i] - raX[j + 63 - i]); + *samples_out++ = out * scale; + } + + for (i = 0; i < 64; i++) { + float hist = 0.0; + for (j = 0; j < 1024; j += 128) + hist += fir_64bands[64 + j + i] * (-raX[i + j] - raX[j + 63 - i]); + + raZ[i] = hist; + } + + /* FIXME: Make buffer circular, to avoid this move. */ + memmove(raX + 64, raX, (1024 - 64) * sizeof(*raX)); + } +} + +static void lfe_interpolation_fir(DCAContext *s, + const float *samples_in, float *samples_out) { /* samples_in: An array holding decimated samples. 
@@ -958,15 +1125,18 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select, int deciindex; /* Select decimation filter */ - if (decimation_select == 1) { + if (s->lfe == 1) { idx = 1; prCoeff = lfe_fir_128; } else { idx = 0; - prCoeff = lfe_fir_64; + if (s->exss_ext_mask & DCA_EXT_EXSS_XLL) + prCoeff = lfe_xll_fir_64; + else + prCoeff = lfe_fir_64; } /* Interpolation */ - for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { + for (deciindex = 0; deciindex < 2*s->lfe; deciindex++) { s->dcadsp.lfe_fir[idx](samples_out, samples_in, prCoeff); samples_in++; samples_out += 2 * 32 * (1 + idx); @@ -1241,29 +1411,58 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) return 0; } -static int dca_filter_channels(DCAContext *s, int block_index) +static int dca_filter_channels(DCAContext *s, int block_index, int upsample) { float (*subband_samples)[DCA_SUBBANDS][8] = s->subband_samples[block_index]; int k; - /* 32 subbands QMF */ - for (k = 0; k < s->prim_channels; k++) { + if (upsample) { + if (!s->qmf64_table) { + s->qmf64_table = qmf64_precompute(); + if (!s->qmf64_table) + return AVERROR(ENOMEM); + } + + /* 64 subbands QMF */ + for (k = 0; k < s->prim_channels; k++) { + if (s->channel_order_tab[k] >= 0) + qmf_64_subbands(s, k, subband_samples[k], + s->samples_chanptr[s->channel_order_tab[k]], + /* Upsampling needs a factor 2 here. 
*/ + M_SQRT2 / 32768.0); + } + } + else { + /* 32 subbands QMF */ + for (k = 0; k < s->prim_channels; k++) { /* static float pcm_to_double[8] = { 32768.0, 32768.0, 524288.0, 524288.0, 0, 8388608.0, 8388608.0 };*/ - if (s->channel_order_tab[k] >= 0) - qmf_32_subbands(s, k, subband_samples[k], - s->samples_chanptr[s->channel_order_tab[k]], - M_SQRT1_2 / 32768.0 /* pcm_to_double[s->source_pcm_res] */); + if (s->channel_order_tab[k] >= 0) + qmf_32_subbands(s, k, subband_samples[k], + s->samples_chanptr[s->channel_order_tab[k]], + M_SQRT1_2 / 32768.0 /* pcm_to_double[s->source_pcm_res] */); + } } /* Generate LFE samples for this subsubframe FIXME!!! */ if (s->lfe) { - lfe_interpolation_fir(s, s->lfe, 2 * s->lfe, + float *samples = s->samples_chanptr[dca_lfe_index[s->amode]]; + lfe_interpolation_fir(s, s->lfe_data + 2 * s->lfe * (block_index + 4), - s->samples_chanptr[dca_lfe_index[s->amode]]); + samples); /* Outputs 20bits pcm samples */ + if (upsample) { + unsigned i; + /* Should apply the filter in Table 6-11 when upsampling. For + * now, just duplicate. */ + for (i = 511; i > 0; i--) + samples[2*i] = samples[2*i+1] = samples[i]; + samples[1] = samples[0]; + } } + /* FIXME: This downmixing is probably broken with upsample. + * Probably totally broken also with xll in general. 
*/ /* Downmixing to Stereo */ if (s->prim_channels + !!s->lfe > 2 && s->avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) { @@ -1461,6 +1660,730 @@ static void dca_exss_skip_mix_coeffs(GetBitContext *gb, int channels, int out_ch } } +/* Sign as bit 0 */ +static inline int get_bits_sm(GetBitContext *s, unsigned n) +{ + int x = get_bits(s, n); + if (x & 1) + return - (x >> 1) - 1; + else + return x >> 1; +} + +/* Returns -1 on error */ +static int32_t dca_get_dmix_coeff(DCAContext *s) +{ + unsigned code = get_bits(&s->gb, 9); + int32_t sign = (int32_t) (code >> 8) - 1; + unsigned idx = code & 0xff; + if (idx > 241) { + av_log(s->avctx, AV_LOG_ERROR, + "XLL: Invalid channel set downmix code %x\n", code); + return -1; + } + else + return (dca_dmixtable[idx] ^ sign) - sign; +} + +/* Returns -1 on error */ +static int32_t dca_get_inv_dmix_coeff(DCAContext *s) +{ + unsigned code = get_bits(&s->gb, 9); + int32_t sign = (int32_t) (code >> 8) - 1; + unsigned idx = code & 0xff; + if (idx < 41) { + av_log(s->avctx, AV_LOG_ERROR, + "XLL: Invalid channel set inv downmix code %x\n", code); + return -1; + } + else if (idx > 241) { + av_log(s->avctx, AV_LOG_ERROR, + "XLL: Invalid channel set downmix code %x\n", code); + return -1; + } + else + return (dca_inv_dmixtable[idx-41] ^ sign) - sign; +} + +/* parse XLL header */ +static int dca_xll_decode_header(DCAContext *s) +{ + int hdr_pos, hdr_size, version, frame_size; + int i, chset_index; + + /* get bit position of sync header */ + hdr_pos = get_bits_count(&s->gb) - 32; + + version = get_bits(&s->gb, 4) + 1; + hdr_size = get_bits(&s->gb, 8) + 1; + + frame_size = get_bits_long(&s->gb, get_bits(&s->gb, 5) + 1) + 1; + + s->xll_channels = s->xll_residual_channels = 0; + s->xll_nch_sets = get_bits(&s->gb, 4) + 1; + s->xll_segments = 1 << get_bits(&s->gb, 4); + s->xll_log_smpl_in_seg = get_bits(&s->gb, 4); + s->xll_smpl_in_seg = 1 << s->xll_log_smpl_in_seg; + s->xll_bits4seg_size = get_bits(&s->gb, 5) + 1; + s->xll_banddata_crcen = 
get_bits(&s->gb, 2); + s->xll_scalable_lsb = get_bits1(&s->gb); + s->xll_bits4ch_mask = get_bits(&s->gb, 5) + 1; + + if (s->xll_scalable_lsb) { + s->xll_fixed_lsb_width = get_bits(&s->gb, 4); + if (s->xll_fixed_lsb_width) + av_log(s->avctx, AV_LOG_WARNING, + "dca xll: fixed lsb width = %d, non-zero not supported.\n", + s->xll_fixed_lsb_width); + } + /* skip to the end of the common header */ + i = get_bits_count(&s->gb); + if (hdr_pos + hdr_size * 8 > i) + skip_bits_long(&s->gb, hdr_pos + hdr_size * 8 - i); + + for (chset_index = 0; chset_index < s->xll_nch_sets; chset_index++) { + XllChSetSubHeader *chset = &s->xll_chsets[chset_index]; + hdr_pos = get_bits_count(&s->gb); + hdr_size = get_bits(&s->gb, 10) + 1; + + chset->channels = get_bits(&s->gb, 4) + 1; + chset->residual_encode = get_bits(&s->gb, chset->channels); + chset->bit_resolution = get_bits(&s->gb, 5) + 1; + chset->bit_width = get_bits(&s->gb, 5) + 1; + chset->sampling_frequency = dca_sampling_freqs[get_bits(&s->gb, 4)]; + chset->fs_interpolate = get_bits(&s->gb, 2); + chset->replacement_set = get_bits(&s->gb, 2); + if (chset->replacement_set) + chset->active_replace_set = get_bits(&s->gb, 1); + + if (s->one2one_map_chtospkr) { + chset->primary_ch_set = get_bits(&s->gb, 1); + chset->downmix_coeff_code_embedded = get_bits(&s->gb, 1); + if (chset->downmix_coeff_code_embedded) { + chset->downmix_embedded = get_bits(&s->gb, 1); + if (chset->primary_ch_set) { + chset->downmix_type = get_bits(&s->gb, 3); + if (chset->downmix_type > 6) { + av_log(s->avctx, AV_LOG_ERROR, "XLL: Invalid channel set downmix type\n"); + return AVERROR_INVALIDDATA; + } + } + } + chset->hier_chset = get_bits(&s->gb, 1); + + if (chset->downmix_coeff_code_embedded) { + /* nDownmixCoeffs is specified as N * M. For a primary + * channel set, it appears that N = number of + * channels, and M is the number of downmix channels. 
+ * + * For a non-primary channel set, N is specified as + * number of channels + 1, and M is derived from the + * channel set hierarchy, and at least in simple cases + * M is the number of channels in preceding channel + * sets. */ + if (chset->primary_ch_set) { + static const char dmix_table[7] = { 1, 2, 2, 3, 3, 4, 4 }; + chset->downmix_ncoeffs = chset->channels * dmix_table[chset->downmix_type]; + } + else + chset->downmix_ncoeffs = (chset->channels + 1) * s->xll_channels; + + + if (chset->downmix_ncoeffs > DCA_XLL_DMIX_NCOEFFS_MAX) { + av_log(s->avctx, AV_LOG_WARNING, + "dca xll: Skipping %d downmix coefficients, exceeding impl. limit %d\n", + chset->downmix_ncoeffs, DCA_XLL_DMIX_NCOEFFS_MAX); + skip_bits_long(&s->gb, 9 * chset->downmix_ncoeffs); + chset->downmix_ncoeffs = 0; + } + else { + if (chset->primary_ch_set) { + for (i = 0; i < chset->downmix_ncoeffs; i++) + if ( (chset->downmix_coeffs[i] = dca_get_dmix_coeff(s)) == -1) + return AVERROR_INVALIDDATA; + } + else { + unsigned c, r; + for (c = i = 0; c < s->xll_channels; c++, i += chset->channels + 1) { + if ((chset->downmix_coeffs[i] = dca_get_inv_dmix_coeff(s)) + == -1) + return AVERROR_INVALIDDATA; + for (r = 1; r <= chset->channels; r++) { + int32_t coeff = dca_get_dmix_coeff(s); + if (coeff == -1) + return AVERROR_INVALIDDATA; + chset->downmix_coeffs[i+r] + = (chset->downmix_coeffs[i] * (int64_t) coeff + (1<<15)) >> 16; + } + } + } + } + } + chset->ch_mask_enabled = get_bits(&s->gb, 1); + if (chset->ch_mask_enabled) + chset->ch_mask = get_bits(&s->gb, s->xll_bits4ch_mask); + else + /* Skip speaker configuration bits */ + skip_bits_long(&s->gb, 25 * chset->channels); + } else { + chset->primary_ch_set = 1; + chset->downmix_coeff_code_embedded = 0; + /* Spec: NumChHierChSet = 0, NumDwnMixCodeCoeffs = 0, whatever that means. 
*/ + chset->mapping_coeffs_present = get_bits(&s->gb, 1); + if (chset->mapping_coeffs_present) { + av_log(s->avctx, AV_LOG_ERROR, "FIXME: XLL: mapping coeffs not implemented!\n"); + return AVERROR_PATCHWELCOME; + } + } + if (chset->sampling_frequency > 96000) + chset->num_freq_bands = 2*(1 + get_bits(&s->gb, 1)); + else + chset->num_freq_bands = 1; + + if (chset->num_freq_bands > 1) { + av_log(s->avctx, AV_LOG_ERROR, "FIXME: XLL: num_freq_bands > 1 not implemented!\n"); + return AVERROR_PATCHWELCOME; + } + + if (get_bits(&s->gb, 1)) { /* pw_ch_decor_enabled */ + int bits = av_ceil_log2 (chset->channels); + for (i = 0; i < chset->channels; i++) { + unsigned j = get_bits(&s->gb, bits); + if (j >= chset->channels) { + av_log(s->avctx, AV_LOG_ERROR, + "Original channel order value %d too large, only %d channels.\n", + j, chset->channels); + return AVERROR_INVALIDDATA; + } + chset->orig_chan_order[0][i] = j; + chset->orig_chan_order_inv[0][j] = i; + } + for (i = 0; i < chset->channels / 2; i++) { + if (get_bits(&s->gb, 1)) { /* bChPFlag */ + chset->pw_ch_pairs_coeffs[0][i] = get_bits_sm(&s->gb, 7); + } + else + chset->pw_ch_pairs_coeffs[0][i] = 0; + } + } + else { + for (i = 0; i < chset->channels; i++) + chset->orig_chan_order[0][i] = chset->orig_chan_order_inv[0][i] = i; + for (i = 0; i < chset->channels / 2; i++) + chset->pw_ch_pairs_coeffs[0][i] = 0; + } + /* Adaptive prediction order */ + chset->adapt_order_max[0] = 0; + for (i = 0; i < chset->channels; i++) { + chset->adapt_order[0][i] = get_bits(&s->gb, 4); + if (chset->adapt_order_max[0] < chset->adapt_order[0][i]) + chset->adapt_order_max[0] = chset->adapt_order[0][i]; + } + /* Fixed prediction order, used in case the adaptive order + * above is zero */ + for (i = 0; i < chset->channels; i++) + chset->fixed_order[0][i] + = chset->adapt_order[0][i] ? 
0 : get_bits(&s->gb, 2); + + for (i = 0; i < chset->channels; i++) { + unsigned j; + for (j = 0; j < chset->adapt_order[0][i]; j++) + chset->lpc_refl_coeffs_q_ind[0][i][j] = get_bits(&s->gb, 8); + } + + if (s->xll_scalable_lsb) { + chset->lsb_fsize[0] = get_bits(&s->gb, s->xll_bits4seg_size); + + for (i = 0; i < chset->channels; i++) + chset->scalable_lsbs[0][i] = get_bits(&s->gb, 4); + for (i = 0; i < chset->channels; i++) + chset->bit_width_adj_per_ch[0][i] = get_bits(&s->gb, 4); + } + else { + memset(chset->scalable_lsbs[0], 0, + chset->channels * sizeof(chset->scalable_lsbs[0][0])); + memset(chset->bit_width_adj_per_ch[0], 0, + chset->channels * sizeof(chset->bit_width_adj_per_ch[0][0])); + } + + s->xll_channels += chset->channels; + s->xll_residual_channels += chset->channels - av_popcount(chset->residual_encode); + + /* FIXME: Parse header data for extra frequency bands. */ + + /* Skip to end of channel set sub header. */ + i = get_bits_count(&s->gb); + if (hdr_pos + 8*hdr_size < i) { + av_log(s->avctx, AV_LOG_ERROR, "chset header too large, %d bits, should be <= %d bits\n", + i - hdr_pos, 8 * hdr_size); + return AVERROR_INVALIDDATA; + } + if (hdr_pos + 8*hdr_size > i) + skip_bits_long(&s->gb, hdr_pos + 8*hdr_size - i); + } + return 0; +} + +/* parse XLL navigation table */ +static int dca_xll_decode_navi(DCAContext *s, int asset_end) +{ + int nbands, band, chset, seg, data_start; + + /* FIXME: Supports only a single frequency band */ + nbands = 1; + + for (band = 0; band < nbands; band++) { + s->xll_navi.band_size[band] = 0; + for (seg = 0; seg < s->xll_segments; seg++) { + /* Note: The spec, ETSI TS 102 114 V1.4.1 (2012-09), says + * we should read a base value for segment_size from the + * stream, before reading the sizes of the channel sets. + * But that's apparently incorrect. 
*/ + s->xll_navi.segment_size[band][seg] = 0; + + for (chset = 0; chset < s->xll_nch_sets; chset++) + if (band < s->xll_chsets[chset].num_freq_bands) { + s->xll_navi.chset_size[band][seg][chset] + = get_bits(&s->gb, s->xll_bits4seg_size) + 1; + s->xll_navi.segment_size[band][seg] + += s->xll_navi.chset_size[band][seg][chset]; + } + s->xll_navi.band_size[band] += s->xll_navi.segment_size[band][seg]; + } + } + /* align to 8-bit, and skip 16-bit crc */ + skip_bits_long(&s->gb, 16 + ((-get_bits_count(&s->gb)) & 7)); + + data_start = get_bits_count(&s->gb); + if (data_start + 8 * s->xll_navi.band_size[0] > asset_end) { + av_log(s->avctx, AV_LOG_ERROR, "DCA XLL: Data in NAVI table exceeds containing asset\n" + "start: %d (bit), size %d (bytes), end %d (bit), error %d\n", + data_start, s->xll_navi.band_size[0], asset_end, + data_start + 8 * s->xll_navi.band_size[0] - asset_end); + } + init_get_bits(&s->xll_navi.gb, s->gb.buffer + data_start / 8, + 8 * s->xll_navi.band_size[0]); + return 0; +} + +static void dca_xll_inv_adapt_pred(int *samples, int nsamples, + unsigned order, const int *prev, const uint8_t *q_ind) +{ + static const uint16_t table[0x81] = { + 0, 3070, 5110, 7140, 9156, 11154, 13132, 15085, + 17010, 18904, 20764, 22588, 24373, 26117, 27818, 29474, + 31085, 32648, 34164, 35631, 37049, 38418, 39738, 41008, + 42230, 43404, 44530, 45609, 46642, 47630, 48575, 49477, + 50337, 51157, 51937, 52681, 53387, 54059, 54697, 55302, + 55876, 56421, 56937, 57426, 57888, 58326, 58741, 59132, + 59502, 59852, 60182, 60494, 60789, 61066, 61328, 61576, + 61809, 62029, 62236, 62431, 62615, 62788, 62951, 63105, + 63250, 63386, 63514, 63635, 63749, 63855, 63956, 64051, + 64140, 64224, 64302, 64376, 64446, 64512, 64573, 64631, + 64686, 64737, 64785, 64830, 64873, 64913, 64950, 64986, + 65019, 65050, 65079, 65107, 65133, 65157, 65180, 65202, + 65222, 65241, 65259, 65275, 65291, 65306, 65320, 65333, + 65345, 65357, 65368, 65378, 65387, 65396, 65405, 65413, + 65420, 65427, 65434, 
65440, 65446, 65451, 65456, 65461, + 65466, 65470, 65474, 65478, 65481, 65485, 65488, 65491, + 65535, /* Final value is for the -128 corner case, see below. */ + }; + int c[DCA_XLL_AORDER_MAX]; + int64_t s; + unsigned i, j; + + for (i = 0; i < order; i++) { + if (q_ind[i] & 1) + /* The index value 0xff corresponds to a lookup of entry + * 0x80 in the table, and no value is provided in the + * specification. */ + c[i] = -table[(q_ind[i] >> 1) + 1]; + else + c[i] = table[q_ind[i] >> 1]; + } + /* The description in the spec is a bit convoluted. We can convert + the reflected values to direct values in place, using a + sequence of reflections operating on two values. */ + for (i = 1; i < order; i++) { + /* i = 1: scale c[0] + i = 2: reflect c[0] <-> c[1] + i = 3: scale c[1], reflect c[0] <-> c[2] + i = 4: reflect c[0] <-> c[3] reflect c[1] <-> c[2] + ... + */ + if (i & 1) + c[i/2] += ((int64_t) c[i] * c[i/2] + 0x8000) >> 16; + for (j = 0; j < i/2; j++) { + int r0 = c[j]; + int r1 = c[i-j-1]; + c[j] += ((int64_t) c[i] * r1 + 0x8000) >> 16; + c[i-j-1] += ((int64_t) c[i] * r0 + 0x8000) >> 16; + } + } + /* Apply predictor. */ + /* NOTE: Processing samples in this order means that the + predictor is applied to the newly reconstructed samples. */ + if (prev) { + for (i = 0; i < order; i++) { + for (j = s = 0; j < i; j++) + s += (int64_t) c[j] * samples[i-1-j]; + for (; j < order; j++) + s += (int64_t) c[j] * prev[DCA_XLL_AORDER_MAX+i-1-j]; + + samples[i] -= av_clip((s + 0x8000) >> 16, -0x1000000, 0xffffff); + } + } + for (i = order; i < nsamples; i++) { + for (j = s = 0; j < order; j++) + s += (int64_t) c[j] * samples[i-1-j]; + + /* NOTE: Equations seem to imply addition, while the + * pseudocode seems to use subtraction.*/ + samples[i] -= av_clip((s + 0x8000) >> 16, -0x1000000, 0xffffff); + } +} + +static int dca_xll_decode_audio(DCAContext *s, AVFrame *frame) +{ + /* FIXME: Decodes only the first frequency band. 
*/ + int seg, chset_i; + + /* Coding parameters for each channel set. */ + struct coding_params { + int seg_type; + int rice_code_flag[16]; + int pancAuxABIT[16]; + int pancABIT0[16]; /* Not sure what this is */ + int pancABIT[16]; /* Not sure what this is */ + int nSamplPart0[16]; + } param_state[16]; + + GetBitContext *gb = &s->xll_navi.gb; + int *history; + + /* Layout: First the sample buffer for one segment per channel, + * followed by history buffers of DCA_XLL_AORDER_MAX samples for + * each channel. */ + av_fast_malloc(&s->xll_sample_buf, &s->xll_sample_buf_size, + (s->xll_smpl_in_seg + DCA_XLL_AORDER_MAX) * + s->xll_channels * sizeof(*s->xll_sample_buf)); + if (!s->xll_sample_buf) + return AVERROR(ENOMEM); + + history = s->xll_sample_buf + s->xll_smpl_in_seg * s->xll_channels; + + for (seg = 0; seg < s->xll_segments; seg++) { + unsigned in_channel; + + for (chset_i = in_channel = 0; chset_i < s->xll_nch_sets; chset_i++) { + /* The spec isn't very explicit, but I think the NAVI sizes are in bytes. */ + int end_pos = get_bits_count(gb) + + 8 * s->xll_navi.chset_size[0][seg][chset_i]; + int i, j; + struct coding_params *params = &param_state[chset_i]; + /* I think this flag means that we should keep seg_type and + * other parameters from the previous segment. */ + int use_seg_state_code_parm; + XllChSetSubHeader *chset = &s->xll_chsets[chset_i]; + if (in_channel >= s->avctx->channels) + /* FIXME: Could go directly to next segment */ + goto next_chset; + + if (s->avctx->sample_rate != chset->sampling_frequency) { + av_log(s->avctx, AV_LOG_WARNING, + "DCA XLL: unexected chset sample rate %d, expected %d\n", + chset->sampling_frequency, s->avctx->sample_rate); + goto next_chset; + } + if (seg != 0) + use_seg_state_code_parm = get_bits(gb, 1); + else + use_seg_state_code_parm = 0; + + if (!use_seg_state_code_parm) { + int num_parm_sets, i; + unsigned bits4ABIT; + + params->seg_type = get_bits(gb, 1); + num_parm_sets = params->seg_type ? 
1 : chset->channels; + + if (chset->bit_width > 16) + bits4ABIT = 5; + else { + if (chset->bit_width > 8) + bits4ABIT = 4; + else + bits4ABIT = 3; + if (s->xll_nch_sets > 1) + bits4ABIT++; + } + + for (i = 0; i < num_parm_sets; i++) { + params->rice_code_flag[i] = get_bits(gb, 1); + if (!params->seg_type && params->rice_code_flag[i] && get_bits(gb, 1)) + params->pancAuxABIT[i] = get_bits(gb, bits4ABIT) + 1; + else + params->pancAuxABIT[i] = 0; + } + + for (i = 0; i < num_parm_sets; i++) { + if (!seg) { + /* Parameters for part 1 */ + params->pancABIT0[i] = get_bits(gb, bits4ABIT); + if (params->rice_code_flag[i] == 0 && params->pancABIT0[i] > 0) + /* For linear code */ + params->pancABIT0[i]++; + + /* NOTE: In the spec, not indexed by band??? */ + if (params->seg_type == 0) + params->nSamplPart0[i] = chset->adapt_order[0][i]; + else + params->nSamplPart0[i] = chset->adapt_order_max[0]; + } + else + params->nSamplPart0[i] = 0; + + /* Parameters for part 2 */ + params->pancABIT[i] = get_bits(gb, bits4ABIT); + if (params->rice_code_flag[i] == 0 && params->pancABIT[i] > 0) + /* For linear code */ + params->pancABIT[i]++; + } + } + for (i = 0; i < chset->channels; i++) { + int parm_index = params->seg_type ? 
0 : i; + int bits = params->pancABIT0[parm_index]; + int part0 = params->nSamplPart0[parm_index]; + int *sample_buf = s->xll_sample_buf + (in_channel + i) * s->xll_smpl_in_seg; + + if (!params->rice_code_flag[parm_index]) { + /* Linear code */ + if (bits) + for (j = 0; j < part0; j++) + sample_buf[j] = get_bits_sm(gb, bits); + else + memset(sample_buf, 0, part0 * sizeof(sample_buf[0])); + + /* Second part */ + bits = params->pancABIT[parm_index]; + if (bits) + for (j = part0; j < s->xll_smpl_in_seg; j++) + sample_buf[j] = get_bits_sm(gb, bits); + else + memset(sample_buf + part0, 0, + (s->xll_smpl_in_seg - part0) * sizeof(sample_buf[0])); + } + else { + int aux_bits = params->pancAuxABIT[parm_index]; + + for (j = 0; j < part0; j++) { + /* FIXME: Is this identical to golomb code? */ + int t = get_unary(gb, 1, 33) << bits; + /* FIXME: Could move this test outside of the loop, for efficiency. */ + if (bits) + t |= get_bits(gb, bits); + sample_buf[j] = (t & 1) ? - (t >> 1) - 1: (t >> 1); + } + + /* Second part */ + bits = params->pancABIT[parm_index]; + + /* Follow the spec's suggestion of using the + * buffer also to store the hybrid-rice flags. */ + memset(sample_buf + part0, 0, + (s->xll_smpl_in_seg - part0) * sizeof(sample_buf[0])); + + if (aux_bits > 0) { + /* For hybrid rice encoding, some samples are linearly coded */ + /* According to the spec, "nBits4SamplLoci" + * bits are used for each index, but this + * value is not defined. I guess we should use + * log2(xll_smpl_in_seg) bits. */ + int count = get_bits(gb, s->xll_log_smpl_in_seg); + av_log(s->avctx, AV_LOG_DEBUG, "aux count %d (bits %d)\n", + count, s->xll_log_smpl_in_seg); + + for (j = 0; j < count; j++) + sample_buf[get_bits(gb, s->xll_log_smpl_in_seg)] = 1; + } + for (j = part0; j < s->xll_smpl_in_seg; j++) { + if (!sample_buf[j]) { + int t = get_unary(gb, 1, 33); + if (bits) + t = (t << bits) | get_bits(gb, bits); + + sample_buf[j] = (t & 1) ? 
- (t >> 1) - 1 : (t >> 1); + } + else + sample_buf[j] = get_bits_sm(gb, aux_bits); + } + } + } + + for (i = 0; i < chset->channels; i++) { + unsigned adapt_order = chset->adapt_order[0][i]; + int *sample_buf = s->xll_sample_buf + (in_channel + i) * s->xll_smpl_in_seg; + int *prev = history + (in_channel + i) * DCA_XLL_AORDER_MAX; + + if (!adapt_order) { + unsigned order; + for (order = chset->fixed_order[0][i]; + order > 0; order--) { + unsigned j; + for (j = 1; j < s->xll_smpl_in_seg; j++) + sample_buf[j] += sample_buf[j-1]; + } + } + else + /* Inverse adaptive prediction, in place. */ + dca_xll_inv_adapt_pred(sample_buf, s->xll_smpl_in_seg, + adapt_order, seg ? prev : NULL, + chset->lpc_refl_coeffs_q_ind[0][i]); + memcpy(prev, sample_buf + s->xll_smpl_in_seg - DCA_XLL_AORDER_MAX, + DCA_XLL_AORDER_MAX * sizeof(*prev)); + } + for (i = 1; i < chset->channels; i += 2) { + int coeff = chset->pw_ch_pairs_coeffs[0][i/2]; + if (coeff != 0) { + int *sample_buf = s->xll_sample_buf + (in_channel + i) * s->xll_smpl_in_seg; + int *prev = sample_buf - s->xll_smpl_in_seg; + unsigned j; + for (j = 0; j < s->xll_smpl_in_seg; j++) + /* Shift is unspecified, but should apparently + be 3. */ + sample_buf[j] += ((int64_t) coeff * prev[j] + 4) >> 3; + } + } + + if (s->xll_scalable_lsb) { + int lsb_start = end_pos - 8 * chset->lsb_fsize[0] + - 8 * (s->xll_banddata_crcen & 2); + int done; + i = get_bits_count(gb); + if (i > lsb_start) { + av_log(s->avctx, AV_LOG_ERROR, "chset data lsb exceeds NAVI size, end_pos %d, lsb_start %d, pos %d\n", + end_pos, lsb_start, i); + return AVERROR_INVALIDDATA; + } + if (i < lsb_start) + skip_bits_long(gb, lsb_start - i); + + for (i = done = 0; i < chset->channels; i++) { + int bits = chset->scalable_lsbs[0][i]; + if (bits > 0) { + /* The channel reordering is conceptually done + * before adding the lsb:s, so we need to do + * the inverse permutation here. 
*/ + unsigned pi = chset->orig_chan_order_inv[0][i]; + int *sample_buf = s->xll_sample_buf + + (in_channel + pi) * s->xll_smpl_in_seg; + int adj = chset->bit_width_adj_per_ch[0][i]; + int msb_shift = bits; + unsigned j; + + if (adj > 0) + msb_shift += (adj - 1); + + for (j = 0; j < s->xll_smpl_in_seg; j++) + sample_buf[j] = (sample_buf[j] << msb_shift) + + (get_bits(gb, bits) << adj); + + done += bits * s->xll_smpl_in_seg; + } + } + if (done > 8 * chset->lsb_fsize[0]) { + av_log(s->avctx, AV_LOG_ERROR, "chset lsb exceeds lsb_size\n"); + return AVERROR_INVALIDDATA; + } + } + + /* Store output. */ + for (i = 0; i < chset->channels; i++) { + int *sample_buf = s->xll_sample_buf + (in_channel + i) * s->xll_smpl_in_seg; + int shift = 1 - chset->bit_resolution; + int out_channel = chset->orig_chan_order[0][i]; + float *out; + + /* xll uses the channel order C, L, R, and we want L, + * R, C. FIXME: Generalize. */ + if (chset->ch_mask_enabled && + (chset->ch_mask & 7) == 7 && out_channel < 3) + out_channel = out_channel ? out_channel - 1 : 2; + + out_channel += in_channel; + if (out_channel >= s->avctx->channels) + continue; + + out = (float *) frame->extended_data[out_channel]; + out += seg * s->xll_smpl_in_seg; + + /* NOTE: A one bit means that residual encoding *not* used. */ + if ((chset->residual_encode >> i) & 1) { + /* Replace channel samples. FIXME: Most likely not + * the right thing to do. */ + for (j = 0; j < s->xll_smpl_in_seg; j++) + out[j] = ldexpf(sample_buf[j], shift); + } + else { + /* Add residual signal to core channel */ + for (j = 0; j < s->xll_smpl_in_seg; j++) + out[j] += ldexpf(sample_buf[j], shift); + } + } + + if (chset->downmix_coeff_code_embedded + && !chset->primary_ch_set + && chset->hier_chset) { + /* Undo hierarchical downmix of earlier channels. 
*/ + unsigned mix_channel; + for (mix_channel = 0; mix_channel < in_channel; mix_channel++) { + float *mix_buf; + const int *col; + float coeff; + unsigned row; + /* Similar channel reorder C, L, R vs L, R, C reorder. */ + if (chset->ch_mask_enabled && + (chset->ch_mask & 7) == 7 && mix_channel < 3) + mix_buf = (float *) frame->extended_data[mix_channel ? mix_channel - 1 : 2]; + else + mix_buf = (float *) frame->extended_data[mix_channel]; + + mix_buf += seg * s->xll_smpl_in_seg; + col = &chset->downmix_coeffs[mix_channel * (chset->channels + 1)]; + + /* Scale */ + coeff = ldexpf(col[0], -16); + for (j = 0; j < s->xll_smpl_in_seg; j++) + mix_buf[j] *= coeff; + + for (row = 0; + row < chset->channels && in_channel + row < s->avctx->channels; + row++) + if (col[row+1]) { + const float *new_channel = (const float *) frame->extended_data[in_channel + row]; + new_channel += seg * s->xll_smpl_in_seg; + coeff = ldexpf(col[row+1], -15); + for (j = 0; j < s->xll_smpl_in_seg; j++) + mix_buf[j] -= coeff * new_channel[j]; + } + } + } + + next_chset: + in_channel += chset->channels; + /* Skip to next channel set using the NAVI info */ + i = get_bits_count(gb); + if (i > end_pos) { + av_log(s->avctx, AV_LOG_ERROR, "chset data exceeds NAVI size\n"); + return AVERROR_INVALIDDATA; + } + if (i < end_pos) + skip_bits_long(gb, end_pos - i); + } + } + return 0; +} + /** * Parse extension substream asset header (HD) */ @@ -1468,7 +2391,7 @@ static int dca_exss_parse_asset_header(DCAContext *s) { int header_pos = get_bits_count(&s->gb); int header_size; - int channels; + int channels = 0; int embedded_stereo = 0; int embedded_6ch = 0; int drc_code_present; @@ -1503,7 +2426,8 @@ static int dca_exss_parse_asset_header(DCAContext *s) skip_bits(&s->gb, 4); // max sample rate code channels = get_bits(&s->gb, 8) + 1; - if (get_bits1(&s->gb)) { // 1-to-1 channels to speakers + s->one2one_map_chtospkr = get_bits1(&s->gb); + if (s->one2one_map_chtospkr) { int spkr_remap_sets; int spkr_mask_size = 
16; int num_spkrs[7]; @@ -1617,21 +2541,27 @@ static int dca_exss_parse_asset_header(DCAContext *s) */ static void dca_exss_parse_header(DCAContext *s) { + int asset_size[8]; int ss_index; int blownup; int num_audiop = 1; int num_assets = 1; int active_ss_mask[8]; int i, j; + int start_posn; + int hdrsize; + uint32_t mkr; if (get_bits_left(&s->gb) < 52) return; + start_posn = get_bits_count(&s->gb) - 32; + skip_bits(&s->gb, 8); // user data ss_index = get_bits(&s->gb, 2); blownup = get_bits1(&s->gb); - skip_bits(&s->gb, 8 + 4 * blownup); // header_size + hdrsize = get_bits(&s->gb, 8 + 4 * blownup) + 1; // header_size skip_bits(&s->gb, 16 + 4 * blownup); // hd_size s->static_fields = get_bits1(&s->gb); @@ -1684,15 +2614,50 @@ static void dca_exss_parse_header(DCAContext *s) } for (i = 0; i < num_assets; i++) - skip_bits_long(&s->gb, 16 + 4 * blownup); // asset size + asset_size[i] = get_bits_long(&s->gb, 16 + 4 * blownup) + 1; for (i = 0; i < num_assets; i++) { if (dca_exss_parse_asset_header(s)) return; } - /* not parsed further, we were only interested in the extensions mask - * from the asset header */ + if (num_assets > 0) { + j = get_bits_count(&s->gb); + if (start_posn + hdrsize * 8 > j) + skip_bits_long(&s->gb, start_posn + hdrsize * 8 - j); + + for (i = 0; i < num_assets; i++) { + int end_posn; + start_posn = get_bits_count(&s->gb); + end_posn = start_posn + asset_size[i] * 8; + mkr = get_bits_long(&s->gb, 32); + + /* parse extensions that we know about */ + switch (mkr) { + case 0x41a29547: /* XLL */ + if (s->xll_disable) + av_log(s->avctx, AV_LOG_DEBUG, "DTS-XLL: ignoring XLL extension\n"); + else { + av_log(s->avctx, AV_LOG_DEBUG, "DTS-XLL: decoding XLL extension\n"); + if (dca_xll_decode_header(s) == 0 && dca_xll_decode_navi(s, end_posn) == 0) + s->exss_ext_mask |= DCA_EXT_EXSS_XLL; + } + break; + case 0x655e315e: /* XBR */ + case 0x47004a03: /* XXCH */ + default: + av_log(s->avctx, AV_LOG_VERBOSE, + "DTS-ExSS: unknown marker = 0x%08x\n", mkr); + } + + 
/* skip to end of block */ + j = get_bits_count(&s->gb); + if (j > end_posn) + av_log(s->avctx, AV_LOG_ERROR, "DTS-ExSS: Processed asset too long.\n"); + if (j < end_posn) + skip_bits_long(&s->gb, end_posn - j); + } + } } static float dca_dmix_code(unsigned code) @@ -1720,8 +2685,15 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data, DCAContext *s = avctx->priv_data; int channels, full_channels; int core_ss_end; + int upsample = 0; + if (!avpkt->pos) + s->frame_index = s->sample_index = 0; + av_log(avctx, AV_LOG_DEBUG, "%s: stream_index %d, pos %ld, frame index %u, sample index %u\n", + __func__, avpkt->stream_index, avpkt->pos, s->frame_index, s->sample_index); + s->frame_index++; + s->exss_ext_mask = 0; s->xch_present = 0; s->dca_buffer_size = ff_dca_convert_bitstream(buf, buf_size, s->dca_buffer, @@ -1950,10 +2922,52 @@ FF_ENABLE_DEPRECATION_WARNINGS /* get output buffer */ frame->nb_samples = 256 * (s->sample_blocks / 8); + if (s->exss_ext_mask & DCA_EXT_EXSS_XLL) { + int xll_nb_samples = s->xll_segments * s->xll_smpl_in_seg; + /* Check for invalid/unsupported conditions first */ + if (s->xll_residual_channels > channels) { + av_log(s->avctx, AV_LOG_WARNING, + "DCA: too many residual channels (%d, core channels %d). Disabling XLL\n", + s->xll_residual_channels, channels); + s->exss_ext_mask &= ~DCA_EXT_EXSS_XLL; + } + else if (xll_nb_samples != frame->nb_samples + && 2*frame->nb_samples != xll_nb_samples) { + av_log(s->avctx, AV_LOG_WARNING, + "DCA: unsupported upsampling (%d xll samples, %d core samples). Disabling XLL\n", + xll_nb_samples, frame->nb_samples); + s->exss_ext_mask &= ~DCA_EXT_EXSS_XLL; + } + else { + if (2*frame->nb_samples == xll_nb_samples) { + av_log(s->avctx, AV_LOG_INFO, + "DCA XLL: upsampling core channels by a factor 2\n"); + upsample = 1; + + frame->nb_samples = xll_nb_samples; + /* FIXME: Is it good enough to copy from the first channel + set? 
*/ + avctx->sample_rate = s->xll_chsets[0].sampling_frequency; + } + /* If downmixing to stereo, don't decode additional + * channels. FIXME: Using the xch_disable flag for this + * doesn't seem right. */ + if (!s->xch_disable) + avctx->channels += s->xll_channels - s->xll_residual_channels; + } + } + + /* FIXME: This is an ugly hack, to just revert to the default + * layout if we have additional channels. Need to convert the xll + * channel masks to libav channel_layout mask. */ + if (av_get_channel_layout_nb_channels(avctx->channel_layout) != avctx->channels) + avctx->channel_layout = 0; + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) { av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return ret; } + samples_flt = (float **)frame->extended_data; /* allocate buffer for extra channels if downmixing */ @@ -1980,13 +2994,13 @@ FF_ENABLE_DEPRECATION_WARNINGS /* filter to get final output */ for (i = 0; i < (s->sample_blocks / 8); i++) { int ch; - + unsigned block = upsample ? 512 : 256; for (ch = 0; ch < channels; ch++) - s->samples_chanptr[ch] = samples_flt[ch] + i * 256; + s->samples_chanptr[ch] = samples_flt[ch] + i * block; for (; ch < full_channels; ch++) - s->samples_chanptr[ch] = s->extra_channels[ch - channels] + i * 256; + s->samples_chanptr[ch] = s->extra_channels[ch - channels] + i * block; - dca_filter_channels(s, i); + dca_filter_channels(s, i, upsample); /* If this was marked as a DTS-ES stream we need to subtract back- */ /* channel from SL & SR to remove matrixed back-channel signal */ @@ -2004,6 +3018,11 @@ FF_ENABLE_DEPRECATION_WARNINGS for (i = 0; i < 2 * s->lfe * 4; i++) s->lfe_data[i] = s->lfe_data[i + lfe_samples]; + if (s->exss_ext_mask & DCA_EXT_EXSS_XLL) { + ret = dca_xll_decode_audio(s, frame); + if (ret < 0) + return ret; + } /* AVMatrixEncoding * * DCA_STEREO_TOTAL (Lt/Rt) is equivalent to Dolby Surround */ @@ -2014,7 +3033,7 @@ FF_ENABLE_DEPRECATION_WARNINGS return ret; *got_frame_ptr = 1; - + s->sample_index += frame->nb_samples; 
return buf_size; } @@ -2060,6 +3079,8 @@ static av_cold int dca_decode_end(AVCodecContext *avctx) DCAContext *s = avctx->priv_data; ff_mdct_end(&s->imdct); av_freep(&s->extra_channels_buffer); + av_freep(&s->xll_sample_buf); + av_freep(&s->qmf64_table); return 0; } @@ -2074,6 +3095,7 @@ static const AVProfile profiles[] = { static const AVOption options[] = { { "disable_xch", "disable decoding of the XCh extension", offsetof(DCAContext, xch_disable), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM|AV_OPT_FLAG_AUDIO_PARAM }, + { "disable_xll", "disable decoding of the XLL extension", offsetof(DCAContext, xll_disable), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM|AV_OPT_FLAG_AUDIO_PARAM }, { NULL }, }; -- 1.8.3.2 -- Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26. Internet email is subject to wholesale government surveillance. _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
