Marcelo Galvão Póvoa wrote:
Fixed the incorrect excitation array length.
It should now apply cleanly to the latest FFmpeg revision
(fd151a5f8bd152c456a).
First look:
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 1422b5c..9c68d72 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -60,6 +60,7 @@ OBJS-$(CONFIG_AMRNB_DECODER) += amrnbdec.o
celp_filters.o \
celp_math.o acelp_filters.o \
acelp_vectors.o \
acelp_pitch_delay.o
+OBJS-$(CONFIG_AMRWB_DECODER) += amrwbdec.o
You are using a lot of functions (ff_acelp_interpolatef, ff_lsp2polyf,
etc.) that are not in amrwbdec.c, so you need to add to the Makefile the
dependencies of everything you use. To check that you haven't forgotten
any, try "./configure --disable-decoders --enable-decoder=amr-wb".
+static av_cold int amrwb_decode_init(AVCodecContext *avctx)
+{
+ AMRWBContext *ctx = avctx->priv_data;
+ int i;
+
+ avctx->sample_fmt = SAMPLE_FMT_FLT;
+
+ av_lfg_init(&ctx->prng, 1);
+
+ ctx->excitation = &ctx->excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 1];
+ ctx->first_frame = 1;
+ ctx->tilt_coef = ctx->prev_tr_gain = 0.0;
Not needed: ctx is already allocated with av_mallocz(), so these fields are
zeroed from the start.
+/**
+ * Parses a speech frame, storing data in the Context
+ *
+ * @param[in,out] ctx The context
+ * @param[in] buf Pointer to the input buffer
+ * @param[in] buf_size Size of the input buffer
+ *
+ * @return The frame mode
+ */
+static enum Mode unpack_bitstream(AMRWBContext *ctx, const uint8_t *buf,
+ int buf_size)
+{
+ GetBitContext gb;
+ enum Mode mode;
+ uint16_t *data;
+
+ init_get_bits(&gb, buf, buf_size * 8);
+
+ /* decode frame header (1st octet) */
+ skip_bits(&gb, 1); // padding bit
+ ctx->fr_cur_mode = get_bits(&gb, 4);
+ mode = ctx->fr_cur_mode;
Why the extra "mode" variable? You can just use ctx->fr_cur_mode directly...
+ if (mode < MODE_SID) { /* Normal speech frame */
+ const uint16_t *perm = amr_bit_orderings_by_mode[mode];
+ int field_size;
+
+ while ((field_size = *perm++)) {
+ int field = 0;
+ int field_offset = *perm++;
+ while (field_size--) {
+ uint16_t bit_idx = *perm++;
+ field <<= 1;
+ /* The bit index inside the byte is reversed (MSB->LSB) */
+ field |= BIT_POS(buf[bit_idx >> 3], 7 - (bit_idx & 7));
+ }
+ data[field_offset] = field;
+ }
+ }
+ else if (mode == MODE_SID) { /* Comfort noise frame */
+ /* not implemented */
+ }
av_log_missing_feature(...);
+/**
+ * Convert an ISF vector into an ISP vector
+ *
+ * @param[in] isf Isf vector
+ * @param[out] isp Output isp vector
+ * @param[in] size Isf/isp size
+ */
+static void isf2isp(const float *isf, double *isp, int size)
+{
+ int i;
+
+ for (i = 0; i < size - 1; i++)
+ isp[i] = cos(2.0 * M_PI * isf[i]);
+
+ isp[size - 1] = cos(4.0 * M_PI * isf[size - 1]);
+}
This is almost a duplicate of amrnbdec.c:lsf2lsp().
+/**
+ * Decodes quantized ISF vectors using 36-bit indices (6K60 mode only)
+ *
+ * @param[in] ind Array of 5 indices
+ * @param[out] isf_q Buffer for isf_q[LP_ORDER]
+ * @param[in] fr_q Frame quality (good frame == 1)
+ *
+ */
+static void decode_isf_indices_36b(uint16_t *ind, float *isf_q, uint8_t fr_q) {
+ int i;
+
+ if (fr_q == 1) {
+ for (i = 0; i < 9; i++) {
+ isf_q[i] = dico1_isf[ind[0]][i] / (float) (1 << 15);
isf_q[i] = dico1_isf[ind[0]][i] * (1.0f / (1 << 15));
It is faster and the compiler cannot do it for you (this applies to a
lot of code in your patch).
+ for (i = 0; i < 7; i++) {
+ isf_q[i + 9] = dico2_isf[ind[1]][i] / (float) (1 << 15);
+ }
+ for (i = 0; i < 5; i++) {
+ isf_q[i] += dico21_isf_36b[ind[2]][i] / (float) (1 << 15);
+ }
+ for (i = 0; i < 4; i++) {
+ isf_q[i + 5] += dico22_isf_36b[ind[3]][i] / (float) (1 << 15);
+ }
+ for (i = 0; i < 7; i++) {
+ isf_q[i + 9] += dico23_isf_36b[ind[4]][i] / (float) (1 << 15);
+ }
+ }
+ /* not implemented for bad frame */
Unless you plan to implement bad frame handling soon, I suggest you just
remove the function parameter. It serves no purpose for now.
+/**
+ * Ensures a minimum distance between adjacent ISFs
+ *
+ * @param[in,out] isf ISF vector
+ * @param[in] min_spacing Minimum gap to keep
+ * @param[in] size ISF vector size
+ *
+ */
+static void isf_set_min_dist(float *isf, float min_spacing, int size) {
+ int i;
+ float prev = 0.0;
+
+ for (i = 0; i < size - 1; i++) {
+ isf[i] = FFMAX(isf[i], prev + min_spacing);
+ prev = isf[i];
+ }
+}
Please do not duplicate lsp.c:ff_set_min_dist_lsf().
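If I'm not mistaken, the whole body could collapse to a single call like
this (just a sketch; double-check the size argument against that function's
loop bound, since your version intentionally skips isf[size - 1]):

    ff_set_min_dist_lsf(isf, min_spacing, size - 1);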
+/**
+ * 16kHz version of ff_lsp2polyf for the high-band
+ */
+static void lsp2polyf_16k(const double *lsp, double *f, int lp_half_order)
+{
+ int i, j;
+
+ f[0] = 0.25;
+ f[1] = -0.5 * lsp[0];
+ lsp -= 2;
+ for(i = 2; i <= lp_half_order; i++)
+ {
+
nit:
for(i = 2; i <= lp_half_order; i++) {
+ f[i] = val * f[i - 1] + 2 * f[i - 2];
+ for(j = i - 1; j > 1; j--)
+ f[j] += f[j - 1] * val + f[j - 2];
+ f[1] += 0.25 * val;
+ }
+}
Hmm, can't this function be replaced by ff_lsp2polyf() with some rescaling?
+/**
+ * Convert an ISP vector to the LP coefficient domain {a_k}
+ * Equations from TS 26.190 section 5.2.4
+ *
+ * @param[in] isp ISP vector for a subframe
+ * @param[out] lp LP coefficients
+ * @param[in] lp_half_order Half the number of LPs to construct
+ */
+static void isp2lp(const double *isp, float *lp, int lp_half_order) {
+ double pa[10 + 1], qa[10 + 1];
+ double last_isp = isp[2 * lp_half_order - 1];
+ double qa_old = 0.0;
+ float *lp2 = &lp[2 * lp_half_order];
+ int i;
+
+ if (lp_half_order > 8) { // high-band specific
+ lsp2polyf_16k(isp, pa, lp_half_order);
+ lsp2polyf_16k(isp + 1, qa, lp_half_order - 1);
+
+ for (i = 0; i <= lp_half_order; i++)
+ pa[i] *= 4.0;
+ for (i = 0; i < lp_half_order; i++)
+ qa[i] *= 4.0;
+ } else {
+ ff_lsp2polyf(isp, pa, lp_half_order);
+ ff_lsp2polyf(isp + 1, qa, lp_half_order - 1);
+ }
+
+ for (i = 1; i < lp_half_order; i++) {
+ double paf = (1 + last_isp) * pa[i];
+ double qaf = (1 - last_isp) * (qa[i] - qa_old);
+
+ qa_old = qa[i - 1];
+
+ lp[i] = 0.5 * (paf + qaf);
+ lp2[-i] = 0.5 * (paf - qaf);
+ }
+
+ lp[0] = 1.0;
+ lp[lp_half_order] = 0.5 * (1 + last_isp) * pa[lp_half_order];
+ lp2[0] = last_isp;
+}
Please double-check if this is not a duplication of sipr.c:lsp2lpc_sipr().
+static void decode_pitch_vector(AMRWBContext *ctx,
+ const AMRWBSubFrame *amr_subframe,
+ const int subframe)
+{
+ int pitch_lag_int, pitch_lag_frac;
+ int i;
+ float *exc = ctx->excitation;
+ enum Mode mode = ctx->fr_cur_mode;
+
+ if (mode <= MODE_8k85) {
+ decode_pitch_lag_low(&pitch_lag_int, &pitch_lag_frac, amr_subframe->adap,
+ &ctx->base_pitch_lag, subframe, mode);
+ } else
+ decode_pitch_lag_high(&pitch_lag_int, &pitch_lag_frac, amr_subframe->adap,
+ &ctx->base_pitch_lag, subframe);
+
+ ctx->pitch_lag_int = pitch_lag_int;
+ pitch_lag_int += (pitch_lag_frac < 0 ? -1 : 0) + (pitch_lag_frac ? 1 : 0);
+
+ /* Calculate the pitch vector by interpolating the past excitation at the
+ pitch lag using a hamming windowed sinc function */
+ ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int,
+ ac_inter, 4,
+ pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4),
+ LP_ORDER, AMRWB_SFR_SIZE + 1);
ac_inter is yet another Hamming-windowed sinc table. Can you check whether
you can reuse acelp_vectors.c:ff_b60_sinc or sipr16kdata.h:sinc_win instead?
+/**
+ * Reduce fixed vector sparseness by smoothing with one of three IR filters
+ * Also known as "adaptive phase dispersion"
+ *
+ * @param[in] ctx The context
+ * @param[in,out] fixed_vector Unfiltered fixed vector
+ * @param[out] buf Space for modified vector if necessary
+ *
+ * @return The potentially overwritten filtered fixed vector address
+ */
+static float *anti_sparseness(AMRWBContext *ctx,
+ float *fixed_vector, float *buf)
amrnbdec.c has a function with the same name. Can any of that code be reused?
+/**
+ * Apply to synthesis a 2nd order high-pass filter
+ *
+ * @param[out] out Buffer for filtered output
+ * @param[in] hpf_coef Filter coefficients as used below
+ * @param[in,out] mem State from last filtering (updated)
+ * @param[in] in Input speech data
+ *
+ * @remark It is safe to pass the same array in in and out parameters
+ */
+static void high_pass_filter(float *out, const float hpf_coef[2][3],
+ float mem[4], const float *in)
+{
+ int i;
+ float *x = mem - 1, *y = mem + 2; // previous inputs and outputs
+
+ for (i = 0; i < AMRWB_SFR_SIZE; i++) {
+ float x0 = in[i];
+
+ out[i] = hpf_coef[0][0] * x0 + hpf_coef[1][0] * y[0] +
+ hpf_coef[0][1] * x[1] + hpf_coef[1][1] * y[1] +
+ hpf_coef[0][2] * x[2];
+
+ y[1] = y[0];
+ y[0] = out[i];
+
+ x[2] = x[1];
+ x[1] = x0;
+ }
+}
This looks like a duplicate of
acelp_filters.c:ff_acelp_apply_order_2_transfer_function().
+/**
+ * Upsample a signal by 5/4 ratio (from 12.8kHz to 16kHz) using
+ * a FIR interpolation filter. Uses past data from before *in address
+ *
+ * @param[out] out Buffer for interpolated signal
+ * @param[in] in Current signal data (length 0.8*o_size)
+ * @param[in] o_size Output signal length
+ */
+static void upsample_5_4(float *out, const float *in, int o_size)
+{
+ const float *in0 = in - UPS_FIR_SIZE + 1;
+ int i;
+
+ for (i = 0; i < o_size; i++) {
+ int int_part = (4 * i) / 5;
+ int frac_part = (4 * i) - 5 * int_part;
You can break this loop in two to avoid the division:
    i = 0;
    for (j = 0; j < o_size/5; j++)
        for (k = 0; k < 5; k++) {
            ....
            i++;
        }
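Spelled out, that would be something like this (just a sketch: the two tables
below are simply (4*i)/5 - 4*j and the matching remainder precomputed for
k = 0..4, and the FIR interpolation body from your patch stays unchanged):

    static const int int_off[5]  = { 0, 0, 1, 2, 3 }; // (4 * (5*j + k)) / 5 - 4*j
    static const int frac_off[5] = { 0, 4, 3, 2, 1 }; // 4 * (5*j + k) - 5 * int_part
    int i = 0, j, k;

    for (j = 0; j < o_size / 5; j++)
        for (k = 0; k < 5; k++) {
            int int_part  = 4 * j + int_off[k];
            int frac_part = frac_off[k];
            /* ... same interpolation body as in the patch ... */
            i++;
        }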
+/**
+ * Generate the high-band excitation with the same energy from the lower
+ * one and scaled by the given gain
+ *
+ * @param[in] ctx The context
+ * @param[out] hb_exc Buffer for the excitation
+ * @param[in] synth_exc Low-band excitation used for synthesis
+ * @param[in] hb_gain Wanted excitation gain
+ */
+static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc,
+ const float *synth_exc, float hb_gain)
+{
+ int i;
+ float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
+
+ /* Generate a white-noise excitation */
+ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
+ hb_exc[i] = 32768.0 - (uint16_t) av_lfg_get(&ctx->prng);
+
+ ff_scale_vector_to_given_sum_of_squares(hb_exc, hb_exc, energy,
+ AMRWB_SFR_SIZE_16k);
+
+ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
+ hb_exc[i] *= hb_gain;
+}
Why are you scaling it twice?
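I mean something like this (just a sketch: ff_scale_vector_to_given_sum_of_squares()
takes the target sum of squares, so hb_gain can be folded into it and the
per-sample loop goes away; the result is the same as long as hb_gain is
non-negative, as a gain should be):

    /* scale the white noise directly to the gained energy, in one pass */
    ff_scale_vector_to_given_sum_of_squares(hb_exc, hb_exc,
                                            energy * hb_gain * hb_gain,
                                            AMRWB_SFR_SIZE_16k);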
+/**
+ * Apply to high-band samples a 15th order filter
+ * The filter characteristic depends on the given coefficients
+ *
+ * @param[out] out Buffer for filtered output
+ * @param[in] fir_coef Filter coefficients
+ * @param[in,out] mem State from last filtering (updated)
+ * @param[in] cp_gain Compensation gain (usually the filter gain)
+ * @param[in] in Input speech data (high-band)
+ *
+ * @remark It is safe to pass the same array in in and out parameters
+ */
+static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
+ float mem[HB_FIR_SIZE], float cp_gain, const float *in)
+{
+ int i, j;
+ float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples
+
+ memcpy(data, mem, HB_FIR_SIZE * sizeof(float));
+
+ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
+ data[i + HB_FIR_SIZE] = in[i] / cp_gain;
+
+ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
+ out[i] = 0.0;
+ for (j = 0; j <= HB_FIR_SIZE; j++)
+ out[i] += data[i + j] * fir_coef[j];
+ }
+
+ memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
+}
I think it is cleaner (and more consistent) to do it the way the synthesis
filter does and use one single buffer for both output and memory...
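What I have in mind is roughly this (only a sketch: hb_fir_buf would be a new
context field of size HB_FIR_SIZE + AMRWB_SFR_SIZE_16k whose first HB_FIR_SIZE
entries hold the scaled samples of the previous subframe, passed here as mem):

    static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
                              float *mem, float cp_gain, const float *in)
    {
        int i, j;
        float *data = mem + HB_FIR_SIZE; // new samples go right after the history

        for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
            data[i] = in[i] / cp_gain;

        for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
            out[i] = 0.0;
            for (j = 0; j <= HB_FIR_SIZE; j++)
                out[i] += mem[i + j] * fir_coef[j];
        }

        /* keep the last HB_FIR_SIZE scaled samples as history for the next call */
        memmove(mem, mem + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
    }

That drops one of the memcpy()s and the local buffer, and keeps the state
handling in one place.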
-Vitor