Marcelo Galvão Póvoa wrote:
Fixed the incorrect excitation array length.

Should now apply cleanly to the latest ffmpeg revision
(fd151a5f8bd152c456a).

First look:

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 1422b5c..9c68d72 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -60,6 +60,7 @@ OBJS-$(CONFIG_AMRNB_DECODER)           += amrnbdec.o celp_filters.o   \
                                           celp_math.o acelp_filters.o \
                                           acelp_vectors.o             \
                                           acelp_pitch_delay.o
+OBJS-$(CONFIG_AMRWB_DECODER)           += amrwbdec.o

You are using a lot of functions (ff_acelp_interpolatef, ff_lsp2polyf,
etc.) that are not in amrwbdec.c, so you need to add the dependencies for
everything you use. To check that you didn't forget any, try "./configure
--disable-decoders --enable-decoder=amr-wb".
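
For reference, the dependency line will probably end up looking roughly like
this (the exact object list is only a guess -- it depends on which helpers
amrwbdec.c finally calls):

OBJS-$(CONFIG_AMRWB_DECODER)           += amrwbdec.o celp_filters.o   \
                                          celp_math.o acelp_filters.o \
                                          acelp_vectors.o lsp.o       \
                                          acelp_pitch_delay.o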

+static av_cold int amrwb_decode_init(AVCodecContext *avctx)
+{
+    AMRWBContext *ctx = avctx->priv_data;
+    int i;
+
+    avctx->sample_fmt = SAMPLE_FMT_FLT;
+
+    av_lfg_init(&ctx->prng, 1);
+
+    ctx->excitation  = &ctx->excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 1];
+    ctx->first_frame = 1;

+    ctx->tilt_coef   = ctx->prev_tr_gain = 0.0;

Not needed: ctx is already zeroed, since priv_data is allocated with av_mallocz().

+/**
+ * Parses a speech frame, storing data in the Context
+ *
+ * @param[in,out] ctx              The context
+ * @param[in] buf                  Pointer to the input buffer
+ * @param[in] buf_size             Size of the input buffer
+ *
+ * @return The frame mode
+ */
+static enum Mode unpack_bitstream(AMRWBContext *ctx, const uint8_t *buf,
+                                  int buf_size)
+{
+    GetBitContext gb;
+    enum Mode mode;
+    uint16_t *data;
+
+    init_get_bits(&gb, buf, buf_size * 8);
+
+    /* decode frame header (1st octet) */
+    skip_bits(&gb, 1);  // padding bit
+    ctx->fr_cur_mode  = get_bits(&gb, 4);

+    mode              = ctx->fr_cur_mode;

Why the extra "mode" variable? You can just use ctx->fr_cur_mode directly...
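
That is, the checks below can simply read:

    if (ctx->fr_cur_mode < MODE_SID) { /* Normal speech frame */
        const uint16_t *perm = amr_bit_orderings_by_mode[ctx->fr_cur_mode];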

+    if (mode < MODE_SID) { /* Normal speech frame */
+        const uint16_t *perm = amr_bit_orderings_by_mode[mode];
+        int field_size;
+
+        while ((field_size = *perm++)) {
+            int field = 0;
+            int field_offset = *perm++;
+            while (field_size--) {
+               uint16_t bit_idx = *perm++;
+               field <<= 1;
+               /* The bit index inside the byte is reversed (MSB->LSB) */
+               field |= BIT_POS(buf[bit_idx >> 3], 7 - (bit_idx & 7));
+            }
+            data[field_offset] = field;
+        }
+    }
+    else if (mode == MODE_SID) { /* Comfort noise frame */
+        /* not implemented */
+    }

Please add an av_log_missing_feature() call here instead of silently decoding nothing.
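
Something like this (ctx->avctx is hypothetical -- I don't know if you keep the
AVCodecContext around; if not, pass it down or use NULL as the log context):

    } else if (ctx->fr_cur_mode == MODE_SID) { /* Comfort noise frame */
        av_log_missing_feature(ctx->avctx, "SID mode", 1);
    }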

+/**
+ * Convert an ISF vector into an ISP vector
+ *
+ * @param[in] isf                  Isf vector
+ * @param[out] isp                 Output isp vector
+ * @param[in] size                 Isf/isp size
+ */
+static void isf2isp(const float *isf, double *isp, int size)
+{
+    int i;
+
+    for (i = 0; i < size - 1; i++)
+        isp[i] = cos(2.0 * M_PI * isf[i]);
+
+    isp[size - 1] = cos(4.0 * M_PI * isf[size - 1]);
+}

Almost a duplicate of amrnbdec.c:lsf2lsp().

+/**
+ * Decodes quantized ISF vectors using 36-bit indices (6K60 mode only)
+ *
+ * @param[in] ind                  Array of 5 indices
+ * @param[out] isf_q               Buffer for isf_q[LP_ORDER]
+ * @param[in] fr_q                 Frame quality (good frame == 1)
+ *
+ */
+static void decode_isf_indices_36b(uint16_t *ind, float *isf_q, uint8_t fr_q) {
+    int i;
+
+    if (fr_q == 1) {
+        for (i = 0; i < 9; i++) {
+            isf_q[i] = dico1_isf[ind[0]][i] / (float) (1 << 15);

isf_q[i] = dico1_isf[ind[0]][i] * (1.0f / (1 << 15));

It is faster and the compiler cannot do it for you (this applies to a
lot of code in your patch).

+        for (i = 0; i < 7; i++) {
+            isf_q[i + 9] = dico2_isf[ind[1]][i] / (float) (1 << 15);
+        }
+        for (i = 0; i < 5; i++) {
+            isf_q[i] += dico21_isf_36b[ind[2]][i] / (float) (1 << 15);
+        }
+        for (i = 0; i < 4; i++) {
+            isf_q[i + 5] += dico22_isf_36b[ind[3]][i] / (float) (1 << 15);
+        }
+        for (i = 0; i < 7; i++) {
+            isf_q[i + 9] += dico23_isf_36b[ind[4]][i] / (float) (1 << 15);
+        }
+    }
+    /* not implemented for bad frame */

Unless you plan to implement bad-frame handling soon, I suggest you just
remove the function parameter. It serves no purpose as it is now.
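
That is, the prototype would simply become:

    static void decode_isf_indices_36b(uint16_t *ind, float *isf_q)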

+/**
+ * Ensures a minimum distance between adjacent ISFs
+ *
+ * @param[in,out] isf              ISF vector
+ * @param[in] min_spacing          Minimum gap to keep
+ * @param[in] size                 ISF vector size
+ *
+ */
+static void isf_set_min_dist(float *isf, float min_spacing, int size) {
+    int i;
+    float prev = 0.0;
+
+    for (i = 0; i < size - 1; i++) {
+        isf[i] = FFMAX(isf[i], prev + min_spacing);
+        prev = isf[i];
+    }
+}

Please do not duplicate lsp.c:ff_set_min_dist_lsf().
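
If I remember the helper right it walks all "size" entries, so the body above
should reduce to something like:

    ff_set_min_dist_lsf(isf, min_spacing, size - 1);

(the "- 1" because your loop deliberately skips the last ISF -- please
double-check the prototype, I'm quoting it from memory).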

+/**
+ * 16kHz version of ff_lsp2polyf for the high-band
+ */
+static void lsp2polyf_16k(const double *lsp, double *f, int lp_half_order)
+{
+    int i, j;
+
+    f[0] = 0.25;
+    f[1] = -0.5 * lsp[0];
+    lsp -= 2;
+    for(i = 2; i <= lp_half_order; i++)
+    {
+

nit:
for(i = 2; i <= lp_half_order; i++) {

+        f[i] = val * f[i - 1] + 2 * f[i - 2];
+        for(j = i - 1; j > 1; j--)
+            f[j] += f[j - 1] * val + f[j - 2];
+        f[1] += 0.25 * val;
+    }
+}

Hmm, can't this function be replaced by ff_lsp2polyf() with some rescaling?
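
Unless I'm miscounting, every coefficient it produces is exactly 0.25 times the
ff_lsp2polyf() value: the recursion is linear in f, and f[0], f[1] and the f[1]
update are all the lsp.c ones scaled by 0.25 (assuming the trimmed "val" line is
the same -2 * lsp[2*i] as in lsp.c). Since isp2lp() below multiplies the
high-band polynomials by 4.0 right after, the whole special case might reduce to

    /* sketch -- please verify the 0.25 scaling claim before dropping lsp2polyf_16k() */
    ff_lsp2polyf(isp,     pa, lp_half_order);
    ff_lsp2polyf(isp + 1, qa, lp_half_order - 1);

for both bands, with the *= 4.0 loops removed.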

+/**
+ * Convert a ISP vector to LP coefficient domain {a_k}
+ * Equations from TS 26.190 section 5.2.4
+ *
+ * @param[in] isp                  ISP vector for a subframe
+ * @param[out] lp                  LP coefficients
+ * @param[in] lp_half_order        Half the number of LPs to construct
+ */
+static void isp2lp(const double *isp, float *lp, int lp_half_order) {
+    double pa[10 + 1], qa[10 + 1];
+    double last_isp = isp[2 * lp_half_order - 1];
+    double qa_old = 0.0;
+    float *lp2 = &lp[2 * lp_half_order];
+    int i;
+
+    if (lp_half_order > 8) { // high-band specific
+        lsp2polyf_16k(isp,     pa, lp_half_order);
+        lsp2polyf_16k(isp + 1, qa, lp_half_order - 1);
+
+        for (i = 0; i <= lp_half_order; i++)
+            pa[i] *= 4.0;
+        for (i = 0; i < lp_half_order; i++)
+            qa[i] *= 4.0;
+    } else {
+        ff_lsp2polyf(isp,     pa, lp_half_order);
+        ff_lsp2polyf(isp + 1, qa, lp_half_order - 1);
+    }
+
+    for (i = 1; i < lp_half_order; i++) {
+        double paf = (1 + last_isp) * pa[i];
+        double qaf = (1 - last_isp) * (qa[i] - qa_old);
+
+        qa_old = qa[i - 1];
+
+        lp[i]   = 0.5 * (paf + qaf);
+        lp2[-i] = 0.5 * (paf - qaf);
+    }
+
+    lp[0] = 1.0;
+    lp[lp_half_order] = 0.5 * (1 + last_isp) * pa[lp_half_order];
+    lp2[0] = last_isp;
+}

Please double-check that this is not a duplication of sipr.c:lsp2lpc_sipr().


+static void decode_pitch_vector(AMRWBContext *ctx,
+                                const AMRWBSubFrame *amr_subframe,
+                                const int subframe)
+{
+    int pitch_lag_int, pitch_lag_frac;
+    int i;
+    float *exc     = ctx->excitation;
+    enum Mode mode = ctx->fr_cur_mode;
+
+    if (mode <= MODE_8k85) {
+        decode_pitch_lag_low(&pitch_lag_int, &pitch_lag_frac, amr_subframe->adap,
+                              &ctx->base_pitch_lag, subframe, mode);
+    } else
+        decode_pitch_lag_high(&pitch_lag_int, &pitch_lag_frac, amr_subframe->adap,
+                              &ctx->base_pitch_lag, subframe);
+
+    ctx->pitch_lag_int = pitch_lag_int;
+    pitch_lag_int += (pitch_lag_frac < 0 ? -1 : 0) + (pitch_lag_frac ? 1 : 0);
+
+
+    /* Calculate the pitch vector by interpolating the past excitation at the
+       pitch lag using a hamming windowed sinc function */
+    ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int,
+                          ac_inter, 4,
+                          pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4),
+                          LP_ORDER, AMRWB_SFR_SIZE + 1);

ac_inter is yet another Hamming-windowed sinc table. Can you check whether you can reuse acelp_vectors.c:ff_b60_sinc or sipr16kdata.h:sinc_win?

+/**
+ * Reduce fixed vector sparseness by smoothing with one of three IR filters
+ * Also known as "adaptive phase dispersion"
+ *
+ * @param[in] ctx                  The context
+ * @param[in,out] fixed_vector     Unfiltered fixed vector
+ * @param[out] buf                 Space for modified vector if necessary
+ *
+ * @return The potentially overwritten filtered fixed vector address
+ */
+static float *anti_sparseness(AMRWBContext *ctx,
+                              float *fixed_vector, float *buf)

amrnbdec.c has a function with the same name. Can any code be reused?

+/**
+ * Apply to synthesis a 2nd order high-pass filter
+ *
+ * @param[out] out                 Buffer for filtered output
+ * @param[in] hpf_coef             Filter coefficients as used below
+ * @param[in,out] mem              State from last filtering (updated)
+ * @param[in] in                   Input speech data
+ *
+ * @remark It is safe to pass the same array in in and out parameters
+ */
+static void high_pass_filter(float *out, const float hpf_coef[2][3],
+                             float mem[4], const float *in)
+{
+    int i;
+    float *x = mem - 1, *y = mem + 2; // previous inputs and outputs
+
+    for (i = 0; i < AMRWB_SFR_SIZE; i++) {
+        float x0 = in[i];
+
+        out[i] = hpf_coef[0][0] * x0   + hpf_coef[1][0] * y[0] +
+                 hpf_coef[0][1] * x[1] + hpf_coef[1][1] * y[1] +
+                 hpf_coef[0][2] * x[2];
+
+        y[1] = y[0];
+        y[0] = out[i];
+
+        x[2] = x[1];
+        x[1] = x0;
+    }
+}

Please reuse acelp_filters.c:ff_acelp_apply_order_2_transfer_function() instead of reimplementing it.
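
If I remember its implementation right, it is a direct form II with only two
memory taps and the pole coefficients negated, so the tables and the context
field would need reshuffling. Roughly (hpf_zeros, hpf_poles and the two-element
mem below are hypothetical rearrangements of your hpf_coef/mem):

    ff_acelp_apply_order_2_transfer_function(out, in, hpf_zeros, hpf_poles,
                                              1.0, mem, AMRWB_SFR_SIZE);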

+/**
+ * Upsample a signal by 5/4 ratio (from 12.8kHz to 16kHz) using
+ * a FIR interpolation filter. Uses past data from before *in address
+ *
+ * @param[out] out                 Buffer for interpolated signal
+ * @param[in] in                   Current signal data (length 0.8*o_size)
+ * @param[in] o_size               Output signal length
+ */
+static void upsample_5_4(float *out, const float *in, int o_size)
+{
+    const float *in0 = in - UPS_FIR_SIZE + 1;
+    int i;
+
+    for (i = 0; i < o_size; i++) {
+        int int_part  = (4 * i) / 5;
+        int frac_part = (4 * i) - 5 * int_part;

You can break this loop in two to avoid the division:

i = 0;
for (j = 0; j < o_size/5; j++)
    for (k = 0; k < 5; k++) {
        ....
        i++;
    }
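
Something like this, for instance (int_off/frac_off just tabulate (4 * k) / 5 and
4 * k - 5 * ((4 * k) / 5) for k = 0..4; the FIR body stays as you have it):

    static const uint8_t int_off[5]  = { 0, 0, 1, 2, 3 };
    static const uint8_t frac_off[5] = { 0, 4, 3, 2, 1 };
    int i = 0, j, k;

    for (j = 0; j < o_size / 5; j++)
        for (k = 0; k < 5; k++) {
            int int_part  = 4 * j + int_off[k];
            int frac_part = frac_off[k];
            /* ... the same FIR interpolation as before, indexed by
               int_part and frac_part ... */
            i++;
        }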


+/**
+ * Generate the high-band excitation with the same energy from the lower
+ * one and scaled by the given gain
+ *
+ * @param[in] ctx                  The context
+ * @param[out] hb_exc              Buffer for the excitation
+ * @param[in] synth_exc            Low-band excitation used for synthesis
+ * @param[in] hb_gain              Wanted excitation gain
+ */
+static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc,
+                                 const float *synth_exc, float hb_gain)
+{
+    int i;
+    float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
+
+    /* Generate a white-noise excitation */
+    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
+        hb_exc[i] = 32768.0 - (uint16_t) av_lfg_get(&ctx->prng);
+
+    ff_scale_vector_to_given_sum_of_squares(hb_exc, hb_exc, energy,
+                                            AMRWB_SFR_SIZE_16k);
+
+    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
+        hb_exc[i] *= hb_gain;
+}

Why are you scaling it twice?
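
(If both scalings are really wanted, they could at least be folded into one
call, since multiplying by hb_gain afterwards is the same as asking for
hb_gain^2 times the energy -- assuming I read
ff_scale_vector_to_given_sum_of_squares() right:)

    ff_scale_vector_to_given_sum_of_squares(hb_exc, hb_exc,
                                            energy * hb_gain * hb_gain,
                                            AMRWB_SFR_SIZE_16k);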

+/**
+ * Apply to high-band samples a 15th order filter
+ * The filter characteristic depends on the given coefficients
+ *
+ * @param[out] out                 Buffer for filtered output
+ * @param[in] fir_coef             Filter coefficients
+ * @param[in,out] mem              State from last filtering (updated)
+ * @param[in] cp_gain              Compensation gain (usually the filter gain)
+ * @param[in] in                   Input speech data (high-band)
+ *
+ * @remark It is safe to pass the same array in in and out parameters
+ */
+static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
+                          float mem[HB_FIR_SIZE], float cp_gain, const float *in)
+{
+    int i, j;
+    float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples
+
+    memcpy(data, mem, HB_FIR_SIZE * sizeof(float));
+
+    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
+        data[i + HB_FIR_SIZE] = in[i] / cp_gain;
+
+    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
+        out[i] = 0.0;
+        for (j = 0; j <= HB_FIR_SIZE; j++)
+            out[i] += data[i + j] * fir_coef[j];
+    }
+
+    memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
+}

I think it is cleaner (and more consistent) to do it like the synthesis filter and use one single buffer for the samples and the filter memory...
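
For example, with a persistent buffer in the context that keeps the HB_FIR_SIZE
history samples in front of the current subframe (ctx->hb_samples_buf is a
hypothetical HB_FIR_SIZE + AMRWB_SFR_SIZE_16k float array, laid out like the
synthesis buffers):

    float *data = ctx->hb_samples_buf;

    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
        data[HB_FIR_SIZE + i] = in[i] / cp_gain;

    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
        out[i] = 0.0;
        for (j = 0; j <= HB_FIR_SIZE; j++)
            out[i] += data[i + j] * fir_coef[j];
    }

    memmove(data, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));

which drops the local copy and one of the memcpy() calls.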

-Vitor
